Actual source code: bddcprivate.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <petsc/private/pcbddcimpl.h>
  3: #include <petsc/private/pcbddcprivateimpl.h>
  4: #include <petsc/private/kernels/blockinvert.h>
  5: #include <../src/mat/impls/dense/seq/dense.h>
  6: #include <petscdmplex.h>
  7: #include <petscblaslapack.h>
  8: #include <petsc/private/sfimpl.h>
  9: #include <petsc/private/dmpleximpl.h>
 10: #include <petscdmda.h>

 12: static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); /* forward declaration of a file-local helper; only the prototype is given at this point */

 14: /* if range is true,  it returns B s.t. span{B} = range(A)
 15:    if range is false, it returns B s.t. range(B) _|_ range(A)

        The columns of B are copied from the left singular vectors U computed
        by LAPACK gesvd on the dense array of A. With i the number of leading
        singular values that are not below PETSC_SMALL, B holds the first i
        columns of U (range == PETSC_TRUE) or the remaining nr - i columns
        (range == PETSC_FALSE).

        Input:
          A     - dense matrix (nr x nc); its array is handed to gesvd with leading
                  dimension nr. NOTE(review): LAPACK documents that array as
                  overwritten by gesvd, so A presumably no longer holds its
                  original values on return - confirm before reusing A.
          range - selects which block of columns of U is returned (see above)
          lw    - number of entries in work; only read when work is not NULL
          work  - optional LAPACK workspace; when NULL a buffer of
                  max(1, 5*min(nr,nc), 3*min(nr,nc) + max(nr,nc)) scalars is
                  allocated and freed internally
          rwork - optional array that receives the singular values (gesvd is
                  called with min(nr,nc) of them); when NULL a temporary is used

        Output:
          B     - newly created sequential dense matrix with nr rows; it is not
                  destroyed by this routine. *B is NOT assigned when A has zero
                  rows or zero columns (early return).
     */
 16: static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 17: {
 18:   PetscScalar *uwork, *data, *U, ds = 0.;
 19:   PetscReal   *sing;
 20:   PetscBLASInt bM, bN, lwork, lierr, di = 1;
 21:   PetscInt     ulw, i, nr, nc, n;
 22: #if defined(PETSC_USE_COMPLEX)
 23:   PetscReal *rwork2;
 24: #endif

 26:   PetscFunctionBegin;
 27:   PetscCall(MatGetSize(A, &nr, &nc));
       /* empty matrix: nothing is computed and *B is left untouched */
 28:   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);

 30:   /* workspace */
 31:   if (!work) {
 32:     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
 33:     PetscCall(PetscMalloc1(ulw, &uwork));
 34:   } else {
 35:     ulw   = lw;
 36:     uwork = work;
 37:   }
       /* n = number of singular values */
 38:   n = PetscMin(nr, nc);
 39:   if (!rwork) {
 40:     PetscCall(PetscMalloc1(n, &sing));
 41:   } else {
 42:     sing = rwork;
 43:   }

 45:   /* SVD */
       /* U receives all nr left singular vectors (nr x nr, leading dimension nr) */
 46:   PetscCall(PetscMalloc1(nr * nr, &U));
 47:   PetscCall(PetscBLASIntCast(nr, &bM));
 48:   PetscCall(PetscBLASIntCast(nc, &bN));
 49:   PetscCall(PetscBLASIntCast(ulw, &lwork));
 50:   PetscCall(MatDenseGetArray(A, &data));
       /* floating point trapping is switched off around the LAPACK call and restored right after it */
 51:   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
       /* "A", "N": compute the full U and no right singular vectors;
          ds and di are placeholders for the unreferenced VT array and its leading dimension */
 52: #if !defined(PETSC_USE_COMPLEX)
 53:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
 54: #else
       /* the complex variant takes an additional real workspace (5 * n entries allocated here) */
 55:   PetscCall(PetscMalloc1(5 * n, &rwork2));
 56:   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
 57:   PetscCall(PetscFree(rwork2));
 58: #endif
 59:   PetscCall(PetscFPTrapPop());
       /* NOTE(review): on a nonzero lierr this returns an error before the array is restored
          and before sing/uwork/U are released below */
 60:   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
 61:   PetscCall(MatDenseRestoreArray(A, &data));
       /* after the loop, i is the index of the first singular value below PETSC_SMALL (i == n if there is none) */
 62:   for (i = 0; i < n; i++)
 63:     if (sing[i] < PETSC_SMALL) break;
       /* only internally allocated buffers are freed; caller-provided work/rwork are left alone */
 64:   if (!rwork) PetscCall(PetscFree(sing));
 65:   if (!work) PetscCall(PetscFree(uwork));
 66:   /* create B */
 67:   if (!range) {
         /* complement: columns i, ..., nr-1 of U */
 68:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
 69:     PetscCall(MatDenseGetArray(*B, &data));
 70:     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
 71:   } else {
         /* range: columns 0, ..., i-1 of U */
 72:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
 73:     PetscCall(MatDenseGetArray(*B, &data));
 74:     PetscCall(PetscArraycpy(data, U, i * nr));
 75:   }
 76:   PetscCall(MatDenseRestoreArray(*B, &data));
 77:   PetscCall(PetscFree(U));
 78:   PetscFunctionReturn(PETSC_SUCCESS);
 79: }

 81: /* TODO REMOVE: debug-only counters, compiled only when PRINT_GDET is defined.
        They are used by PCBDDCComputeNedelecChangeEdge() to build the name of the
        dump file "Gdet_l<lev>_r<rank>_cc<inc>.m"; inc is incremented at each dump */
 82: #if defined(PRINT_GDET)
 83: static int inc = 0;
 84: static int lev = 0;
 85: #endif

 87: static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 88: {
       /* Extracts dense blocks of lG for one edge and builds the matrices returned
          in Gins and GKins. Returns immediately, without touching the outputs, when
          the index set edge has size zero.

          Input:
            lG      - matrix from which the sub-blocks are extracted
            edge    - row indices used for the "constants" block lG(edge, extcol)
            extrow  - row indices used for the "gradients" block lG(extrow, extcol)
            extcol  - column indices shared by both blocks
            corners - optional column indices; when not NULL, cvals is filled
            work    - scalar buffer partitioned as follows (esize, rsize, csize are
                      the sizes of edge, extrow, extcol):
                        [0, 5*esize)            LAPACK workspace for the SVD
                        next rsize*csize        storage backing *Gins
                        next esize*csize        storage backing the temporary GEd
            rwork   - real buffer passed to MatDenseOrthogonalRangeOrComplement(),
                      which stores the singular values in it

          Output:
            Gins  - dense copy of lG(extrow, extcol), created on top of the work
                    buffer. NOTE(review): it presumably must not be used after
                    work is released - confirm with the caller.
            GKins - result of MatDenseOrthogonalRangeOrComplement() on the dense
                    lG(edge, extcol) with range == PETSC_FALSE
            cvals - first two entries of lG(edge, corners)^T * GKins; written only
                    when corners is not NULL
       */
 89:   Mat          GE, GEd;
 90:   PetscInt     rsize, csize, esize;
 91:   PetscScalar *ptr;

 93:   PetscFunctionBegin;
 94:   PetscCall(ISGetSize(edge, &esize));
 95:   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
 96:   PetscCall(ISGetSize(extrow, &rsize));
 97:   PetscCall(ISGetSize(extcol, &csize));

 99:   /* gradients */
       /* skip the first 5 * esize entries of work: they are the SVD workspace used below */
100:   ptr = work + 5 * esize;
101:   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
       /* convert the extracted block into the dense matrix just created on ptr */
103:   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104:   PetscCall(MatDestroy(&GE));

106:   /* constants */
       /* advance past the storage reserved for Gins */
107:   ptr += rsize * csize;
108:   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109:   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110:   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111:   PetscCall(MatDestroy(&GE));
       /* range == PETSC_FALSE: GKins gets the columns orthogonal to the range of GEd */
112:   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113:   PetscCall(MatDestroy(&GEd));

115:   if (corners) {
116:     Mat                GEc;
117:     const PetscScalar *vals;
118:     PetscScalar        v;

120:     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
         /* GEd is reused here to hold GEc^T * GKins */
121:     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122:     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123:     /* v       = PetscAbsScalar(vals[0]); */
         /* with v fixed to 1 the divisions and the MatScale below leave the values unchanged */
124:     v        = 1.;
         /* NOTE(review): vals[1] is read unconditionally, so the product presumably
            always has at least two entries - confirm with the caller */
125:     cvals[0] = vals[0] / v;
126:     cvals[1] = vals[1] / v;
127:     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128:     PetscCall(MatScale(*GKins, 1. / v));
129: #if defined(PRINT_GDET)
         /* debug dump: writes GEc, GK and Gproj in MATLAB ASCII format to a per-call file */
130:     {
131:       PetscViewer viewer;
132:       char        filename[256];
133:       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134:       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136:       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137:       PetscCall(MatView(GEc, viewer));
138:       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139:       PetscCall(MatView(*GKins, viewer));
140:       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141:       PetscCall(MatView(GEd, viewer));
142:       PetscCall(PetscViewerDestroy(&viewer));
143:     }
144: #endif
145:     PetscCall(MatDestroy(&GEd));
146:     PetscCall(MatDestroy(&GEc));
147:   }
148:   PetscFunctionReturn(PETSC_SUCCESS);
149: }

151: static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); /* forward declaration of a file-local helper; called by PCBDDCNedelecSupport() below */

153: PetscErrorCode PCBDDCNedelecSupport(PC pc)
154: {
155:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156:   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157:   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158:   PetscSF                sfv;
159:   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160:   MPI_Comm               comm;
161:   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162:   IS                    *eedges, *extrows, *extcols, *alleedges;
163:   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164:   PetscScalar           *vals, *work;
165:   PetscReal             *rwork;
166:   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167:   PetscInt               ne, nv, Lv, order, n, field;
168:   PetscInt               i, j, extmem, cum, maxsize, nee;
169:   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170:   PetscInt              *sfvleaves, *sfvroots;
171:   PetscInt              *corners, *cedges;
172:   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173:   PetscInt              *emarks;
174:   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;

176:   PetscFunctionBegin;
177:   /* If the discrete gradient is defined for a subset of dofs and global is true,
178:      it assumes G is given in global ordering for all the dofs.
179:      Otherwise, the ordering is global for the Nedelec field */
180:   order      = pcbddc->nedorder;
181:   conforming = pcbddc->conforming;
182:   field      = pcbddc->nedfield;
183:   global     = pcbddc->nedglobal;
184:   setprimal  = PETSC_FALSE;
185:   print      = PETSC_FALSE;

187:   /* Command line customization */
188:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189:   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190:   /* print debug info and adaptive order TODO: to be removed */
191:   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192:   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193:   PetscOptionsEnd();

195:   /* Return if there are no edges in the decomposition */
196:   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197:   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198:   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199:   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200:   lrc[0] = PETSC_FALSE;
201:   for (i = 0; i < n; i++) {
202:     if (PetscRealPart(vals[i]) > 2.) {
203:       lrc[0] = PETSC_TRUE;
204:       break;
205:     }
206:   }
207:   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208:   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm));
209:   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);

211:   /* Get Nedelec field */
212:   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214:     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216:     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218:     ne            = n;
219:     nedfieldlocal = NULL;
220:     global        = PETSC_TRUE;
221:   } else if (field == PETSC_DECIDE) {
222:     PetscInt rst, ren, *idx;

224:     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225:     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226:     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227:     for (i = rst; i < ren; i++) {
228:       PetscInt nc;

230:       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231:       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232:       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233:     }
234:     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235:     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236:     PetscCall(PetscMalloc1(n, &idx));
237:     for (i = 0, ne = 0; i < n; i++)
238:       if (matis->sf_leafdata[i]) idx[ne++] = i;
239:     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240:   } else {
241:     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242:   }

244:   /* Sanity checks */
245:   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246:   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247:   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);

249:   /* Just set primal dofs and return */
250:   if (setprimal) {
251:     IS        enedfieldlocal;
252:     PetscInt *eidxs;

254:     PetscCall(PetscMalloc1(ne, &eidxs));
255:     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256:     if (nedfieldlocal) {
257:       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258:       for (i = 0, cum = 0; i < ne; i++) {
259:         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260:       }
261:       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262:     } else {
263:       for (i = 0, cum = 0; i < ne; i++) {
264:         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265:       }
266:     }
267:     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268:     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269:     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270:     PetscCall(PetscFree(eidxs));
271:     PetscCall(ISDestroy(&nedfieldlocal));
272:     PetscCall(ISDestroy(&enedfieldlocal));
273:     PetscFunctionReturn(PETSC_SUCCESS);
274:   }

276:   /* Compute some l2g maps */
277:   if (nedfieldlocal) {
278:     IS is;

280:     /* need to map from the local Nedelec field to local numbering */
281:     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283:     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284:     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286:     if (global) {
287:       PetscCall(PetscObjectReference((PetscObject)al2g));
288:       el2g = al2g;
289:     } else {
290:       IS gis;

292:       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293:       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294:       PetscCall(ISDestroy(&gis));
295:     }
296:     PetscCall(ISDestroy(&is));
297:   } else {
298:     /* one ref for the destruction of al2g, one for el2g */
299:     PetscCall(PetscObjectReference((PetscObject)al2g));
300:     PetscCall(PetscObjectReference((PetscObject)al2g));
301:     el2g = al2g;
302:     fl2g = NULL;
303:   }

305:   /* Start communication to drop connections for interior edges (for cc analysis only) */
306:   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307:   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308:   if (nedfieldlocal) {
309:     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310:     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311:     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312:   } else {
313:     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314:   }
315:   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316:   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));

318:   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319:      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320:   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));

322:   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323:   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324:   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325:   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326:   if (global) {
327:     PetscInt rst;

329:     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330:     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331:       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332:     }
333:     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334:     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335:   } else {
336:     PetscInt *tbz;

338:     PetscCall(PetscMalloc1(ne, &tbz));
339:     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340:     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341:     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342:     for (i = 0, cum = 0; i < ne; i++)
343:       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344:     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345:     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346:     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347:     PetscCall(PetscFree(tbz));
348:   }

350:   /* Extract subdomain relevant rows of G  */
351:   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352:   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353:   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354:   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355:   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356:   PetscCall(ISDestroy(&lned));
357:   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358:   PetscCall(MatDestroy(&lGall));
359:   PetscCall(MatISGetLocalMat(lGis, &lG));
360:   if (matis->allow_repeated) { /* multi-element support */
361:     Mat                   *lGn, B;
362:     IS                    *is_rows, *tcols, tmap, nmap;
363:     PetscInt               subnv;
364:     const PetscInt        *subvidxs;
365:     ISLocalToGlobalMapping mapn;

367:     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368:     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369:     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370:     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371:       if (fl2g) {
372:         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373:       } else {
374:         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375:         is_rows[i] = pcbddc->local_subs[i];
376:       }
377:       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378:       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379:       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380:       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382:       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383:       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384:     }

386:     /* Create new MATIS with repeated vertices */
387:     PetscCall(MatCreate(comm, &B));
388:     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389:     PetscCall(MatSetType(B, MATIS));
390:     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391:     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392:     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393:     PetscCall(ISDestroy(&tmap));
394:     PetscCall(ISGetLocalSize(nmap, &subnv));
395:     PetscCall(ISGetIndices(nmap, &subvidxs));
396:     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397:     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398:     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399:     PetscCall(ISDestroy(&tmap));
400:     PetscCall(ISDestroy(&nmap));
401:     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402:     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403:     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404:     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405:       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406:       PetscCall(ISDestroy(&is_rows[i]));
407:       PetscCall(ISDestroy(&tcols[i]));
408:     }
409:     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410:     PetscCall(PetscFree(lGn));
411:     PetscCall(PetscFree(is_rows));
412:     PetscCall(PetscFree(tcols));
413:     PetscCall(MatISSetLocalMat(B, lG));
414:     PetscCall(MatDestroy(&lG));

416:     PetscCall(MatDestroy(&lGis));
417:     lGis = B;

419:     lGis->assembled = PETSC_TRUE;
420:   }
421:   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));

423:   /* SF for nodal dofs communications */
424:   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425:   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426:   PetscCall(PetscObjectReference((PetscObject)vl2g));
427:   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428:   PetscCall(PetscSFCreate(comm, &sfv));
429:   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430:   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431:   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));

433:   if (elements_corners) {
434:     IS      tmp;
435:     Vec     global, local;
436:     Mat_IS *tGis = (Mat_IS *)lGis->data;

438:     PetscCall(MatCreateVecs(lGis, &global, NULL));
439:     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440:     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441:     PetscCall(VecDestroy(&global));
442:     PetscCall(VecDestroy(&local));
443:     elements_corners = tmp;
444:   }

446:   /* Destroy temporary G */
447:   PetscCall(MatISGetLocalMat(lGis, &lG));
448:   PetscCall(PetscObjectReference((PetscObject)lG));
449:   PetscCall(MatDestroy(&G));
450:   PetscCall(MatDestroy(&lGis));

452:   if (print) {
453:     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454:     PetscCall(MatView(lG, NULL));
455:   }

457:   /* Save lG for values insertion in change of basis */
458:   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));

460:   /* Analyze the edge-nodes connections (duplicate lG) */
461:   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462:   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463:   PetscCall(PetscBTCreate(nv, &btv));
464:   PetscCall(PetscBTCreate(ne, &bte));
465:   PetscCall(PetscBTCreate(ne, &btb));
466:   PetscCall(PetscBTCreate(ne, &btbd));
467:   /* need to import the boundary specification to ensure the
468:      proper detection of coarse edges' endpoints */
469:   if (pcbddc->DirichletBoundariesLocal) {
470:     IS is;

472:     if (fl2g) {
473:       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474:     } else {
475:       is = pcbddc->DirichletBoundariesLocal;
476:     }
477:     PetscCall(ISGetLocalSize(is, &cum));
478:     PetscCall(ISGetIndices(is, &idxs));
479:     for (i = 0; i < cum; i++) {
480:       if (idxs[i] >= 0 && idxs[i] < ne) {
481:         PetscCall(PetscBTSet(btb, idxs[i]));
482:         PetscCall(PetscBTSet(btbd, idxs[i]));
483:       }
484:     }
485:     PetscCall(ISRestoreIndices(is, &idxs));
486:     if (fl2g) PetscCall(ISDestroy(&is));
487:   }
488:   if (pcbddc->NeumannBoundariesLocal) {
489:     IS is;

491:     if (fl2g) {
492:       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493:     } else {
494:       is = pcbddc->NeumannBoundariesLocal;
495:     }
496:     PetscCall(ISGetLocalSize(is, &cum));
497:     PetscCall(ISGetIndices(is, &idxs));
498:     for (i = 0; i < cum; i++) {
499:       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500:     }
501:     PetscCall(ISRestoreIndices(is, &idxs));
502:     if (fl2g) PetscCall(ISDestroy(&is));
503:   }

505:   /* Count neighs per dof */
506:   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507:   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));

509:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510:      for proper detection of coarse edges' endpoints */
511:   PetscCall(PetscBTCreate(ne, &btee));
512:   for (i = 0; i < ne; i++) {
513:     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514:   }
515:   PetscCall(PetscMalloc1(ne, &marks));
516:   if (!conforming) {
517:     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518:     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519:   }
520:   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521:   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522:   cum = 0;
523:   for (i = 0; i < ne; i++) {
524:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525:     if (!PetscBTLookup(btee, i)) {
526:       marks[cum++] = i;
527:       continue;
528:     }
529:     /* set badly connected edge dofs as primal */
530:     if (!conforming) {
531:       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532:         marks[cum++] = i;
533:         PetscCall(PetscBTSet(bte, i));
534:         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535:       } else {
536:         /* every edge dofs should be connected through a certain number of nodal dofs
537:            to other edge dofs belonging to coarse edges
538:            - at most 2 endpoints
539:            - order-1 interior nodal dofs
540:            - no undefined nodal dofs (nconn < order)
541:         */
542:         PetscInt ends = 0, ints = 0, undef = 0;
543:         for (j = ii[i]; j < ii[i + 1]; j++) {
544:           PetscInt v     = jj[j], k;
545:           PetscInt nconn = iit[v + 1] - iit[v];
546:           for (k = iit[v]; k < iit[v + 1]; k++)
547:             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548:           if (nconn > order) ends++;
549:           else if (nconn == order) ints++;
550:           else undef++;
551:         }
552:         if (undef || ends > 2 || ints != order - 1) {
553:           marks[cum++] = i;
554:           PetscCall(PetscBTSet(bte, i));
555:           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556:         }
557:       }
558:     }
559:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560:     if (!order && ii[i + 1] != ii[i]) {
561:       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562:       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563:     }
564:   }
565:   PetscCall(PetscBTDestroy(&btee));
566:   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567:   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568:   if (!conforming) {
569:     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570:     PetscCall(MatDestroy(&lGt));
571:   }
572:   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));

574:   /* identify splitpoints and corner candidates */
575:   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576:   PetscCall(PetscBTCreate(nv, &btvcand));
577:   if (elements_corners) {
578:     PetscCall(ISGetLocalSize(elements_corners, &cum));
579:     PetscCall(ISGetIndices(elements_corners, &idxs));
580:     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581:     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582:   }

584:   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585:     PetscSF   emlsf, vmlsf;
586:     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587:     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;

589:     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590:     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591:     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592:     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");

594:     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595:     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));

597:     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598:     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599:     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600:     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);

602:     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603:     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604:     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605:     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);

607:     PetscCall(PetscMalloc1(ne, &eleaves));
608:     PetscCall(PetscMalloc1(nv, &vleaves));
609:     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610:     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611:     PetscCall(PetscMalloc1(emnl, &meleaves));
612:     PetscCall(PetscMalloc1(vmnl, &mvleaves));

614:     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615:     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616:     for (i = 0; i < n_subs; i++) {
617:       const PetscInt *idxs;
618:       const PetscInt  subid = cum_subs + i;
619:       PetscInt        ns;

621:       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622:       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623:       for (j = 0; j < ns; j++) {
624:         const PetscInt e = idxs[j];

626:         eleaves[e] = subid;
627:         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628:       }
629:       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630:     }
631:     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632:     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633:     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634:     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635:     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636:     PetscCall(PetscFree(eleaves));
637:     PetscCall(PetscFree(vleaves));

639:     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640:     eneighs[0] = meleaves;
641:     for (i = 0; i < ne; i++) {
642:       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643:       eneighs[i + 1] = eneighs[i] + ecount[i];
644:     }
645:     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646:     vneighs[0] = mvleaves;
647:     for (i = 0; i < nv; i++) {
648:       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649:       vneighs[i + 1] = vneighs[i] + vcount[i];
650:     }
651:   } else {
652:     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653:     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654:   }

656:   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657:   if (print) {
658:     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659:     PetscCall(MatView(lGe, NULL));
660:     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661:     PetscCall(MatView(lGt, NULL));
662:   }
663:   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664:   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665:   for (i = 0; i < nv; i++) {
666:     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668:     if (!order) { /* variable order */
669:       PetscReal vorder = 0.;

671:       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672:       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673:       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674:       ord = 1;
675:     }
676:     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677:       const PetscInt e = jj[j];

679:       if (PetscBTLookup(btbd, e)) {
680:         bdir = PETSC_TRUE;
681:         break;
682:       }
683:       if (vc != ecount[e]) {
684:         sneighs = PETSC_FALSE;
685:       } else {
686:         const PetscInt *vn = vneighs[i], *en = eneighs[e];

688:         for (PetscInt k = 0; k < vc; k++) {
689:           if (vn[k] != en[k]) {
690:             sneighs = PETSC_FALSE;
691:             break;
692:           }
693:         }
694:       }
695:     }
696:     if (elements_corners) test = 0;
697:     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698:       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699:       PetscCall(PetscBTSet(btv, i));
700:     } else if (test == ord) {
701:       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702:         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703:         PetscCall(PetscBTSet(btv, i));
704:       } else if (!elements_corners) {
705:         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706:         PetscCall(PetscBTSet(btvcand, i));
707:       }
708:     }
709:   }
710:   PetscCall(PetscBTDestroy(&btbd));

712:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713:   if (order != 1) {
714:     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715:     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716:     for (i = 0; i < nv; i++) {
717:       if (PetscBTLookup(btvcand, i)) {
718:         PetscBool found = PETSC_FALSE;
719:         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720:           PetscInt k, e = jj[j];
721:           if (PetscBTLookup(bte, e)) continue;
722:           for (k = iit[e]; k < iit[e + 1]; k++) {
723:             PetscInt v = jjt[k];
724:             if (v != i && PetscBTLookup(btvcand, v)) {
725:               found = PETSC_TRUE;
726:               break;
727:             }
728:           }
729:         }
730:         if (!found) {
731:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732:           PetscCall(PetscBTClear(btvcand, i));
733:         } else {
734:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735:         }
736:       }
737:     }
738:     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739:   }
740:   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741:   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742:   PetscCall(MatDestroy(&lGe));

744:   /* Get the local G^T explicitly */
745:   PetscCall(MatDestroy(&lGt));
746:   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747:   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));

749:   /* Mark shared nodal dofs */
750:   PetscCall(PetscBTCreate(nv, &btvi));
751:   for (i = 0; i < nv; i++) {
752:     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753:   }

755:   if (matis->allow_repeated) {
756:     PetscCall(PetscFree(eneighs[0]));
757:     PetscCall(PetscFree(vneighs[0]));
758:     PetscCall(PetscFree(eneighs));
759:     PetscCall(PetscFree(vneighs));
760:   }
761:   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762:   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));

764:   /* communicate corners and splitpoints */
765:   PetscCall(PetscMalloc1(nv, &vmarks));
766:   PetscCall(PetscArrayzero(sfvleaves, nv));
767:   PetscCall(PetscArrayzero(sfvroots, Lv));
768:   for (i = 0; i < nv; i++)
769:     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;

771:   if (print) {
772:     IS tbz;

774:     cum = 0;
775:     for (i = 0; i < nv; i++)
776:       if (sfvleaves[i]) vmarks[cum++] = i;

778:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779:     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780:     PetscCall(ISView(tbz, NULL));
781:     PetscCall(ISDestroy(&tbz));
782:   }

784:   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785:   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786:   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787:   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));

789:   /* Zero rows of lGt corresponding to identified corners
790:      and interior nodal dofs */
791:   cum = 0;
792:   for (i = 0; i < nv; i++) {
793:     if (sfvleaves[i]) {
794:       vmarks[cum++] = i;
795:       PetscCall(PetscBTSet(btv, i));
796:     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797:   }
798:   PetscCall(PetscBTDestroy(&btvi));
799:   if (print) {
800:     IS tbz;

802:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803:     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804:     PetscCall(ISView(tbz, NULL));
805:     PetscCall(ISDestroy(&tbz));
806:   }
807:   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808:   PetscCall(PetscFree(vmarks));
809:   PetscCall(PetscSFDestroy(&sfv));
810:   PetscCall(PetscFree2(sfvleaves, sfvroots));

812:   /* Recompute G */
813:   PetscCall(MatDestroy(&lG));
814:   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815:   if (print) {
816:     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817:     PetscCall(MatView(lG, NULL));
818:     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819:     PetscCall(MatView(lGt, NULL));
820:   }

822:   /* Get primal dofs (if any) */
823:   cum = 0;
824:   for (i = 0; i < ne; i++) {
825:     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826:   }
827:   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828:   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829:   if (print) {
830:     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831:     PetscCall(ISView(primals, NULL));
832:   }
833:   PetscCall(PetscBTDestroy(&bte));
834:   /* TODO: what if the user passed in some of them ?  */
835:   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836:   PetscCall(ISDestroy(&primals));

838:   /* Compute edge connectivity */
839:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));

841:   /* Symbolic conn = lG*lGt */
842:   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843:     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844:     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845:     PetscCall(MatProductSetAlgorithm(conn, "default"));
846:     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847:     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848:     PetscCall(MatProductSetFromOptions(conn));
849:     PetscCall(MatProductSymbolic(conn));
850:     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851:     if (fl2g) {
852:       PetscBT   btf;
853:       PetscInt *iia, *jja, *iiu, *jju;
854:       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;

856:       /* create CSR for all local dofs */
857:       PetscCall(PetscMalloc1(n + 1, &iia));
858:       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859:         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860:         iiu = pcbddc->mat_graph->xadj;
861:         jju = pcbddc->mat_graph->adjncy;
862:       } else if (pcbddc->use_local_adj) {
863:         rest = PETSC_TRUE;
864:         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865:       } else {
866:         free = PETSC_TRUE;
867:         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868:         iiu[0] = 0;
869:         for (i = 0; i < n; i++) {
870:           iiu[i + 1] = i + 1;
871:           jju[i]     = -1;
872:         }
873:       }

875:       /* import sizes of CSR */
876:       iia[0] = 0;
877:       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];

879:       /* overwrite entries corresponding to the Nedelec field */
880:       PetscCall(PetscBTCreate(n, &btf));
881:       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882:       for (i = 0; i < ne; i++) {
883:         PetscCall(PetscBTSet(btf, idxs[i]));
884:         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885:       }

887:       /* iia in CSR */
888:       for (i = 0; i < n; i++) iia[i + 1] += iia[i];

890:       /* jja in CSR */
891:       PetscCall(PetscMalloc1(iia[n], &jja));
892:       for (i = 0; i < n; i++)
893:         if (!PetscBTLookup(btf, i))
894:           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];

896:       /* map edge dofs connectivity */
897:       if (jj) {
898:         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899:         for (i = 0; i < ne; i++) {
900:           PetscInt e = idxs[i];
901:           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902:         }
903:       }
904:       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905:       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906:       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907:       if (free) PetscCall(PetscFree2(iiu, jju));
908:       PetscCall(PetscBTDestroy(&btf));
909:     } else {
910:       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911:     }
912:     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913:     PetscCall(MatDestroy(&conn));
914:   }

916:   /* Analyze interface for edge dofs */
917:   PetscCall(PCBDDCAnalyzeInterface(pc));
918:   pcbddc->mat_graph->twodim = PETSC_FALSE;

920:   /* Get coarse edges in the edge space */
921:   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));

923:   if (fl2g) {
924:     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925:     PetscCall(PetscMalloc1(nee, &eedges));
926:     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927:   } else {
928:     eedges  = alleedges;
929:     primals = allprimals;
930:   }

932:   /* Mark fine edge dofs with their coarse edge id */
933:   PetscCall(PetscArrayzero(marks, ne));
934:   PetscCall(ISGetLocalSize(primals, &cum));
935:   PetscCall(ISGetIndices(primals, &idxs));
936:   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937:   PetscCall(ISRestoreIndices(primals, &idxs));
938:   if (print) {
939:     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940:     PetscCall(ISView(primals, NULL));
941:   }

943:   maxsize = 0;
944:   for (i = 0; i < nee; i++) {
945:     PetscInt size, mark = i + 1;

947:     PetscCall(ISGetLocalSize(eedges[i], &size));
948:     PetscCall(ISGetIndices(eedges[i], &idxs));
949:     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951:     maxsize = PetscMax(maxsize, size);
952:   }

954:   /* Find coarse edge endpoints */
955:   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956:   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957:   for (i = 0; i < nee; i++) {
958:     PetscInt mark = i + 1, size;

960:     PetscCall(ISGetLocalSize(eedges[i], &size));
961:     if (!size && nedfieldlocal) continue;
962:     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963:     PetscCall(ISGetIndices(eedges[i], &idxs));
964:     if (print) {
965:       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966:       PetscCall(ISView(eedges[i], NULL));
967:     }
968:     for (j = 0; j < size; j++) {
969:       PetscInt k, ee = idxs[j];
970:       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972:         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973:         if (PetscBTLookup(btv, jj[k])) {
974:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975:         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976:           PetscInt  k2;
977:           PetscBool corner = PETSC_FALSE;
978:           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981:                if the edge dof lie on the natural part of the boundary */
982:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983:               corner = PETSC_TRUE;
984:               break;
985:             }
986:           }
987:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989:             PetscCall(PetscBTSet(btv, jj[k]));
990:           } else {
991:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992:           }
993:         }
994:       }
995:     }
996:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997:   }
998:   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999:   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000:   PetscCall(PetscBTDestroy(&btb));

1002:   /* Reset marked primal dofs */
1003:   PetscCall(ISGetLocalSize(primals, &cum));
1004:   PetscCall(ISGetIndices(primals, &idxs));
1005:   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006:   PetscCall(ISRestoreIndices(primals, &idxs));

1008:   /* Now use the initial lG */
1009:   PetscCall(MatDestroy(&lG));
1010:   PetscCall(MatDestroy(&lGt));
1011:   lG = lGinit;
1012:   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));

1014:   /* Compute extended cols indices */
1015:   PetscCall(PetscBTCreate(nv, &btvc));
1016:   PetscCall(PetscBTCreate(nee, &bter));
1017:   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018:   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019:   i *= maxsize;
1020:   PetscCall(PetscCalloc1(nee, &extcols));
1021:   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022:   eerr = PETSC_FALSE;
1023:   for (i = 0; i < nee; i++) {
1024:     PetscInt size, found = 0;

1026:     cum = 0;
1027:     PetscCall(ISGetLocalSize(eedges[i], &size));
1028:     if (!size && nedfieldlocal) continue;
1029:     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030:     PetscCall(ISGetIndices(eedges[i], &idxs));
1031:     PetscCall(PetscBTMemzero(nv, btvc));
1032:     for (j = 0; j < size; j++) {
1033:       PetscInt k, ee = idxs[j];
1034:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035:         PetscInt vv = jj[k];
1036:         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037:         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038:       }
1039:     }
1040:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041:     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042:     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043:     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045:     /* it may happen that endpoints are not defined at this point
1046:        if it is the case, mark this edge for a second pass */
1047:     if (cum != size - 1 || found != 2) {
1048:       PetscCall(PetscBTSet(bter, i));
1049:       if (print) {
1050:         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051:         PetscCall(ISView(eedges[i], NULL));
1052:         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053:         PetscCall(ISView(extcols[i], NULL));
1054:       }
1055:       eerr = PETSC_TRUE;
1056:     }
1057:   }
1058:   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059:   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm));
1060:   if (done) {
1061:     PetscInt *newprimals;

1063:     PetscCall(PetscMalloc1(ne, &newprimals));
1064:     PetscCall(ISGetLocalSize(primals, &cum));
1065:     PetscCall(ISGetIndices(primals, &idxs));
1066:     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067:     PetscCall(ISRestoreIndices(primals, &idxs));
1068:     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069:     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070:     for (i = 0; i < nee; i++) {
1071:       PetscBool has_candidates = PETSC_FALSE;
1072:       if (PetscBTLookup(bter, i)) {
1073:         PetscInt size, mark = i + 1;

1075:         PetscCall(ISGetLocalSize(eedges[i], &size));
1076:         PetscCall(ISGetIndices(eedges[i], &idxs));
1077:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078:         for (j = 0; j < size; j++) {
1079:           PetscInt k, ee = idxs[j];
1080:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081:           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082:             /* set all candidates located on the edge as corners */
1083:             if (PetscBTLookup(btvcand, jj[k])) {
1084:               PetscInt k2, vv = jj[k];
1085:               has_candidates = PETSC_TRUE;
1086:               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087:               PetscCall(PetscBTSet(btv, vv));
1088:               /* set all edge dofs connected to candidate as primals */
1089:               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090:                 if (marks[jjt[k2]] == mark) {
1091:                   PetscInt k3, ee2 = jjt[k2];
1092:                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093:                   newprimals[cum++] = ee2;
1094:                   /* finally set the new corners */
1095:                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096:                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097:                     PetscCall(PetscBTSet(btv, jj[k3]));
1098:                   }
1099:                 }
1100:               }
1101:             } else {
1102:               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103:             }
1104:           }
1105:         }
1106:         if (!has_candidates) { /* circular edge */
1107:           PetscInt k, ee = idxs[0], *tmarks;

1109:           PetscCall(PetscCalloc1(ne, &tmarks));
1110:           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111:           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112:             PetscInt k2;
1113:             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114:             PetscCall(PetscBTSet(btv, jj[k]));
1115:             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116:           }
1117:           for (j = 0; j < size; j++) {
1118:             if (tmarks[idxs[j]] > 1) {
1119:               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120:               newprimals[cum++] = idxs[j];
1121:             }
1122:           }
1123:           PetscCall(PetscFree(tmarks));
1124:         }
1125:         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126:       }
1127:       PetscCall(ISDestroy(&extcols[i]));
1128:     }
1129:     PetscCall(PetscFree(extcols));
1130:     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131:     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132:     if (fl2g) {
1133:       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134:       PetscCall(ISDestroy(&primals));
1135:       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136:       PetscCall(PetscFree(eedges));
1137:     }
1138:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139:     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140:     PetscCall(PetscFree(newprimals));
1141:     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142:     PetscCall(ISDestroy(&primals));
1143:     PetscCall(PCBDDCAnalyzeInterface(pc));
1144:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146:     if (fl2g) {
1147:       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148:       PetscCall(PetscMalloc1(nee, &eedges));
1149:       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150:     } else {
1151:       eedges  = alleedges;
1152:       primals = allprimals;
1153:     }
1154:     PetscCall(PetscCalloc1(nee, &extcols));

1156:     /* Mark again */
1157:     PetscCall(PetscArrayzero(marks, ne));
1158:     for (i = 0; i < nee; i++) {
1159:       PetscInt size, mark = i + 1;

1161:       PetscCall(ISGetLocalSize(eedges[i], &size));
1162:       PetscCall(ISGetIndices(eedges[i], &idxs));
1163:       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164:       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165:     }
1166:     if (print) {
1167:       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168:       PetscCall(ISView(primals, NULL));
1169:     }

1171:     /* Recompute extended cols */
1172:     eerr = PETSC_FALSE;
1173:     for (i = 0; i < nee; i++) {
1174:       PetscInt size;

1176:       cum = 0;
1177:       PetscCall(ISGetLocalSize(eedges[i], &size));
1178:       if (!size && nedfieldlocal) continue;
1179:       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180:       PetscCall(ISGetIndices(eedges[i], &idxs));
1181:       for (j = 0; j < size; j++) {
1182:         PetscInt k, ee = idxs[j];
1183:         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184:           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185:       }
1186:       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187:       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188:       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189:       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191:       if (cum != size - 1) {
1192:         if (print) {
1193:           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194:           PetscCall(ISView(eedges[i], NULL));
1195:           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196:           PetscCall(ISView(extcols[i], NULL));
1197:         }
1198:         eerr = PETSC_TRUE;
1199:       }
1200:     }
1201:   }
1202:   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203:   PetscCall(PetscFree2(extrow, gidxs));
1204:   PetscCall(PetscBTDestroy(&bter));
1205:   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206:   /* an error should not occur at this point */
1207:   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1209:   /* Check the number of endpoints */
1210:   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211:   PetscCall(PetscMalloc1(2 * nee, &corners));
1212:   PetscCall(PetscMalloc1(nee, &cedges));
1213:   for (i = 0; i < nee; i++) {
1214:     PetscInt size, found = 0, gc[2];

1216:     /* init with defaults */
1217:     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218:     PetscCall(ISGetLocalSize(eedges[i], &size));
1219:     if (!size && nedfieldlocal) continue;
1220:     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221:     PetscCall(ISGetIndices(eedges[i], &idxs));
1222:     PetscCall(PetscBTMemzero(nv, btvc));
1223:     for (j = 0; j < size; j++) {
1224:       PetscInt k, ee = idxs[j];
1225:       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226:         PetscInt vv = jj[k];
1227:         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228:           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229:           corners[i * 2 + found++] = vv;
1230:         }
1231:       }
1232:     }
1233:     if (found != 2) {
1234:       PetscInt e;
1235:       if (fl2g) {
1236:         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237:       } else {
1238:         e = idxs[0];
1239:       }
1240:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241:     }

1243:     /* get primal dof index on this coarse edge */
1244:     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245:     if (gc[0] > gc[1]) {
1246:       PetscInt swap      = corners[2 * i];
1247:       corners[2 * i]     = corners[2 * i + 1];
1248:       corners[2 * i + 1] = swap;
1249:     }
1250:     cedges[i] = idxs[size - 1];
1251:     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252:     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253:   }
1254:   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255:   PetscCall(PetscBTDestroy(&btvc));

1257:   if (PetscDefined(USE_DEBUG)) {
1258:     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259:      not interfere with neighbouring coarse edges */
1260:     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261:     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262:     for (i = 0; i < nv; i++) {
1263:       PetscInt emax = 0, eemax = 0;

1265:       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266:       PetscCall(PetscArrayzero(emarks, nee + 1));
1267:       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268:       for (j = 1; j < nee + 1; j++) {
1269:         if (emax < emarks[j]) {
1270:           emax  = emarks[j];
1271:           eemax = j;
1272:         }
1273:       }
1274:       /* not relevant for edges */
1275:       if (!eemax) continue;

1277:       for (j = ii[i]; j < ii[i + 1]; j++) {
1278:         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279:       }
1280:     }
1281:     PetscCall(PetscFree(emarks));
1282:     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283:   }

1285:   /* Compute extended rows indices for edge blocks of the change of basis */
1286:   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287:   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288:   extmem *= maxsize;
1289:   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290:   PetscCall(PetscMalloc1(nee, &extrows));
1291:   PetscCall(PetscCalloc1(nee, &extrowcum));
1292:   for (i = 0; i < nv; i++) {
1293:     PetscInt mark = 0, size, start;

1295:     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296:     for (j = ii[i]; j < ii[i + 1]; j++)
1297:       if (marks[jj[j]] && !mark) mark = marks[jj[j]];

1299:     /* not relevant */
1300:     if (!mark) continue;

1302:     /* import extended row */
1303:     mark--;
1304:     start = mark * extmem + extrowcum[mark];
1305:     size  = ii[i + 1] - ii[i];
1306:     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307:     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308:     extrowcum[mark] += size;
1309:   }
1310:   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311:   PetscCall(MatDestroy(&lGt));
1312:   PetscCall(PetscFree(marks));

1314:   /* Compress extrows */
1315:   cum = 0;
1316:   for (i = 0; i < nee; i++) {
1317:     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318:     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320:     cum = PetscMax(cum, size);
1321:   }
1322:   PetscCall(PetscFree(extrowcum));
1323:   PetscCall(PetscBTDestroy(&btv));
1324:   PetscCall(PetscBTDestroy(&btvcand));

1326:   /* Workspace for lapack inner calls and VecSetValues */
1327:   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));

1329:   /* Create change of basis matrix (no preallocation) */
1330:   PetscCall(MatCreate(comm, &T));
1331:   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332:   PetscCall(MatSetType(T, MATAIJ));
1333:   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1334:   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1335:   PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
1336:   //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1337:   //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1338:   //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

1340:   /* Defaults to identity */
1341:   {
1342:     Vec                w;
1343:     const PetscScalar *wa;

1345:     PetscCall(MatCreateVecs(T, &w, NULL));
1346:     PetscCall(VecSetLocalToGlobalMapping(w, al2g));
1347:     PetscCall(VecSet(w, 1.0));
1348:     for (i = 0; i < nee; i++) {
1349:       const PetscInt *idxs;
1350:       PetscInt        nl;

1352:       PetscCall(ISGetLocalSize(eedges[i], &nl));
1353:       PetscCall(ISGetIndices(eedges[i], &idxs));
1354:       PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES));
1355:       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1356:     }
1357:     PetscCall(VecAssemblyBegin(w));
1358:     PetscCall(VecAssemblyEnd(w));
1359:     PetscCall(VecGetArrayRead(w, &wa));
1360:     for (i = T->rmap->rstart; i < T->rmap->rend; i++)
1361:       if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1362:     PetscCall(VecRestoreArrayRead(w, &wa));
1363:     PetscCall(VecDestroy(&w));
1364:   }

1366:   /* Create discrete gradient for the coarser level if needed */
1367:   PetscCall(MatDestroy(&pcbddc->nedcG));
1368:   PetscCall(ISDestroy(&pcbddc->nedclocal));
1369:   if (pcbddc->current_level < pcbddc->max_levels) {
1370:     ISLocalToGlobalMapping cel2g, cvl2g;
1371:     IS                     wis, gwis;
1372:     PetscInt               cnv, cne;

1374:     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1375:     if (fl2g) {
1376:       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1377:     } else {
1378:       PetscCall(PetscObjectReference((PetscObject)wis));
1379:       pcbddc->nedclocal = wis;
1380:     }
1381:     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1382:     PetscCall(ISDestroy(&wis));
1383:     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1384:     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1385:     PetscCall(ISDestroy(&wis));
1386:     PetscCall(ISDestroy(&gwis));

1388:     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1389:     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1390:     PetscCall(ISDestroy(&wis));
1391:     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1392:     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1393:     PetscCall(ISDestroy(&wis));
1394:     PetscCall(ISDestroy(&gwis));

1396:     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1397:     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1398:     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1399:     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1400:     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1401:     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1402:     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1403:     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1404:   }

1406:   MatNullSpace nnsp;
1407:   PetscBool    nnsp_has_const = PETSC_FALSE;
1408:   const Vec   *nnsp_vecs      = NULL;
1409:   PetscInt     nnsp_nvecs     = 0;
1410:   VecScatter   nnsp_vscat     = NULL;
1411:   PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp));
1412:   if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs));
1413:   if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */
1414:     IS                 allextcols, gallextcols, galleedges, is_E_to_zero;
1415:     Vec                E, V;
1416:     PetscInt          *eedgesidxs;
1417:     const PetscScalar *evals;

1419:     PetscCall(MatCreateVecs(pc->pmat, &E, NULL));
1420:     PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL));
1421:     PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols));
1422:     cum = 0;
1423:     for (i = 0; i < nee; i++) {
1424:       PetscInt j;

1426:       PetscCall(ISGetLocalSize(eedges[i], &j));
1427:       PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized egde %" PetscInt_FMT, i);
1428:       cum += j - 1;
1429:     }
1430:     PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs));
1431:     cum = 0;
1432:     for (i = 0; i < nee; i++) {
1433:       const PetscInt *idxs;
1434:       PetscInt        j;

1436:       PetscCall(ISGetLocalSize(eedges[i], &j));
1437:       PetscCall(ISGetIndices(eedges[i], &idxs));
1438:       PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */
1439:       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1440:       cum += j - 1;
1441:     }
1442:     PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs));
1443:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges));
1444:     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols));
1445:     PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat));
1446:     PetscCall(ISDestroy(&allextcols));
1447:     PetscCall(ISDestroy(&gallextcols));
1448:     PetscCall(ISDestroy(&galleedges));

1450:     /* identify dofs we must zero if importing user-defined near nullspace from pmat */
1451:     PetscCall(VecSet(E, 1.0));
1452:     PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES));
1453:     PetscCall(VecAssemblyBegin(E));
1454:     PetscCall(VecAssemblyEnd(E));
1455:     PetscCall(VecGetArrayRead(E, &evals));
1456:     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++)
1457:       if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart;
1458:     PetscCall(VecRestoreArrayRead(E, &evals));
1459:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero));
1460:     PetscCall(PetscFree(eedgesidxs));

1462:     PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V));
1463:     PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E));
1464:     PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero));
1465:     PetscCall(ISDestroy(&is_E_to_zero));
1466:     PetscCall(VecDestroy(&V));
1467:     PetscCall(VecDestroy(&E));
1468:   }
1469: #if defined(PRINT_GDET)
1470:   inc = 0;
1471:   lev = pcbddc->current_level;
1472: #endif

1474:   /* Insert values in the change of basis matrix */
1475:   for (i = 0; i < nee; i++) {
1476:     Mat         Gins = NULL, GKins = NULL;
1477:     IS          cornersis = NULL;
1478:     PetscScalar cvals[2];

1480:     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1481:     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1482:     if (Gins && GKins) {
1483:       const PetscScalar *data;
1484:       const PetscInt    *rows, *cols;
1485:       PetscInt           nrh, nch, nrc, ncc;

1487:       PetscCall(ISGetIndices(eedges[i], &cols));
1488:       /* H1 */
1489:       PetscCall(ISGetIndices(extrows[i], &rows));
1490:       PetscCall(MatGetSize(Gins, &nrh, &nch));
1491:       PetscCall(MatDenseGetArrayRead(Gins, &data));
1492:       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1493:       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1494:       PetscCall(ISRestoreIndices(extrows[i], &rows));
1495:       /* complement */
1496:       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1497:       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1498:       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1499:       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1500:       PetscCall(MatDenseGetArrayRead(GKins, &data));
1501:       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1502:       PetscCall(MatDenseRestoreArrayRead(GKins, &data));

1504:       /* coarse discrete gradient */
1505:       if (pcbddc->nedcG) {
1506:         PetscInt cols[2];

1508:         cols[0] = 2 * i;
1509:         cols[1] = 2 * i + 1;
1510:         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1511:       }
1512:       PetscCall(ISRestoreIndices(eedges[i], &cols));
1513:     }
1514:     PetscCall(ISDestroy(&extrows[i]));
1515:     PetscCall(ISDestroy(&extcols[i]));
1516:     PetscCall(ISDestroy(&cornersis));
1517:     PetscCall(MatDestroy(&Gins));
1518:     PetscCall(MatDestroy(&GKins));
1519:   }

1521:   /* import edge constraints */
1522:   if (nnsp_vscat) {
1523:     Vec          V, E, *quadvecs;
1524:     PetscInt     nvecs, nvecs_orth;
1525:     MatNullSpace onnsp           = NULL;
1526:     PetscBool    onnsp_has_const = PETSC_FALSE;
1527:     const Vec   *onnsp_vecs      = NULL;
1528:     PetscInt     onnsp_nvecs     = 0, new_nnsp_nvecs, old_nnsp_nvecs;
1529:     IS           is_E_to_zero;

1531:     /* import nearnullspace from preconditioning matrix if user-defined */
1532:     PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp));
1533:     if (onnsp) {
1534:       PetscBool isinternal;

1536:       PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal));
1537:       if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs));
1538:     }
1539:     new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0);
1540:     old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0);
1541:     nvecs          = old_nnsp_nvecs + new_nnsp_nvecs;
1542:     PetscCall(PetscMalloc1(nvecs, &quadvecs));

1544:     PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V));
1545:     PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E));
1546:     PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero));
1547:     for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i]));
1548:     cum = 0;
1549:     if (nnsp_has_const) {
1550:       PetscCall(VecSet(V, 1.0));
1551:       PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1552:       PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1553:       cum = 1;
1554:     }
1555:     for (i = 0; i < nnsp_nvecs; i++) {
1556:       PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1557:       PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1558:     }

1560:     /* Now add old nnsp if present */
1561:     cum = 0;
1562:     if (onnsp_has_const) {
1563:       PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0));
1564:       PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0));
1565:       cum = 1;
1566:     }
1567:     for (i = 0; i < onnsp_nvecs; i++) {
1568:       PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs]));
1569:       PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0));
1570:     }
1571:     nvecs_orth = nvecs;
1572:     PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs));
1573:     PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp));
1574:     for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i]));
1575:     PetscCall(PetscFree(quadvecs));
1576:     PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp"));
1577:     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1578:     PetscCall(MatNullSpaceDestroy(&nnsp));
1579:   }
1580:   PetscCall(VecScatterDestroy(&nnsp_vscat));
1581:   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1582:   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1583:   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));

1585:   /* Start assembling */
1586:   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1587:   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));

1589:   /* Free */
1590:   if (fl2g) {
1591:     PetscCall(ISDestroy(&primals));
1592:     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1593:     PetscCall(PetscFree(eedges));
1594:   }

1596:   /* hack mat_graph with primal dofs on the coarse edges */
1597:   {
1598:     PCBDDCGraph graph  = pcbddc->mat_graph;
1599:     PetscInt   *oqueue = graph->queue;
1600:     PetscInt   *ocptr  = graph->cptr;
1601:     PetscInt    ncc, *idxs;

1603:     /* find first primal edge */
1604:     if (pcbddc->nedclocal) {
1605:       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1606:     } else {
1607:       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1608:       idxs = cedges;
1609:     }
1610:     cum = 0;
1611:     while (cum < nee && cedges[cum] < 0) cum++;

1613:     /* adapt connected components */
1614:     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1615:     graph->cptr[0] = 0;
1616:     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1617:       PetscInt lc = ocptr[i + 1] - ocptr[i];
1618:       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1619:         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1620:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1621:         ncc++;
1622:         lc--;
1623:         cum++;
1624:         while (cum < nee && cedges[cum] < 0) cum++;
1625:       }
1626:       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1627:       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1628:       ncc++;
1629:     }
1630:     graph->ncc = ncc;
1631:     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1632:     PetscCall(PetscFree2(ocptr, oqueue));
1633:   }
1634:   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1635:   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1636:   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));

1638:   PetscCall(ISDestroy(&nedfieldlocal));
1639:   PetscCall(PetscFree(extrow));
1640:   PetscCall(PetscFree2(work, rwork));
1641:   PetscCall(PetscFree(corners));
1642:   PetscCall(PetscFree(cedges));
1643:   PetscCall(PetscFree(extrows));
1644:   PetscCall(PetscFree(extcols));
1645:   PetscCall(MatDestroy(&lG));

1647:   /* Complete assembling */
1648:   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1649:   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1650:   if (pcbddc->nedcG) {
1651:     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1652:     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1653:   }

1655:   PetscCall(ISDestroy(&elements_corners));

1657:   /* set change of basis */
1658:   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1659:   PetscCall(MatDestroy(&T));
1660:   PetscFunctionReturn(PETSC_SUCCESS);
1661: }

1663: /* the near-null space of BDDC carries information on quadrature weights,
1664:    and these can be collinear -> so cheat with MatNullSpaceCreate
1665:    and create a suitable set of basis vectors first */
1666: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1667: {
1668:   PetscInt i;

1670:   PetscFunctionBegin;
1671:   for (i = 0; i < nvecs; i++) {
1672:     PetscInt first, last;

1674:     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1675:     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1676:     if (i >= first && i < last) {
1677:       PetscScalar *data;
1678:       PetscCall(VecGetArray(quad_vecs[i], &data));
1679:       if (!has_const) {
1680:         data[i - first] = 1.;
1681:       } else {
1682:         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1683:         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1684:       }
1685:       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1686:     }
1687:     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1688:   }
1689:   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1690:   for (i = 0; i < nvecs; i++) { /* reset vectors */
1691:     PetscInt first, last;
1692:     PetscCall(VecLockReadPop(quad_vecs[i]));
1693:     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1694:     if (i >= first && i < last) {
1695:       PetscScalar *data;
1696:       PetscCall(VecGetArray(quad_vecs[i], &data));
1697:       if (!has_const) {
1698:         data[i - first] = 0.;
1699:       } else {
1700:         data[2 * i - first]     = 0.;
1701:         data[2 * i - first + 1] = 0.;
1702:       }
1703:       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1704:     }
1705:     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1706:     PetscCall(VecLockReadPush(quad_vecs[i]));
1707:   }
1708:   PetscFunctionReturn(PETSC_SUCCESS);
1709: }

1711: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1712: {
1713:   Mat                    loc_divudotp;
1714:   Vec                    p, v, quad_vec;
1715:   ISLocalToGlobalMapping map;
1716:   PetscScalar           *array;

1718:   PetscFunctionBegin;
1719:   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1720:   if (!transpose) {
1721:     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1722:   } else {
1723:     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1724:   }
1725:   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1726:   PetscCall(VecLockReadPop(quad_vec));
1727:   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));

1729:   /* compute local quad vec */
1730:   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1731:   if (!transpose) {
1732:     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1733:   } else {
1734:     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1735:   }
1736:   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1737:   PetscCall(VecSet(p, 1.));
1738:   if (!transpose) {
1739:     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1740:   } else {
1741:     PetscCall(MatMult(loc_divudotp, p, v));
1742:   }
1743:   PetscCall(VecDestroy(&p));
1744:   if (vl2l) {
1745:     Mat        lA;
1746:     VecScatter sc;
1747:     Vec        vins;

1749:     PetscCall(MatISGetLocalMat(A, &lA));
1750:     PetscCall(MatCreateVecs(lA, &vins, NULL));
1751:     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1752:     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1753:     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1754:     PetscCall(VecScatterDestroy(&sc));
1755:     PetscCall(VecDestroy(&v));
1756:     v = vins;
1757:   }

1759:   /* mask summation of interface values */
1760:   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1761:   const PetscInt *degree;
1762:   PetscSF         msf;

1764:   PetscCall(VecGetLocalSize(v, &n));
1765:   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1766:   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1767:   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1768:   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1769:   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1770:   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1771:   for (PetscInt i = 0, c = 0; i < nr; i++) {
1772:     mmask[c] = 1;
1773:     c += degree[i];
1774:   }
1775:   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1776:   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1777:   PetscCall(VecGetArray(v, &array));
1778:   for (PetscInt i = 0; i < n; i++) {
1779:     array[i] *= mask[i];
1780:     idxs[i] = i;
1781:   }
1782:   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1783:   PetscCall(VecRestoreArray(v, &array));
1784:   PetscCall(PetscFree3(mmask, mask, idxs));
1785:   PetscCall(VecDestroy(&v));
1786:   PetscCall(VecAssemblyBegin(quad_vec));
1787:   PetscCall(VecAssemblyEnd(quad_vec));
1788:   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1789:   PetscCall(VecLockReadPush(quad_vec));
1790:   PetscCall(VecDestroy(&quad_vec));
1791:   PetscFunctionReturn(PETSC_SUCCESS);
1792: }

1794: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1795: {
1796:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

1798:   PetscFunctionBegin;
1799:   if (primalv) {
1800:     if (pcbddc->user_primal_vertices_local) {
1801:       IS list[2], newp;

1803:       list[0] = primalv;
1804:       list[1] = pcbddc->user_primal_vertices_local;
1805:       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1806:       PetscCall(ISSortRemoveDups(newp));
1807:       PetscCall(ISDestroy(&list[1]));
1808:       pcbddc->user_primal_vertices_local = newp;
1809:     } else {
1810:       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1811:     }
1812:   }
1813:   PetscFunctionReturn(PETSC_SUCCESS);
1814: }

1816: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1817: {
1818:   PetscInt f, *comp = (PetscInt *)ctx;

1820:   PetscFunctionBegin;
1821:   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1822:   PetscFunctionReturn(PETSC_SUCCESS);
1823: }

1825: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1826: {
1827:   Vec       local, global;
1828:   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1829:   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1830:   PetscBool monolithic = PETSC_FALSE;

1832:   PetscFunctionBegin;
1833:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1834:   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1835:   PetscOptionsEnd();
1836:   /* need to convert from global to local topology information and remove references to information in global ordering */
1837:   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1838:   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1839:   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1840:   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1841:   if (monolithic) { /* just get block size to properly compute vertices */
1842:     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1843:     goto boundary;
1844:   }

1846:   if (pcbddc->user_provided_isfordofs) {
1847:     if (pcbddc->n_ISForDofs) {
1848:       PetscInt i;

1850:       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1851:       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1852:         PetscInt bs;

1854:         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1855:         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1856:         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1857:         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1858:       }
1859:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1860:       pcbddc->n_ISForDofs      = 0;
1861:       PetscCall(PetscFree(pcbddc->ISForDofs));
1862:     }
1863:   } else {
1864:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1865:       DM dm;

1867:       PetscCall(MatGetDM(pc->pmat, &dm));
1868:       if (!dm) PetscCall(PCGetDM(pc, &dm));
1869:       if (dm) {
1870:         IS      *fields;
1871:         PetscInt nf, i;

1873:         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1874:         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1875:         for (i = 0; i < nf; i++) {
1876:           PetscInt bs;

1878:           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1879:           PetscCall(ISGetBlockSize(fields[i], &bs));
1880:           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1881:           PetscCall(ISDestroy(&fields[i]));
1882:         }
1883:         PetscCall(PetscFree(fields));
1884:         pcbddc->n_ISForDofsLocal = nf;
1885:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1886:         PetscContainer c;

1888:         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1889:         if (c) {
1890:           MatISLocalFields lf;
1891:           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1892:           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1893:         } else { /* fallback, create the default fields if bs > 1 */
1894:           PetscInt i, n = matis->A->rmap->n;
1895:           PetscCall(MatGetBlockSize(pc->pmat, &i));
1896:           if (i > 1) {
1897:             pcbddc->n_ISForDofsLocal = i;
1898:             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1899:             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1900:           }
1901:         }
1902:       }
1903:     } else {
1904:       PetscInt i;
1905:       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1906:     }
1907:   }

1909: boundary:
1910:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1911:     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1912:   } else if (pcbddc->DirichletBoundariesLocal) {
1913:     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1914:   }
1915:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1916:     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1917:   } else if (pcbddc->NeumannBoundariesLocal) {
1918:     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1919:   }
1920:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1921:   PetscCall(VecDestroy(&global));
1922:   PetscCall(VecDestroy(&local));
1923:   /* detect local disconnected subdomains if requested or needed */
1924:   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1925:     IS        primalv = NULL;
1926:     PetscInt  nel;
1927:     PetscBool filter = pcbddc->detect_disconnected_filter;

1929:     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1930:     PetscCall(PetscFree(pcbddc->local_subs));
1931:     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1932:     if (matis->allow_repeated && nel) {
1933:       const PetscInt *elsizes;

1935:       pcbddc->n_local_subs = nel;
1936:       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1937:       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1938:       for (PetscInt i = 0, c = 0; i < nel; i++) {
1939:         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1940:         c += elsizes[i];
1941:       }
1942:     } else {
1943:       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1944:     }
1945:     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1946:     PetscCall(ISDestroy(&primalv));
1947:   }
1948:   /* early stage corner detection */
1949:   {
1950:     DM dm;

1952:     PetscCall(MatGetDM(pc->pmat, &dm));
1953:     if (!dm) PetscCall(PCGetDM(pc, &dm));
1954:     if (dm) {
1955:       PetscBool isda;

1957:       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1958:       if (isda) {
1959:         ISLocalToGlobalMapping l2l;
1960:         IS                     corners;
1961:         Mat                    lA;
1962:         PetscBool              gl, lo;

1964:         {
1965:           Vec                cvec;
1966:           const PetscScalar *coords;
1967:           PetscInt           dof, n, cdim;
1968:           PetscBool          memc = PETSC_TRUE;

1970:           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1971:           PetscCall(DMGetCoordinates(dm, &cvec));
1972:           PetscCall(VecGetLocalSize(cvec, &n));
1973:           PetscCall(VecGetBlockSize(cvec, &cdim));
1974:           n /= cdim;
1975:           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1976:           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1977:           PetscCall(VecGetArrayRead(cvec, &coords));
1978: #if defined(PETSC_USE_COMPLEX)
1979:           memc = PETSC_FALSE;
1980: #endif
1981:           if (dof != 1) memc = PETSC_FALSE;
1982:           if (memc) {
1983:             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1984:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1985:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1986:             PetscInt   i, b, d;

1988:             for (i = 0; i < n; i++) {
1989:               for (b = 0; b < dof; b++) {
1990:                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1991:               }
1992:             }
1993:           }
1994:           PetscCall(VecRestoreArrayRead(cvec, &coords));
1995:           pcbddc->mat_graph->cdim  = cdim;
1996:           pcbddc->mat_graph->cnloc = dof * n;
1997:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1998:         }
1999:         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
2000:         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
2001:         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
2002:         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
2003:         lo = (PetscBool)(l2l && corners);
2004:         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2005:         if (gl) { /* From PETSc's DMDA */
2006:           const PetscInt *idx;
2007:           PetscInt        dof, bs, *idxout, n;

2009:           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
2010:           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
2011:           PetscCall(ISGetLocalSize(corners, &n));
2012:           PetscCall(ISGetIndices(corners, &idx));
2013:           if (bs == dof) {
2014:             PetscCall(PetscMalloc1(n, &idxout));
2015:             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
2016:           } else { /* the original DMDA local-to-local map have been modified */
2017:             PetscInt i, d;

2019:             PetscCall(PetscMalloc1(dof * n, &idxout));
2020:             for (i = 0; i < n; i++)
2021:               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
2022:             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));

2024:             bs = 1;
2025:             n *= dof;
2026:           }
2027:           PetscCall(ISRestoreIndices(corners, &idx));
2028:           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2029:           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
2030:           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
2031:           PetscCall(ISDestroy(&corners));
2032:           pcbddc->corner_selected  = PETSC_TRUE;
2033:           pcbddc->corner_selection = PETSC_TRUE;
2034:         }
2035:         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2036:       }
2037:     }
2038:   }
2039:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
2040:     DM dm;

2042:     PetscCall(MatGetDM(pc->pmat, &dm));
2043:     if (!dm) PetscCall(PCGetDM(pc, &dm));
2044:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
2045:       Vec          vcoords;
2046:       PetscSection section;
2047:       PetscReal   *coords;
2048:       PetscInt     d, cdim, nl, nf, **ctxs;
2049:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
2050:       /* debug coordinates */
2051:       PetscViewer       viewer;
2052:       PetscBool         flg;
2053:       PetscViewerFormat format;
2054:       const char       *prefix;

2056:       PetscCall(DMGetCoordinateDim(dm, &cdim));
2057:       PetscCall(DMGetLocalSection(dm, &section));
2058:       PetscCall(PetscSectionGetNumFields(section, &nf));
2059:       PetscCall(DMCreateGlobalVector(dm, &vcoords));
2060:       PetscCall(VecGetLocalSize(vcoords, &nl));
2061:       PetscCall(PetscMalloc1(nl * cdim, &coords));
2062:       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
2063:       PetscCall(PetscMalloc1(nf, &ctxs[0]));
2064:       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
2065:       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;

2067:       /* debug coordinates */
2068:       PetscCall(PCGetOptionsPrefix(pc, &prefix));
2069:       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
2070:       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
2071:       for (d = 0; d < cdim; d++) {
2072:         PetscInt           i;
2073:         const PetscScalar *v;
2074:         char               name[16];

2076:         for (i = 0; i < nf; i++) ctxs[i][0] = d;
2077:         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
2078:         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
2079:         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
2080:         if (flg) PetscCall(VecView(vcoords, viewer));
2081:         PetscCall(VecGetArrayRead(vcoords, &v));
2082:         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
2083:         PetscCall(VecRestoreArrayRead(vcoords, &v));
2084:       }
2085:       PetscCall(VecDestroy(&vcoords));
2086:       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
2087:       PetscCall(PetscFree(coords));
2088:       PetscCall(PetscFree(ctxs[0]));
2089:       PetscCall(PetscFree2(funcs, ctxs));
2090:       if (flg) {
2091:         PetscCall(PetscViewerPopFormat(viewer));
2092:         PetscCall(PetscViewerDestroy(&viewer));
2093:       }
2094:     }
2095:   }
2096:   PetscFunctionReturn(PETSC_SUCCESS);
2097: }

2099: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
2100: {
2101:   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
2102:   IS              nis;
2103:   const PetscInt *idxs;
2104:   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;

2106:   PetscFunctionBegin;
2107:   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
2108:   if (mop == MPI_LAND) {
2109:     /* init rootdata with true */
2110:     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
2111:   } else {
2112:     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
2113:   }
2114:   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
2115:   PetscCall(ISGetLocalSize(*is, &nd));
2116:   PetscCall(ISGetIndices(*is, &idxs));
2117:   for (i = 0; i < nd; i++)
2118:     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
2119:   PetscCall(ISRestoreIndices(*is, &idxs));
2120:   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2121:   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2122:   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2123:   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2124:   if (mop == MPI_LAND) {
2125:     PetscCall(PetscMalloc1(nd, &nidxs));
2126:   } else {
2127:     PetscCall(PetscMalloc1(n, &nidxs));
2128:   }
2129:   for (i = 0, nnd = 0; i < n; i++)
2130:     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2131:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2132:   PetscCall(ISDestroy(is));
2133:   *is = nis;
2134:   PetscFunctionReturn(PETSC_SUCCESS);
2135: }

2137: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2138: {
2139:   PC_IS   *pcis   = (PC_IS *)pc->data;
2140:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

2142:   PetscFunctionBegin;
2143:   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2144:   if (pcbddc->ChangeOfBasisMatrix) {
2145:     Vec swap;

2147:     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2148:     swap                = pcbddc->work_change;
2149:     pcbddc->work_change = r;
2150:     r                   = swap;
2151:   }
2152:   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2153:   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2154:   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2155:   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2156:   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2157:   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2158:   PetscCall(VecSet(z, 0.));
2159:   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2160:   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2161:   if (pcbddc->ChangeOfBasisMatrix) {
2162:     pcbddc->work_change = r;
2163:     PetscCall(VecCopy(z, pcbddc->work_change));
2164:     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2165:   }
2166:   PetscFunctionReturn(PETSC_SUCCESS);
2167: }

2169: static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2170: {
2171:   PCBDDCBenignMatMult_ctx ctx;
2172:   PetscBool               apply_right, apply_left, reset_x;

2174:   PetscFunctionBegin;
2175:   PetscCall(MatShellGetContext(A, &ctx));
2176:   if (transpose) {
2177:     apply_right = ctx->apply_left;
2178:     apply_left  = ctx->apply_right;
2179:   } else {
2180:     apply_right = ctx->apply_right;
2181:     apply_left  = ctx->apply_left;
2182:   }
2183:   reset_x = PETSC_FALSE;
2184:   if (apply_right) {
2185:     const PetscScalar *ax;
2186:     PetscInt           nl, i;

2188:     PetscCall(VecGetLocalSize(x, &nl));
2189:     PetscCall(VecGetArrayRead(x, &ax));
2190:     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2191:     PetscCall(VecRestoreArrayRead(x, &ax));
2192:     for (i = 0; i < ctx->benign_n; i++) {
2193:       PetscScalar     sum, val;
2194:       const PetscInt *idxs;
2195:       PetscInt        nz, j;
2196:       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2197:       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2198:       sum = 0.;
2199:       if (ctx->apply_p0) {
2200:         val = ctx->work[idxs[nz - 1]];
2201:         for (j = 0; j < nz - 1; j++) {
2202:           sum += ctx->work[idxs[j]];
2203:           ctx->work[idxs[j]] += val;
2204:         }
2205:       } else {
2206:         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2207:       }
2208:       ctx->work[idxs[nz - 1]] -= sum;
2209:       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2210:     }
2211:     PetscCall(VecPlaceArray(x, ctx->work));
2212:     reset_x = PETSC_TRUE;
2213:   }
2214:   if (transpose) {
2215:     PetscCall(MatMultTranspose(ctx->A, x, y));
2216:   } else {
2217:     PetscCall(MatMult(ctx->A, x, y));
2218:   }
2219:   if (reset_x) PetscCall(VecResetArray(x));
2220:   if (apply_left) {
2221:     PetscScalar *ay;
2222:     PetscInt     i;

2224:     PetscCall(VecGetArray(y, &ay));
2225:     for (i = 0; i < ctx->benign_n; i++) {
2226:       PetscScalar     sum, val;
2227:       const PetscInt *idxs;
2228:       PetscInt        nz, j;
2229:       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2230:       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2231:       val = -ay[idxs[nz - 1]];
2232:       if (ctx->apply_p0) {
2233:         sum = 0.;
2234:         for (j = 0; j < nz - 1; j++) {
2235:           sum += ay[idxs[j]];
2236:           ay[idxs[j]] += val;
2237:         }
2238:         ay[idxs[nz - 1]] += sum;
2239:       } else {
2240:         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2241:         ay[idxs[nz - 1]] = 0.;
2242:       }
2243:       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2244:     }
2245:     PetscCall(VecRestoreArray(y, &ay));
2246:   }
2247:   PetscFunctionReturn(PETSC_SUCCESS);
2248: }

2250: static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2251: {
2252:   PetscFunctionBegin;
2253:   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2254:   PetscFunctionReturn(PETSC_SUCCESS);
2255: }

2257: static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2258: {
2259:   PetscFunctionBegin;
2260:   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2261:   PetscFunctionReturn(PETSC_SUCCESS);
2262: }

2264: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2265: {
2266:   PC_IS                  *pcis   = (PC_IS *)pc->data;
2267:   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2268:   PCBDDCBenignMatMult_ctx ctx;

2270:   PetscFunctionBegin;
2271:   if (!restore) {
2272:     Mat                A_IB, A_BI;
2273:     PetscScalar       *work;
2274:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2276:     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2277:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2278:     PetscCall(PetscMalloc1(pcis->n, &work));
2279:     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2280:     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2281:     PetscCall(MatSetType(A_IB, MATSHELL));
2282:     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private));
2283:     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private));
2284:     PetscCall(PetscNew(&ctx));
2285:     PetscCall(MatShellSetContext(A_IB, ctx));
2286:     ctx->apply_left  = PETSC_TRUE;
2287:     ctx->apply_right = PETSC_FALSE;
2288:     ctx->apply_p0    = PETSC_FALSE;
2289:     ctx->benign_n    = pcbddc->benign_n;
2290:     if (reuse) {
2291:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2292:       ctx->free                 = PETSC_FALSE;
2293:     } else { /* TODO: could be optimized for successive solves */
2294:       ISLocalToGlobalMapping N_to_D;
2295:       PetscInt               i;

2297:       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2298:       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2299:       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2300:       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2301:       ctx->free = PETSC_TRUE;
2302:     }
2303:     ctx->A    = pcis->A_IB;
2304:     ctx->work = work;
2305:     PetscCall(MatSetUp(A_IB));
2306:     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2307:     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2308:     pcis->A_IB = A_IB;

2310:     /* A_BI as A_IB^T */
2311:     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2312:     pcbddc->benign_original_mat = pcis->A_BI;
2313:     pcis->A_BI                  = A_BI;
2314:   } else {
2315:     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2316:     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2317:     PetscCall(MatDestroy(&pcis->A_IB));
2318:     pcis->A_IB = ctx->A;
2319:     ctx->A     = NULL;
2320:     PetscCall(MatDestroy(&pcis->A_BI));
2321:     pcis->A_BI                  = pcbddc->benign_original_mat;
2322:     pcbddc->benign_original_mat = NULL;
2323:     if (ctx->free) {
2324:       PetscInt i;
2325:       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2326:       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2327:     }
2328:     PetscCall(PetscFree(ctx->work));
2329:     PetscCall(PetscFree(ctx));
2330:   }
2331:   PetscFunctionReturn(PETSC_SUCCESS);
2332: }

2334: /* used just in bddc debug mode */
2335: static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2336: {
2337:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2338:   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2339:   Mat      An;

2341:   PetscFunctionBegin;
2342:   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2343:   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2344:   if (is1) {
2345:     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2346:     PetscCall(MatDestroy(&An));
2347:   } else {
2348:     *B = An;
2349:   }
2350:   PetscFunctionReturn(PETSC_SUCCESS);
2351: }

2353: /* TODO: add reuse flag */
2354: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2355: {
2356:   Mat             Bt;
2357:   PetscScalar    *a, *bdata;
2358:   const PetscInt *ii, *ij;
2359:   PetscInt        m, n, i, nnz, *bii, *bij;
2360:   PetscBool       flg_row;

2362:   PetscFunctionBegin;
2363:   PetscCall(MatGetSize(A, &n, &m));
2364:   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2365:   PetscCall(MatSeqAIJGetArray(A, &a));
2366:   nnz = n;
2367:   for (i = 0; i < ii[n]; i++) {
2368:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2369:   }
2370:   PetscCall(PetscMalloc1(n + 1, &bii));
2371:   PetscCall(PetscMalloc1(nnz, &bij));
2372:   PetscCall(PetscMalloc1(nnz, &bdata));
2373:   nnz    = 0;
2374:   bii[0] = 0;
2375:   for (i = 0; i < n; i++) {
2376:     PetscInt j;
2377:     for (j = ii[i]; j < ii[i + 1]; j++) {
2378:       PetscScalar entry = a[j];
2379:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2380:         bij[nnz]   = ij[j];
2381:         bdata[nnz] = entry;
2382:         nnz++;
2383:       }
2384:     }
2385:     bii[i + 1] = nnz;
2386:   }
2387:   PetscCall(MatSeqAIJRestoreArray(A, &a));
2388:   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2389:   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2390:   {
2391:     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2392:     b->free_a     = PETSC_TRUE;
2393:     b->free_ij    = PETSC_TRUE;
2394:   }
2395:   if (*B == A) PetscCall(MatDestroy(&A));
2396:   *B = Bt;
2397:   PetscFunctionReturn(PETSC_SUCCESS);
2398: }

2400: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2401: {
2402:   Mat                    B = NULL;
2403:   DM                     dm;
2404:   IS                     is_dummy, *cc_n;
2405:   ISLocalToGlobalMapping l2gmap_dummy;
2406:   PCBDDCGraph            graph;
2407:   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2408:   PetscInt               i, n;
2409:   PetscInt              *xadj, *adjncy;
2410:   PetscBool              isplex = PETSC_FALSE;

2412:   PetscFunctionBegin;
2413:   if (ncc) *ncc = 0;
2414:   if (cc) *cc = NULL;
2415:   if (primalv) *primalv = NULL;
2416:   PetscCall(PCBDDCGraphCreate(&graph));
2417:   PetscCall(MatGetDM(pc->pmat, &dm));
2418:   if (!dm) PetscCall(PCGetDM(pc, &dm));
2419:   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2420:   if (filter) isplex = PETSC_FALSE;

2422:   if (isplex) { /* this code has been modified from plexpartition.c */
2423:     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2424:     PetscInt       *adj = NULL;
2425:     IS              cellNumbering;
2426:     const PetscInt *cellNum;
2427:     PetscBool       useCone, useClosure;
2428:     PetscSection    section;
2429:     PetscSegBuffer  adjBuffer;
2430:     PetscSF         sfPoint;

2432:     PetscCall(DMConvert(dm, DMPLEX, &dm));
2433:     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2434:     PetscCall(DMGetPointSF(dm, &sfPoint));
2435:     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2436:     /* Build adjacency graph via a section/segbuffer */
2437:     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2438:     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2439:     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2440:     /* Always use FVM adjacency to create partitioner graph */
2441:     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2442:     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2443:     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2444:     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2445:     for (n = 0, p = pStart; p < pEnd; p++) {
2446:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2447:       if (nroots > 0) {
2448:         if (cellNum[p] < 0) continue;
2449:       }
2450:       adjSize = PETSC_DETERMINE;
2451:       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2452:       for (a = 0; a < adjSize; ++a) {
2453:         const PetscInt point = adj[a];
2454:         if (pStart <= point && point < pEnd) {
2455:           PetscInt *PETSC_RESTRICT pBuf;
2456:           PetscCall(PetscSectionAddDof(section, p, 1));
2457:           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2458:           *pBuf = point;
2459:         }
2460:       }
2461:       n++;
2462:     }
2463:     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2464:     /* Derive CSR graph from section/segbuffer */
2465:     PetscCall(PetscSectionSetUp(section));
2466:     PetscCall(PetscSectionGetStorageSize(section, &size));
2467:     PetscCall(PetscMalloc1(n + 1, &xadj));
2468:     for (idx = 0, p = pStart; p < pEnd; p++) {
2469:       if (nroots > 0) {
2470:         if (cellNum[p] < 0) continue;
2471:       }
2472:       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2473:     }
2474:     xadj[n] = size;
2475:     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2476:     /* Clean up */
2477:     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2478:     PetscCall(PetscSectionDestroy(&section));
2479:     PetscCall(PetscFree(adj));
2480:     graph->xadj   = xadj;
2481:     graph->adjncy = adjncy;
2482:   } else {
2483:     Mat       A;
2484:     PetscBool isseqaij, flg_row;

2486:     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2487:     if (!A->rmap->N || !A->cmap->N) {
2488:       PetscCall(PCBDDCGraphDestroy(&graph));
2489:       PetscFunctionReturn(PETSC_SUCCESS);
2490:     }
2491:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2492:     if (!isseqaij && filter) {
2493:       PetscBool isseqdense;

2495:       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2496:       if (!isseqdense) {
2497:         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2498:       } else { /* TODO: rectangular case and LDA */
2499:         PetscScalar *array;
2500:         PetscReal    chop = 1.e-6;

2502:         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2503:         PetscCall(MatDenseGetArray(B, &array));
2504:         PetscCall(MatGetSize(B, &n, NULL));
2505:         for (i = 0; i < n; i++) {
2506:           PetscInt j;
2507:           for (j = i + 1; j < n; j++) {
2508:             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2509:             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2510:             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2511:           }
2512:         }
2513:         PetscCall(MatDenseRestoreArray(B, &array));
2514:         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2515:       }
2516:     } else {
2517:       PetscCall(PetscObjectReference((PetscObject)A));
2518:       B = A;
2519:     }
2520:     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));

2522:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2523:     if (filter) {
2524:       PetscScalar *data;
2525:       PetscInt     j, cum;

2527:       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2528:       PetscCall(MatSeqAIJGetArray(B, &data));
2529:       cum = 0;
2530:       for (i = 0; i < n; i++) {
2531:         PetscInt t;

2533:         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2534:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2535:           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2536:         }
2537:         t                = xadj_filtered[i];
2538:         xadj_filtered[i] = cum;
2539:         cum += t;
2540:       }
2541:       PetscCall(MatSeqAIJRestoreArray(B, &data));
2542:       graph->xadj   = xadj_filtered;
2543:       graph->adjncy = adjncy_filtered;
2544:     } else {
2545:       graph->xadj   = xadj;
2546:       graph->adjncy = adjncy;
2547:     }
2548:   }
2549:   /* compute local connected components using PCBDDCGraph */
2550:   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2551:   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2552:   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2553:   PetscCall(ISDestroy(&is_dummy));
2554:   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2555:   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2556:   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2557:   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));

2559:   /* partial clean up */
2560:   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2561:   if (B) {
2562:     PetscBool flg_row;
2563:     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2564:     PetscCall(MatDestroy(&B));
2565:   }
2566:   if (isplex) {
2567:     PetscCall(PetscFree(xadj));
2568:     PetscCall(PetscFree(adjncy));
2569:   }

2571:   /* get back data */
2572:   if (isplex) {
2573:     if (ncc) *ncc = graph->ncc;
2574:     if (cc || primalv) {
2575:       Mat          A;
2576:       PetscBT      btv, btvt, btvc;
2577:       PetscSection subSection;
2578:       PetscInt    *ids, cum, cump, *cids, *pids;
2579:       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;

2581:       PetscCall(DMGetDimension(dm, &dim));
2582:       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2583:       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2584:       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2585:       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2586:       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2587:       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2588:       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2589:       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2590:       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2591:       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));

2593:       /* First see if we find corners for the subdomains, i.e. a vertex
2594:          shared by at least dim subdomain boundary faces. This does not
2595:          cover all the possible cases with simplices but it is enough
2596:          for tensor cells */
2597:       if (vStart != fStart && dim <= 3) {
2598:         for (PetscInt c = cStart; c < cEnd; c++) {
2599:           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2600:           const PetscInt *faces;

2602:           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2603:           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2604:           PetscCall(DMPlexGetCone(dm, c, &faces));
2605:           for (PetscInt f = 0; f < nf; f++) {
2606:             PetscInt nc, ff;

2608:             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2609:             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2610:             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2611:           }
2612:           if (cnt >= mcnt) {
2613:             PetscInt size, *closure = NULL;

2615:             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2616:             for (PetscInt k = 0; k < 2 * size; k += 2) {
2617:               PetscInt v = closure[k];
2618:               if (v >= vStart && v < vEnd) {
2619:                 PetscInt vsize, *vclosure = NULL;

2621:                 cnt = 0;
2622:                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2623:                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2624:                   PetscInt f = vclosure[vk];
2625:                   if (f >= fStart && f < fEnd) {
2626:                     PetscInt  nc, ff;
2627:                     PetscBool valid = PETSC_FALSE;

2629:                     for (PetscInt fk = 0; fk < nf; fk++)
2630:                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2631:                     if (!valid) continue;
2632:                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2633:                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2634:                     if (nc == 1 && f == ff) cnt++;
2635:                   }
2636:                 }
2637:                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2638:                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2639:               }
2640:             }
2641:             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2642:           }
2643:           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2644:         }
2645:       }

2647:       cids[0] = 0;
2648:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2649:         PetscInt j;

2651:         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2652:         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2653:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2655:           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2656:           for (k = 0; k < 2 * size; k += 2) {
2657:             PetscInt s, pp, p = closure[k], off, dof, cdof;

2659:             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2660:             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2661:             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2662:             for (s = 0; s < dof - cdof; s++) {
2663:               if (PetscBTLookupSet(btvt, off + s)) continue;
2664:               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2665:               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2666:               else pids[cump++] = off + s; /* cross-vertex */
2667:             }
2668:             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2669:             if (pp != p) {
2670:               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2671:               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2672:               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2673:               for (s = 0; s < dof - cdof; s++) {
2674:                 if (PetscBTLookupSet(btvt, off + s)) continue;
2675:                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2676:                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2677:                 else pids[cump++] = off + s; /* cross-vertex */
2678:               }
2679:             }
2680:           }
2681:           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2682:         }
2683:         cids[i + 1] = cum;
2684:         /* mark dofs as already assigned */
2685:         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2686:       }
2687:       if (cc) {
2688:         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2689:         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2690:         *cc = cc_n;
2691:       }
2692:       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2693:       PetscCall(PetscFree3(ids, cids, pids));
2694:       PetscCall(PetscBTDestroy(&btv));
2695:       PetscCall(PetscBTDestroy(&btvt));
2696:       PetscCall(PetscBTDestroy(&btvc));
2697:       PetscCall(DMDestroy(&dm));
2698:     }
2699:   } else {
2700:     if (ncc) *ncc = graph->ncc;
2701:     if (cc) {
2702:       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2703:       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2704:       *cc = cc_n;
2705:     }
2706:   }
2707:   /* clean up graph */
2708:   graph->xadj   = NULL;
2709:   graph->adjncy = NULL;
2710:   PetscCall(PCBDDCGraphDestroy(&graph));
2711:   PetscFunctionReturn(PETSC_SUCCESS);
2712: }

2714: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2715: {
2716:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2717:   PC_IS   *pcis   = (PC_IS *)pc->data;
2718:   IS       dirIS  = NULL;
2719:   PetscInt i;

2721:   PetscFunctionBegin;
2722:   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2723:   if (zerodiag) {
2724:     Mat             A;
2725:     Vec             vec3_N;
2726:     PetscScalar    *vals;
2727:     const PetscInt *idxs;
2728:     PetscInt        nz, *count;

2730:     /* p0 */
2731:     PetscCall(VecSet(pcis->vec1_N, 0.));
2732:     PetscCall(PetscMalloc1(pcis->n, &vals));
2733:     PetscCall(ISGetLocalSize(zerodiag, &nz));
2734:     PetscCall(ISGetIndices(zerodiag, &idxs));
2735:     for (i = 0; i < nz; i++) vals[i] = 1.;
2736:     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2737:     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2738:     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2739:     /* v_I */
2740:     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2741:     for (i = 0; i < nz; i++) vals[i] = 0.;
2742:     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2743:     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2744:     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2745:     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2746:     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2747:     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2748:     if (dirIS) {
2749:       PetscInt n;

2751:       PetscCall(ISGetLocalSize(dirIS, &n));
2752:       PetscCall(ISGetIndices(dirIS, &idxs));
2753:       for (i = 0; i < n; i++) vals[i] = 0.;
2754:       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2755:       PetscCall(ISRestoreIndices(dirIS, &idxs));
2756:     }
2757:     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2758:     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2759:     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2760:     PetscCall(VecSet(vec3_N, 0.));
2761:     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2762:     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2763:     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2764:     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2765:     PetscCall(PetscFree(vals));
2766:     PetscCall(VecDestroy(&vec3_N));

2768:     /* there should not be any pressure dofs lying on the interface */
2769:     PetscCall(PetscCalloc1(pcis->n, &count));
2770:     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2771:     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2772:     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2773:     PetscCall(ISGetIndices(zerodiag, &idxs));
2774:     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2775:     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2776:     PetscCall(PetscFree(count));
2777:   }
2778:   PetscCall(ISDestroy(&dirIS));

2780:   /* check PCBDDCBenignGetOrSetP0 */
2781:   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2782:   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2783:   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2784:   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2785:   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2786:   for (i = 0; i < pcbddc->benign_n; i++) {
2787:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2788:     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2789:   }
2790:   PetscFunctionReturn(PETSC_SUCCESS);
2791: }

2793: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2794: {
2795:   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2796:   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2797:   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2798:   PetscInt  nz, n, benign_n, bsp = 1;
2799:   PetscInt *interior_dofs, n_interior_dofs, nneu;
2800:   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;

2802:   PetscFunctionBegin;
2803:   if (reuse) goto project_b0;
2804:   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2805:   PetscCall(MatDestroy(&pcbddc->benign_B0));
2806:   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2807:   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2808:   has_null_pressures = PETSC_TRUE;
2809:   have_null          = PETSC_TRUE;
2810:   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2811:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2812:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2813:      If not, a change of basis on pressures is not needed
2814:      since the local Schur complements are already SPD
2815:   */
2816:   if (pcbddc->n_ISForDofsLocal) {
2817:     IS        iP = NULL;
2818:     PetscInt  p, *pp;
2819:     PetscBool flg, blocked = PETSC_FALSE;

2821:     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2822:     n = pcbddc->n_ISForDofsLocal;
2823:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2824:     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2825:     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2826:     PetscOptionsEnd();
2827:     if (!flg) {
2828:       n     = 1;
2829:       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2830:     }

2832:     bsp = 0;
2833:     for (p = 0; p < n; p++) {
2834:       PetscInt bs = 1;

2836:       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2837:       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2838:       bsp += bs;
2839:     }
2840:     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2841:     bsp = 0;
2842:     for (p = 0; p < n; p++) {
2843:       const PetscInt *idxs;
2844:       PetscInt        b, bs = 1, npl, *bidxs;

2846:       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2847:       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2848:       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2849:       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2850:       for (b = 0; b < bs; b++) {
2851:         PetscInt i;

2853:         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2854:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2855:         bsp++;
2856:       }
2857:       PetscCall(PetscFree(bidxs));
2858:       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2859:     }
2860:     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));

2862:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2863:     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2864:     if (iP) {
2865:       IS newpressures;

2867:       PetscCall(ISDifference(pressures, iP, &newpressures));
2868:       PetscCall(ISDestroy(&pressures));
2869:       pressures = newpressures;
2870:     }
2871:     PetscCall(ISSorted(pressures, &sorted));
2872:     if (!sorted) PetscCall(ISSort(pressures));
2873:     PetscCall(PetscFree(pp));
2874:   }

2876:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2877:   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2878:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2879:   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2880:   PetscCall(ISSorted(zerodiag, &sorted));
2881:   if (!sorted) PetscCall(ISSort(zerodiag));
2882:   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2883:   zerodiag_save = zerodiag;
2884:   PetscCall(ISGetLocalSize(zerodiag, &nz));
2885:   if (!nz) {
2886:     if (n) have_null = PETSC_FALSE;
2887:     has_null_pressures = PETSC_FALSE;
2888:     PetscCall(ISDestroy(&zerodiag));
2889:   }
2890:   recompute_zerodiag = PETSC_FALSE;

2892:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2893:   zerodiag_subs   = NULL;
2894:   benign_n        = 0;
2895:   n_interior_dofs = 0;
2896:   interior_dofs   = NULL;
2897:   nneu            = 0;
2898:   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2899:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2900:   if (checkb) { /* need to compute interior nodes */
2901:     PetscInt               n, i;
2902:     PetscInt              *count;
2903:     ISLocalToGlobalMapping mapping;

2905:     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2906:     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2907:     PetscCall(PetscMalloc1(n, &interior_dofs));
2908:     for (i = 0; i < n; i++)
2909:       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2910:     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2911:   }
2912:   if (has_null_pressures) {
2913:     IS             *subs;
2914:     PetscInt        nsubs, i, j, nl;
2915:     const PetscInt *idxs;
2916:     PetscScalar    *array;
2917:     Vec            *work;

2919:     subs  = pcbddc->local_subs;
2920:     nsubs = pcbddc->n_local_subs;
2921:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2922:     if (checkb) {
2923:       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2924:       PetscCall(ISGetLocalSize(zerodiag, &nl));
2925:       PetscCall(ISGetIndices(zerodiag, &idxs));
2926:       /* work[0] = 1_p */
2927:       PetscCall(VecSet(work[0], 0.));
2928:       PetscCall(VecGetArray(work[0], &array));
2929:       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2930:       PetscCall(VecRestoreArray(work[0], &array));
2931:       /* work[0] = 1_v */
2932:       PetscCall(VecSet(work[1], 1.));
2933:       PetscCall(VecGetArray(work[1], &array));
2934:       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2935:       PetscCall(VecRestoreArray(work[1], &array));
2936:       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2937:     }

2939:     if (nsubs > 1 || bsp > 1) {
2940:       IS      *is;
2941:       PetscInt b, totb;

2943:       totb  = bsp;
2944:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2945:       nsubs = PetscMax(nsubs, 1);
2946:       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2947:       for (b = 0; b < totb; b++) {
2948:         for (i = 0; i < nsubs; i++) {
2949:           ISLocalToGlobalMapping l2g;
2950:           IS                     t_zerodiag_subs;
2951:           PetscInt               nl;

2953:           if (subs) {
2954:             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2955:           } else {
2956:             IS tis;

2958:             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2959:             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2960:             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2961:             PetscCall(ISDestroy(&tis));
2962:           }
2963:           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2964:           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2965:           if (nl) {
2966:             PetscBool valid = PETSC_TRUE;

2968:             if (checkb) {
2969:               PetscCall(VecSet(matis->x, 0));
2970:               PetscCall(ISGetLocalSize(subs[i], &nl));
2971:               PetscCall(ISGetIndices(subs[i], &idxs));
2972:               PetscCall(VecGetArray(matis->x, &array));
2973:               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2974:               PetscCall(VecRestoreArray(matis->x, &array));
2975:               PetscCall(ISRestoreIndices(subs[i], &idxs));
2976:               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2977:               PetscCall(MatMult(matis->A, matis->x, matis->y));
2978:               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2979:               PetscCall(VecGetArray(matis->y, &array));
2980:               for (j = 0; j < n_interior_dofs; j++) {
2981:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2982:                   valid = PETSC_FALSE;
2983:                   break;
2984:                 }
2985:               }
2986:               PetscCall(VecRestoreArray(matis->y, &array));
2987:             }
2988:             if (valid && nneu) {
2989:               const PetscInt *idxs;
2990:               PetscInt        nzb;

2992:               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2993:               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2994:               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2995:               if (nzb) valid = PETSC_FALSE;
2996:             }
2997:             if (valid && pressures) {
2998:               IS       t_pressure_subs, tmp;
2999:               PetscInt i1, i2;

3001:               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
3002:               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
3003:               PetscCall(ISGetLocalSize(tmp, &i1));
3004:               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
3005:               if (i2 != i1) valid = PETSC_FALSE;
3006:               PetscCall(ISDestroy(&t_pressure_subs));
3007:               PetscCall(ISDestroy(&tmp));
3008:             }
3009:             if (valid) {
3010:               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
3011:               benign_n++;
3012:             } else recompute_zerodiag = PETSC_TRUE;
3013:           }
3014:           PetscCall(ISDestroy(&t_zerodiag_subs));
3015:           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
3016:         }
3017:       }
3018:     } else { /* there's just one subdomain (or zero if they have not been detected */
3019:       PetscBool valid = PETSC_TRUE;

3021:       if (nneu) valid = PETSC_FALSE;
3022:       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
3023:       if (valid && checkb) {
3024:         PetscCall(MatMult(matis->A, work[0], matis->x));
3025:         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
3026:         PetscCall(VecGetArray(matis->x, &array));
3027:         for (j = 0; j < n_interior_dofs; j++) {
3028:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
3029:             valid = PETSC_FALSE;
3030:             break;
3031:           }
3032:         }
3033:         PetscCall(VecRestoreArray(matis->x, &array));
3034:       }
3035:       if (valid) {
3036:         benign_n = 1;
3037:         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
3038:         PetscCall(PetscObjectReference((PetscObject)zerodiag));
3039:         zerodiag_subs[0] = zerodiag;
3040:       }
3041:     }
3042:     if (checkb) PetscCall(VecDestroyVecs(2, &work));
3043:   }
3044:   PetscCall(PetscFree(interior_dofs));

3046:   if (!benign_n) {
3047:     PetscInt n;

3049:     PetscCall(ISDestroy(&zerodiag));
3050:     recompute_zerodiag = PETSC_FALSE;
3051:     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3052:     if (n) have_null = PETSC_FALSE;
3053:   }

3055:   /* final check for null pressures */
3056:   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));

3058:   if (recompute_zerodiag) {
3059:     PetscCall(ISDestroy(&zerodiag));
3060:     if (benign_n == 1) {
3061:       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
3062:       zerodiag = zerodiag_subs[0];
3063:     } else {
3064:       PetscInt i, nzn, *new_idxs;

3066:       nzn = 0;
3067:       for (i = 0; i < benign_n; i++) {
3068:         PetscInt ns;
3069:         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3070:         nzn += ns;
3071:       }
3072:       PetscCall(PetscMalloc1(nzn, &new_idxs));
3073:       nzn = 0;
3074:       for (i = 0; i < benign_n; i++) {
3075:         PetscInt ns, *idxs;
3076:         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3077:         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3078:         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
3079:         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3080:         nzn += ns;
3081:       }
3082:       PetscCall(PetscSortInt(nzn, new_idxs));
3083:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
3084:     }
3085:     have_null = PETSC_FALSE;
3086:   }

3088:   /* determines if the coarse solver will be singular or not */
3089:   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));

3091:   /* Prepare matrix to compute no-net-flux */
3092:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
3093:     Mat                    A, loc_divudotp;
3094:     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
3095:     IS                     row, col, isused = NULL;
3096:     PetscInt               M, N, n, st, n_isused;

3098:     if (pressures) {
3099:       isused = pressures;
3100:     } else {
3101:       isused = zerodiag_save;
3102:     }
3103:     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
3104:     PetscCall(MatISGetLocalMat(pc->pmat, &A));
3105:     PetscCall(MatGetLocalSize(A, &n, NULL));
3106:     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
3107:     n_isused = 0;
3108:     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
3109:     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
3110:     st = st - n_isused;
3111:     if (n) {
3112:       const PetscInt *gidxs;

3114:       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
3115:       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
3116:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
3117:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3118:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
3119:       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
3120:     } else {
3121:       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3122:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3123:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3124:     }
3125:     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3126:     PetscCall(ISGetSize(row, &M));
3127:     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3128:     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3129:     PetscCall(ISDestroy(&row));
3130:     PetscCall(ISDestroy(&col));
3131:     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3132:     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3133:     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3134:     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3135:     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3136:     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3137:     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3138:     PetscCall(MatDestroy(&loc_divudotp));
3139:     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3140:     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3141:   }
3142:   PetscCall(ISDestroy(&zerodiag_save));
3143:   PetscCall(ISDestroy(&pressures));
3144:   if (bzerodiag) {
3145:     PetscInt i;

3147:     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3148:     PetscCall(PetscFree(bzerodiag));
3149:   }
3150:   pcbddc->benign_n             = benign_n;
3151:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

3153:   /* determines if the problem has subdomains with 0 pressure block */
3154:   have_null = (PetscBool)(!!pcbddc->benign_n);
3155:   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));

3157: project_b0:
3158:   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3159:   /* change of basis and p0 dofs */
3160:   if (pcbddc->benign_n) {
3161:     PetscInt i, s, *nnz;

3163:     /* local change of basis for pressures */
3164:     PetscCall(MatDestroy(&pcbddc->benign_change));
3165:     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3166:     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3167:     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3168:     PetscCall(PetscMalloc1(n, &nnz));
3169:     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3170:     for (i = 0; i < pcbddc->benign_n; i++) {
3171:       const PetscInt *idxs;
3172:       PetscInt        nzs, j;

3174:       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3175:       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3176:       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3177:       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3178:       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3179:     }
3180:     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3181:     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3182:     PetscCall(PetscFree(nnz));
3183:     /* set identity by default */
3184:     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3185:     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3186:     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3187:     /* set change on pressures */
3188:     for (s = 0; s < pcbddc->benign_n; s++) {
3189:       PetscScalar    *array;
3190:       const PetscInt *idxs;
3191:       PetscInt        nzs;

3193:       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3194:       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3195:       for (i = 0; i < nzs - 1; i++) {
3196:         PetscScalar vals[2];
3197:         PetscInt    cols[2];

3199:         cols[0] = idxs[i];
3200:         cols[1] = idxs[nzs - 1];
3201:         vals[0] = 1.;
3202:         vals[1] = 1.;
3203:         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3204:       }
3205:       PetscCall(PetscMalloc1(nzs, &array));
3206:       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3207:       array[nzs - 1] = 1.;
3208:       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3209:       /* store local idxs for p0 */
3210:       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3211:       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3212:       PetscCall(PetscFree(array));
3213:     }
3214:     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3215:     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));

3217:     /* project if needed */
3218:     if (pcbddc->benign_change_explicit) {
3219:       Mat M;

3221:       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3222:       PetscCall(MatDestroy(&pcbddc->local_mat));
3223:       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3224:       PetscCall(MatDestroy(&M));
3225:     }
3226:     /* store global idxs for p0 */
3227:     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3228:   }
3229:   *zerodiaglocal = zerodiag;
3230:   PetscFunctionReturn(PETSC_SUCCESS);
3231: }

3233: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3234: {
3235:   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3236:   PetscScalar *array;

3238:   PetscFunctionBegin;
3239:   if (!pcbddc->benign_sf) {
3240:     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3241:     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3242:   }
3243:   if (get) {
3244:     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3245:     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3246:     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3247:     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3248:   } else {
3249:     PetscCall(VecGetArray(v, &array));
3250:     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3251:     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3252:     PetscCall(VecRestoreArray(v, &array));
3253:   }
3254:   PetscFunctionReturn(PETSC_SUCCESS);
3255: }

3257: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3258: {
3259:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

3261:   PetscFunctionBegin;
3262:   /* TODO: add error checking
3263:     - avoid nested pop (or push) calls.
3264:     - cannot push before pop.
3265:     - cannot call this if pcbddc->local_mat is NULL
3266:   */
3267:   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3268:   if (pop) {
3269:     if (pcbddc->benign_change_explicit) {
3270:       IS       is_p0;
3271:       MatReuse reuse;

3273:       /* extract B_0 */
3274:       reuse = MAT_INITIAL_MATRIX;
3275:       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3276:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3277:       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3278:       /* remove rows and cols from local problem */
3279:       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3280:       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3281:       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3282:       PetscCall(ISDestroy(&is_p0));
3283:     } else {
3284:       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3285:       PetscScalar *vals;
3286:       PetscInt     i, n, *idxs_ins;

3288:       PetscCall(VecGetLocalSize(matis->y, &n));
3289:       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3290:       if (!pcbddc->benign_B0) {
3291:         PetscInt *nnz;
3292:         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3293:         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3294:         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3295:         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3296:         for (i = 0; i < pcbddc->benign_n; i++) {
3297:           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3298:           nnz[i] = n - nnz[i];
3299:         }
3300:         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3301:         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3302:         PetscCall(PetscFree(nnz));
3303:       }

3305:       for (i = 0; i < pcbddc->benign_n; i++) {
3306:         PetscScalar *array;
3307:         PetscInt    *idxs, j, nz, cum;

3309:         PetscCall(VecSet(matis->x, 0.));
3310:         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3311:         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3312:         for (j = 0; j < nz; j++) vals[j] = 1.;
3313:         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3314:         PetscCall(VecAssemblyBegin(matis->x));
3315:         PetscCall(VecAssemblyEnd(matis->x));
3316:         PetscCall(VecSet(matis->y, 0.));
3317:         PetscCall(MatMult(matis->A, matis->x, matis->y));
3318:         PetscCall(VecGetArray(matis->y, &array));
3319:         cum = 0;
3320:         for (j = 0; j < n; j++) {
3321:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3322:             vals[cum]     = array[j];
3323:             idxs_ins[cum] = j;
3324:             cum++;
3325:           }
3326:         }
3327:         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3328:         PetscCall(VecRestoreArray(matis->y, &array));
3329:         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3330:       }
3331:       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3332:       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3333:       PetscCall(PetscFree2(idxs_ins, vals));
3334:     }
3335:   } else { /* push */

3337:     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3338:     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3339:       PetscScalar *B0_vals;
3340:       PetscInt    *B0_cols, B0_ncol;

3342:       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3343:       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3344:       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3345:       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3346:       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3347:     }
3348:     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3349:     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3350:   }
3351:   PetscFunctionReturn(PETSC_SUCCESS);
3352: }

3354: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3355: {
3356:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3357:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3358:   PetscBLASInt    B_neigs, B_ierr, B_lwork;
3359:   PetscBLASInt   *B_iwork, *B_ifail;
3360:   PetscScalar    *work, lwork;
3361:   PetscScalar    *St, *S, *eigv;
3362:   PetscScalar    *Sarray, *Starray;
3363:   PetscReal      *eigs, thresh, lthresh, uthresh;
3364:   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3365:   PetscBool       allocated_S_St, upart;
3366: #if defined(PETSC_USE_COMPLEX)
3367:   PetscReal *rwork;
3368: #endif

3370:   PetscFunctionBegin;
3371:   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3372:   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3373:   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3374:   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3375:              sub_schurs->is_posdef);
3376:   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));

3378:   if (pcbddc->dbg_flag) {
3379:     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3380:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3381:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3382:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3383:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3384:   }

3386:   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));

3388:   /* max size of subsets */
3389:   mss = 0;
3390:   for (i = 0; i < sub_schurs->n_subs; i++) {
3391:     PetscInt subset_size;

3393:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3394:     mss = PetscMax(mss, subset_size);
3395:   }

3397:   /* min/max and threshold */
3398:   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3399:   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3400:   nmax           = PetscMax(nmin, nmax);
3401:   allocated_S_St = PETSC_FALSE;
3402:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3403:     allocated_S_St = PETSC_TRUE;
3404:   }

3406:   /* allocate lapack workspace */
3407:   cum = cum2 = 0;
3408:   maxneigs   = 0;
3409:   for (i = 0; i < sub_schurs->n_subs; i++) {
3410:     PetscInt n, subset_size;

3412:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3413:     n = PetscMin(subset_size, nmax);
3414:     cum += subset_size;
3415:     cum2 += subset_size * n;
3416:     maxneigs = PetscMax(maxneigs, n);
3417:   }
3418:   lwork = 0;
3419:   if (mss) {
3420:     PetscScalar  sdummy  = 0.;
3421:     PetscBLASInt B_itype = 1;
3422:     PetscBLASInt B_N, idummy = 0;
3423:     PetscReal    rdummy = 0., zero = 0.0;
3424:     PetscReal    eps = 0.0; /* dlamch? */

3426:     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3427:     PetscCall(PetscBLASIntCast(mss, &B_N));
3428:     B_lwork = -1;
3429:     /* some implementations may complain about NULL pointers, even if we are querying */
3430:     S       = &sdummy;
3431:     St      = &sdummy;
3432:     eigs    = &rdummy;
3433:     eigv    = &sdummy;
3434:     B_iwork = &idummy;
3435:     B_ifail = &idummy;
3436: #if defined(PETSC_USE_COMPLEX)
3437:     rwork = &rdummy;
3438: #endif
3439:     thresh = 1.0;
3440:     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3441: #if defined(PETSC_USE_COMPLEX)
3442:     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3443: #else
3444:     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3445: #endif
3446:     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3447:     PetscCall(PetscFPTrapPop());
3448:   }

3450:   nv = 0;
3451:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3452:     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3453:   }
3454:   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3455:   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3456:   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3457: #if defined(PETSC_USE_COMPLEX)
3458:   PetscCall(PetscMalloc1(7 * mss, &rwork));
3459: #endif
3460:   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3461:                          &pcbddc->adaptive_constraints_data));
3462:   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));

3464:   maxneigs = 0;
3465:   cum = cumarray                           = 0;
3466:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3467:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3468:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3469:     const PetscInt *idxs;

3471:     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3472:     for (cum = 0; cum < nv; cum++) {
3473:       pcbddc->adaptive_constraints_n[cum]            = 1;
3474:       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3475:       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3476:       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3477:       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3478:     }
3479:     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3480:   }

3482:   if (mss) { /* multilevel */
3483:     if (sub_schurs->gdsw) {
3484:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3485:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3486:     } else {
3487:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3488:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3489:     }
3490:   }

3492:   lthresh = pcbddc->adaptive_threshold[0];
3493:   uthresh = pcbddc->adaptive_threshold[1];
3494:   upart   = pcbddc->use_deluxe_scaling;
3495:   for (i = 0; i < sub_schurs->n_subs; i++) {
3496:     const PetscInt *idxs;
3497:     PetscReal       upper, lower;
3498:     PetscInt        j, subset_size, eigs_start = 0;
3499:     PetscBLASInt    B_N;
3500:     PetscBool       same_data = PETSC_FALSE;
3501:     PetscBool       scal      = PETSC_FALSE;

3503:     if (upart) {
3504:       upper = PETSC_MAX_REAL;
3505:       lower = uthresh;
3506:     } else {
3507:       if (sub_schurs->gdsw) {
3508:         upper = uthresh;
3509:         lower = PETSC_MIN_REAL;
3510:       } else {
3511:         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3512:         upper = 1. / uthresh;
3513:         lower = 0.;
3514:       }
3515:     }
3516:     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3517:     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3518:     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3519:     /* this is experimental: we assume the dofs have been properly grouped to have
3520:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3521:     if (!sub_schurs->is_posdef) {
3522:       Mat T;

3524:       for (j = 0; j < subset_size; j++) {
3525:         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3526:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3527:           PetscCall(MatScale(T, -1.0));
3528:           PetscCall(MatDestroy(&T));
3529:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3530:           PetscCall(MatScale(T, -1.0));
3531:           PetscCall(MatDestroy(&T));
3532:           if (sub_schurs->change_primal_sub) {
3533:             PetscInt        nz, k;
3534:             const PetscInt *idxs;

3536:             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3537:             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3538:             for (k = 0; k < nz; k++) {
3539:               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3540:               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3541:             }
3542:             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3543:           }
3544:           scal = PETSC_TRUE;
3545:           break;
3546:         }
3547:       }
3548:     }

3550:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3551:       if (sub_schurs->is_symmetric) {
3552:         PetscInt j, k;
3553:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3554:           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3555:           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3556:         }
3557:         for (j = 0; j < subset_size; j++) {
3558:           for (k = j; k < subset_size; k++) {
3559:             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3560:             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3561:           }
3562:         }
3563:       } else {
3564:         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3565:         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3566:       }
3567:     } else {
3568:       S  = Sarray + cumarray;
3569:       St = Starray + cumarray;
3570:     }
3571:     /* see if we can save some work */
3572:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));

3574:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3575:       B_neigs = 0;
3576:     } else {
3577:       PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3578:       PetscReal    eps = -1.0; /* dlamch? */
3579:       PetscInt     nmin_s;
3580:       PetscBool    compute_range;

3582:       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3583:       B_neigs       = 0;
3584:       compute_range = (PetscBool)!same_data;
3585:       if (nmin >= subset_size) compute_range = PETSC_FALSE;

3587:       if (pcbddc->dbg_flag) {
3588:         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;

3590:         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3591:         PetscCall(
3592:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3593:       }

3595:       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3596:       if (compute_range) {
3597:         /* ask for eigenvalues larger than thresh */
3598:         if (sub_schurs->is_posdef) {
3599: #if defined(PETSC_USE_COMPLEX)
3600:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3601: #else
3602:           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3603: #endif
3604:           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3605:         } else { /* no theory so far, but it works nicely */
3606:           PetscInt  recipe = 0, recipe_m = 1;
3607:           PetscReal bb[2];

3609:           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3610:           switch (recipe) {
3611:           case 0:
3612:             if (scal) {
3613:               bb[0] = PETSC_MIN_REAL;
3614:               bb[1] = lthresh;
3615:             } else {
3616:               bb[0] = uthresh;
3617:               bb[1] = PETSC_MAX_REAL;
3618:             }
3619: #if defined(PETSC_USE_COMPLEX)
3620:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3621: #else
3622:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3623: #endif
3624:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3625:             break;
3626:           case 1:
3627:             bb[0] = PETSC_MIN_REAL;
3628:             bb[1] = lthresh * lthresh;
3629: #if defined(PETSC_USE_COMPLEX)
3630:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3631: #else
3632:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3633: #endif
3634:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3635:             if (!scal) {
3636:               PetscBLASInt B_neigs2 = 0;

3638:               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3639:               bb[1] = PETSC_MAX_REAL;
3640:               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3641:               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3642: #if defined(PETSC_USE_COMPLEX)
3643:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3644: #else
3645:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3646: #endif
3647:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3648:               B_neigs += B_neigs2;
3649:             }
3650:             break;
3651:           case 2:
3652:             if (scal) {
3653:               bb[0] = PETSC_MIN_REAL;
3654:               bb[1] = 0;
3655: #if defined(PETSC_USE_COMPLEX)
3656:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3657: #else
3658:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3659: #endif
3660:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3661:             } else {
3662:               PetscBLASInt B_neigs2 = 0;
3663:               PetscBool    do_copy  = PETSC_FALSE;

3665:               lthresh = PetscMax(lthresh, 0.0);
3666:               if (lthresh > 0.0) {
3667:                 bb[0] = PETSC_MIN_REAL;
3668:                 bb[1] = lthresh * lthresh;

3670:                 do_copy = PETSC_TRUE;
3671: #if defined(PETSC_USE_COMPLEX)
3672:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3673: #else
3674:                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3675: #endif
3676:                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3677:               }
3678:               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3679:               bb[1] = PETSC_MAX_REAL;
3680:               if (do_copy) {
3681:                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3682:                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3683:               }
3684: #if defined(PETSC_USE_COMPLEX)
3685:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3686: #else
3687:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3688: #endif
3689:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3690:               B_neigs += B_neigs2;
3691:             }
3692:             break;
3693:           case 3:
3694:             if (scal) {
3695:               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3696:             } else {
3697:               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3698:             }
3699:             if (!scal) {
3700:               bb[0] = uthresh;
3701:               bb[1] = PETSC_MAX_REAL;
3702: #if defined(PETSC_USE_COMPLEX)
3703:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3704: #else
3705:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3706: #endif
3707:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3708:             }
3709:             if (recipe_m > 0 && B_N - B_neigs > 0) {
3710:               PetscBLASInt B_neigs2 = 0;

3712:               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3713:               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3714:               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3715: #if defined(PETSC_USE_COMPLEX)
3716:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3717: #else
3718:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3719: #endif
3720:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3721:               B_neigs += B_neigs2;
3722:             }
3723:             break;
3724:           case 4:
3725:             bb[0] = PETSC_MIN_REAL;
3726:             bb[1] = lthresh;
3727: #if defined(PETSC_USE_COMPLEX)
3728:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3729: #else
3730:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3731: #endif
3732:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3733:             {
3734:               PetscBLASInt B_neigs2 = 0;

3736:               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3737:               bb[1] = PETSC_MAX_REAL;
3738:               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3739:               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3740: #if defined(PETSC_USE_COMPLEX)
3741:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3742: #else
3743:               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3744: #endif
3745:               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3746:               B_neigs += B_neigs2;
3747:             }
3748:             break;
3749:           case 5: /* same as before: first compute all eigenvalues, then filter */
3750: #if defined(PETSC_USE_COMPLEX)
3751:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3752: #else
3753:             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3754: #endif
3755:             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3756:             {
3757:               PetscInt e, k, ne;
3758:               for (e = 0, ne = 0; e < B_neigs; e++) {
3759:                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3760:                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3761:                   eigs[ne] = eigs[e];
3762:                   ne++;
3763:                 }
3764:               }
3765:               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3766:               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3767:             }
3768:             break;
3769:           default:
3770:             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3771:           }
3772:         }
3773:       } else if (!same_data) { /* this is just to see all the eigenvalues */
3774:         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3775: #if defined(PETSC_USE_COMPLEX)
3776:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3777: #else
3778:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3779: #endif
3780:         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3781:       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3782:         PetscInt k;
3783:         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3784:         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3785:         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3786:         nmin = nmax;
3787:         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3788:         for (k = 0; k < nmax; k++) {
3789:           eigs[k]                     = 1. / PETSC_SMALL;
3790:           eigv[k * (subset_size + 1)] = 1.0;
3791:         }
3792:       }
3793:       PetscCall(PetscFPTrapPop());
3794:       if (B_ierr) {
3795:         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3796:         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3797:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3798:       }

3800:       if (B_neigs > nmax) {
3801:         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3802:         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3803:         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3804:       }

3806:       nmin_s = PetscMin(nmin, B_N);
3807:       if (B_neigs < nmin_s) {
3808:         PetscBLASInt B_neigs2 = 0;

3810:         if (upart) {
3811:           if (scal) {
3812:             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3813:             B_IL = B_neigs + 1;
3814:           } else {
3815:             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3816:             B_IU = B_N - B_neigs;
3817:           }
3818:         } else {
3819:           B_IL = B_neigs + 1;
3820:           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3821:         }
3822:         if (pcbddc->dbg_flag) {
3823:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3824:         }
3825:         if (sub_schurs->is_symmetric) {
3826:           PetscInt j, k;
3827:           for (j = 0; j < subset_size; j++) {
3828:             for (k = j; k < subset_size; k++) {
3829:               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3830:               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3831:             }
3832:           }
3833:         } else {
3834:           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3835:           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3836:         }
3837:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3838: #if defined(PETSC_USE_COMPLEX)
3839:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3840: #else
3841:         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3842: #endif
3843:         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3844:         PetscCall(PetscFPTrapPop());
3845:         B_neigs += B_neigs2;
3846:       }
3847:       if (B_ierr) {
3848:         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3849:         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3850:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3851:       }
3852:       if (pcbddc->dbg_flag) {
3853:         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3854:         for (j = 0; j < B_neigs; j++) {
3855:           if (!sub_schurs->gdsw) {
3856:             if (eigs[j] == 0.0) {
3857:               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3858:             } else {
3859:               if (upart) {
3860:                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3861:               } else {
3862:                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3863:               }
3864:             }
3865:           } else {
3866:             double pg = (double)eigs[j + eigs_start];
3867:             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3868:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3869:           }
3870:         }
3871:       }
3872:     }
3873:     /* change the basis back to the original one */
3874:     if (sub_schurs->change) {
3875:       Mat change, phi, phit;

3877:       if (pcbddc->dbg_flag > 2) {
3878:         PetscInt ii;
3879:         for (ii = 0; ii < B_neigs; ii++) {
3880:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3881:           for (j = 0; j < B_N; j++) {
3882: #if defined(PETSC_USE_COMPLEX)
3883:             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3884:             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3885:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3886: #else
3887:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3888: #endif
3889:           }
3890:         }
3891:       }
3892:       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3893:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3894:       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3895:       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3896:       PetscCall(MatDestroy(&phit));
3897:       PetscCall(MatDestroy(&phi));
3898:     }
3899:     maxneigs                               = PetscMax(B_neigs, maxneigs);
3900:     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3901:     if (B_neigs) {
3902:       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));

3904:       if (pcbddc->dbg_flag > 1) {
3905:         PetscInt ii;
3906:         for (ii = 0; ii < B_neigs; ii++) {
3907:           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3908:           for (j = 0; j < B_N; j++) {
3909: #if defined(PETSC_USE_COMPLEX)
3910:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3911:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3912:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3913: #else
3914:             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3915: #endif
3916:           }
3917:         }
3918:       }
3919:       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3920:       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3921:       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3922:       cum++;
3923:     }
3924:     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3925:     /* shift for next computation */
3926:     cumarray += subset_size * subset_size;
3927:   }
3928:   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));

3930:   if (mss) {
3931:     if (sub_schurs->gdsw) {
3932:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3933:       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3934:     } else {
3935:       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3936:       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3937:       /* destroy matrices (junk) */
3938:       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3939:       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3940:     }
3941:   }
3942:   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3943:   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3944: #if defined(PETSC_USE_COMPLEX)
3945:   PetscCall(PetscFree(rwork));
3946: #endif
3947:   if (pcbddc->dbg_flag) {
3948:     PetscInt maxneigs_r;
3949:     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3950:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3951:   }
3952:   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3953:   PetscFunctionReturn(PETSC_SUCCESS);
3954: }

3956: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3957: {
3958:   Mat coarse_submat;

3960:   PetscFunctionBegin;
3961:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3962:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3963:   PetscCall(PCBDDCSetUpLocalScatters(pc));

3965:   /* Setup local neumann solver ksp_R */
3966:   /* PCBDDCSetUpLocalScatters should be called first! */
3967:   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));

3969:   /*
3970:      Setup local correction and local part of coarse basis.
3971:      Gives back the dense local part of the coarse matrix in column major ordering
3972:   */
3973:   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));

3975:   /* Compute total number of coarse nodes and setup coarse solver */
3976:   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3977:   PetscCall(MatDestroy(&coarse_submat));
3978:   PetscFunctionReturn(PETSC_SUCCESS);
3979: }

3981: PetscErrorCode PCBDDCResetCustomization(PC pc)
3982: {
3983:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

3985:   PetscFunctionBegin;
3986:   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3987:   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3988:   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3989:   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3990:   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3991:   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3992:   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3993:   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3994:   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3995:   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3996:   PetscFunctionReturn(PETSC_SUCCESS);
3997: }

3999: PetscErrorCode PCBDDCResetTopography(PC pc)
4000: {
4001:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4002:   PetscInt i;

4004:   PetscFunctionBegin;
4005:   PetscCall(MatDestroy(&pcbddc->nedcG));
4006:   PetscCall(ISDestroy(&pcbddc->nedclocal));
4007:   PetscCall(MatDestroy(&pcbddc->discretegradient));
4008:   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
4009:   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
4010:   PetscCall(MatDestroy(&pcbddc->switch_static_change));
4011:   PetscCall(VecDestroy(&pcbddc->work_change));
4012:   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
4013:   PetscCall(MatDestroy(&pcbddc->divudotp));
4014:   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
4015:   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
4016:   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
4017:   pcbddc->n_local_subs = 0;
4018:   PetscCall(PetscFree(pcbddc->local_subs));
4019:   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
4020:   pcbddc->graphanalyzed        = PETSC_FALSE;
4021:   pcbddc->recompute_topography = PETSC_TRUE;
4022:   pcbddc->corner_selected      = PETSC_FALSE;
4023:   PetscFunctionReturn(PETSC_SUCCESS);
4024: }

4026: PetscErrorCode PCBDDCResetSolvers(PC pc)
4027: {
4028:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;

4030:   PetscFunctionBegin;
4031:   PetscCall(VecDestroy(&pcbddc->coarse_vec));
4032:   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4033:   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4034:   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4035:   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4036:   PetscCall(VecDestroy(&pcbddc->vec1_P));
4037:   PetscCall(VecDestroy(&pcbddc->vec1_C));
4038:   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4039:   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4040:   PetscCall(VecDestroy(&pcbddc->vec1_R));
4041:   PetscCall(VecDestroy(&pcbddc->vec2_R));
4042:   PetscCall(ISDestroy(&pcbddc->is_R_local));
4043:   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
4044:   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
4045:   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
4046:   PetscCall(KSPReset(pcbddc->ksp_D));
4047:   PetscCall(KSPReset(pcbddc->ksp_R));
4048:   PetscCall(KSPReset(pcbddc->coarse_ksp));
4049:   PetscCall(MatDestroy(&pcbddc->local_mat));
4050:   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
4051:   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
4052:   PetscCall(PetscFree(pcbddc->global_primal_indices));
4053:   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
4054:   PetscCall(MatDestroy(&pcbddc->benign_change));
4055:   PetscCall(VecDestroy(&pcbddc->benign_vec));
4056:   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
4057:   PetscCall(MatDestroy(&pcbddc->benign_B0));
4058:   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
4059:   if (pcbddc->benign_zerodiag_subs) {
4060:     PetscInt i;
4061:     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
4062:     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
4063:   }
4064:   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
4065:   PetscFunctionReturn(PETSC_SUCCESS);
4066: }

4068: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
4069: {
4070:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4071:   PC_IS   *pcis   = (PC_IS *)pc->data;
4072:   VecType  impVecType;
4073:   PetscInt n_constraints, n_R, old_size;

4075:   PetscFunctionBegin;
4076:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
4077:   n_R           = pcis->n - pcbddc->n_vertices;
4078:   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
4079:   /* local work vectors (try to avoid unneeded work)*/
4080:   /* R nodes */
4081:   old_size = -1;
4082:   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
4083:   if (n_R != old_size) {
4084:     PetscCall(VecDestroy(&pcbddc->vec1_R));
4085:     PetscCall(VecDestroy(&pcbddc->vec2_R));
4086:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
4087:     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
4088:     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
4089:     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
4090:   }
4091:   /* local primal dofs */
4092:   old_size = -1;
4093:   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
4094:   if (pcbddc->local_primal_size != old_size) {
4095:     PetscCall(VecDestroy(&pcbddc->vec1_P));
4096:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
4097:     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
4098:     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
4099:   }
4100:   /* local explicit constraints */
4101:   old_size = -1;
4102:   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
4103:   if (n_constraints && n_constraints != old_size) {
4104:     PetscCall(VecDestroy(&pcbddc->vec1_C));
4105:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
4106:     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
4107:     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
4108:   }
4109:   PetscFunctionReturn(PETSC_SUCCESS);
4110: }

4112: static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
4113: {
4114:   PetscBool          flg;
4115:   const PetscScalar *a;

4117:   PetscFunctionBegin;
4118:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4119:   if (flg) {
4120:     PetscCall(MatDenseGetArrayRead(S, &a));
4121:     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4122:     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4123:     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4124:     PetscCall(MatDenseRestoreArrayRead(S, &a));
4125:   } else {
4126:     const PetscInt *ii, *jj;
4127:     PetscInt        n;
4128:     PetscInt        buf[8192], *bufc = NULL;
4129:     PetscBool       freeb = PETSC_FALSE;
4130:     Mat             Sm    = S;

4132:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4133:     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4134:     else PetscCall(PetscObjectReference((PetscObject)S));
4135:     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4136:     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4137:     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4138:     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4139:       bufc = buf;
4140:     } else {
4141:       PetscCall(PetscMalloc1(nc, &bufc));
4142:       freeb = PETSC_TRUE;
4143:     }

4145:     for (PetscInt i = 0; i < n; i++) {
4146:       const PetscInt nci = ii[i + 1] - ii[i];

4148:       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4149:       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4150:     }
4151:     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4152:     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4153:     PetscCall(MatDestroy(&Sm));
4154:     if (freeb) PetscCall(PetscFree(bufc));
4155:   }
4156:   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4157:   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4158:   PetscFunctionReturn(PETSC_SUCCESS);
4159: }

4161: static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4162: {
4163:   Mat_SeqAIJ        *aij;
4164:   PetscInt          *ii, *jj;
4165:   PetscScalar       *aa;
4166:   PetscInt           nnz = 0, m, nc;
4167:   const PetscScalar *a;
4168:   const PetscScalar  zero = 0.0;

4170:   PetscFunctionBegin;
4171:   PetscCall(MatGetLocalSize(D, &m, &nc));
4172:   PetscCall(MatDenseGetArrayRead(D, &a));
4173:   PetscCall(PetscMalloc1(m + 1, &ii));
4174:   PetscCall(PetscMalloc1(m * nc, &jj));
4175:   PetscCall(PetscMalloc1(m * nc, &aa));
4176:   ii[0] = 0;
4177:   for (PetscInt k = 0; k < m; k++) {
4178:     for (PetscInt s = 0; s < nc; s++) {
4179:       const PetscInt    c = s + k * nc;
4180:       const PetscScalar v = a[k + s * m];

4182:       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4183:       jj[nnz] = j[c];
4184:       aa[nnz] = a[k + s * m];
4185:       nnz++;
4186:     }
4187:     ii[k + 1] = nnz;
4188:   }

4190:   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4191:   PetscCall(MatDenseRestoreArrayRead(D, &a));

4193:   aij          = (Mat_SeqAIJ *)(*mat)->data;
4194:   aij->free_a  = PETSC_TRUE;
4195:   aij->free_ij = PETSC_TRUE;
4196:   PetscFunctionReturn(PETSC_SUCCESS);
4197: }

4199: /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4200: static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4201: {
4202:   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4203:   const PetscBool allowzeropivot    = PETSC_FALSE;
4204:   PetscBool       zeropivotdetected = PETSC_FALSE;
4205:   const PetscReal shift             = 0.0;
4206:   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4207:   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4208:   PetscLogDouble  flops = 0.0;

4210:   PetscFunctionBegin;
4211:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4212:   for (PetscInt i = 0; i < nblocks; i++) {
4213:     ncnt += bsizes[i];
4214:     ncnt2 += PetscSqr(bsizes[i]);
4215:   }
4216:   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4217:   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4218:   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));

4220:   PetscCall(PetscMalloc1(n + 1, &ii));
4221:   PetscCall(PetscMalloc1(ncnt2, &jj));
4222:   PetscCall(PetscCalloc1(ncnt2, &aa));

4224:   ncnt  = 0;
4225:   ii[0] = 0;
4226:   indi  = ii;
4227:   indj  = jj;
4228:   diag  = aa;
4229:   for (PetscInt i = 0; i < nblocks; i++) {
4230:     const PetscInt bs = bsizes[i];

4232:     for (PetscInt k = 0; k < bs; k++) {
4233:       indi[k + 1] = indi[k] + bs;
4234:       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4235:     }
4236:     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4237:     switch (bs) {
4238:     case 1:
4239:       *diag = 1.0 / (*diag);
4240:       break;
4241:     case 2:
4242:       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4243:       break;
4244:     case 3:
4245:       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4246:       break;
4247:     case 4:
4248:       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4249:       break;
4250:     case 5:
4251:       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4252:       break;
4253:     case 6:
4254:       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4255:       break;
4256:     case 7:
4257:       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4258:       break;
4259:     default:
4260:       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4261:     }
4262:     ncnt += bs;
4263:     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4264:     diag += bs * bs;
4265:     indj += bs * bs;
4266:     indi += bs;
4267:   }
4268:   PetscCall(PetscLogFlops(flops));
4269:   PetscCall(PetscFree2(v_work, v_pivots));
4270:   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4271:   {
4272:     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4273:     aij->free_a     = PETSC_TRUE;
4274:     aij->free_ij    = PETSC_TRUE;
4275:   }
4276:   PetscFunctionReturn(PETSC_SUCCESS);
4277: }

4279: static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4280: {
4281:   const PetscScalar *rarr;
4282:   PetscScalar       *larr;
4283:   PetscSF            vsf;
4284:   PetscInt           n, rld, lld;

4286:   PetscFunctionBegin;
4287:   PetscCall(MatGetSize(A, NULL, &n));
4288:   PetscCall(MatDenseGetLDA(A, &rld));
4289:   PetscCall(MatDenseGetLDA(B, &lld));
4290:   PetscCall(MatDenseGetArrayRead(A, &rarr));
4291:   PetscCall(MatDenseGetArrayWrite(B, &larr));
4292:   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4293:   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4294:   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4295:   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4296:   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4297:   PetscCall(PetscSFDestroy(&vsf));
4298:   PetscFunctionReturn(PETSC_SUCCESS);
4299: }

4301: PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4302: {
4303:   PC_IS          *pcis       = (PC_IS *)pc->data;
4304:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4305:   PCBDDCGraph     graph      = pcbddc->mat_graph;
4306:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4307:   /* submatrices of local problem */
4308:   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4309:   /* submatrices of local coarse problem */
4310:   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4311:   /* working matrices */
4312:   Mat C_CR;

4314:   /* additional working stuff */
4315:   PC              pc_R;
4316:   IS              is_R, is_V, is_C;
4317:   const PetscInt *idx_V, *idx_C;
4318:   Mat             F, Brhs = NULL;
4319:   Vec             dummy_vec;
4320:   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4321:   PetscInt       *idx_V_B;
4322:   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4323:   PetscInt        n_eff_vertices, n_eff_constraints;
4324:   PetscInt        i, n_R, n_D, n_B;
4325:   PetscScalar     one = 1.0, m_one = -1.0;

4327:   /* Multi-element support */
4328:   PetscBool multi_element = graph->multi_element;
4329:   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4330:   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4331:   IS        is_C_perm = NULL;
4332:   PetscInt  n_C_bss = 0, *C_bss = NULL;
4333:   Mat       coarse_phi_multi;

4335:   PetscFunctionBegin;
4336:   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4337:   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));

4339:   /* Set Non-overlapping dimensions */
4340:   n_vertices    = pcbddc->n_vertices;
4341:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4342:   n_B           = pcis->n_B;
4343:   n_D           = pcis->n - n_B;
4344:   n_R           = pcis->n - n_vertices;

4346:   /* vertices in boundary numbering */
4347:   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4348:   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4349:   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);

4351:   /* these two cases still need to be optimized */
4352:   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;

4354:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4355:   if (multi_element) {
4356:     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");

4358:     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4359:     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4360:     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4361:     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4362:     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));

4364:     /* group vertices and constraints by subdomain id */
4365:     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4366:     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4367:     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4368:     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);

4370:     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4371:     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4372:     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4373:     for (PetscInt i = 0; i < n_vertices; i++) {
4374:       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;

4376:       V_to_eff_V[i] = count_eff[s];
4377:       count_eff[s] += 1;
4378:     }
4379:     for (PetscInt i = 0; i < n_constraints; i++) {
4380:       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;

4382:       C_to_eff_C[i] = count_eff[s];
4383:       count_eff[s] += 1;
4384:     }

4386:     /* preallocation */
4387:     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4388:     for (PetscInt i = 0; i < n_vertices; i++) {
4389:       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;

4391:       nnz[i] = count_eff[s] + count_eff[s + 1];
4392:     }
4393:     for (PetscInt i = 0; i < n_constraints; i++) {
4394:       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;

4396:       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4397:     }
4398:     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4399:     PetscCall(PetscFree(nnz));

4401:     n_eff_vertices    = 0;
4402:     n_eff_constraints = 0;
4403:     for (PetscInt i = 0; i < n_el; i++) {
4404:       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4405:       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4406:       count_eff[2 * i]     = 0;
4407:       count_eff[2 * i + 1] = 0;
4408:     }

4410:     const PetscInt *idx;
4411:     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));

4413:     for (PetscInt i = 0; i < n_vertices; i++) {
4414:       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4415:       const PetscInt s = 2 * e;

4417:       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4418:       count_eff[s] += 1;
4419:     }
4420:     for (PetscInt i = 0; i < n_constraints; i++) {
4421:       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4422:       const PetscInt s = 2 * e + 1;

4424:       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4425:       count_eff[s] += 1;
4426:     }

4428:     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4429:     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4430:     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4431:     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4432:     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4433:     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4434:     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4435:     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;

4437:     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4438:     for (PetscInt i = 0; i < n_R; i++) {
4439:       const PetscInt e = graph->nodes[idx[i]].local_sub;
4440:       const PetscInt s = 2 * e;
4441:       PetscInt       j;

4443:       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4444:       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4445:     }
4446:     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4447:     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4448:     for (PetscInt i = 0; i < n_B; i++) {
4449:       const PetscInt e = graph->nodes[idx[i]].local_sub;
4450:       const PetscInt s = 2 * e;
4451:       PetscInt       j;

4453:       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4454:       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4455:     }
4456:     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));

4458:     /* permutation and blocksizes for block invert of S_CC */
4459:     PetscInt *idxp;

4461:     PetscCall(PetscMalloc1(n_constraints, &idxp));
4462:     PetscCall(PetscMalloc1(n_el, &C_bss));
4463:     n_C_bss = 0;
4464:     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4465:       const PetscInt nc = count_eff[2 * e + 1];

4467:       if (nc) C_bss[n_C_bss++] = nc;
4468:       for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4469:       cnt += nc;
4470:     }

4472:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));

4474:     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4475:     PetscCall(PetscFree(count_eff));
4476:   } else {
4477:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4478:     n_eff_constraints = n_constraints;
4479:     n_eff_vertices    = n_vertices;
4480:   }

4482:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4483:   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4484:   PetscCall(PCSetUp(pc_R));
4485:   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4486:   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4487:   lda_rhs                = n_R;
4488:   need_benign_correction = PETSC_FALSE;
4489:   if (isLU || isCHOL) {
4490:     PetscCall(PCFactorGetMatrix(pc_R, &F));
4491:   } else if (sub_schurs && sub_schurs->reuse_solver) {
4492:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4493:     MatFactorType      type;

4495:     F = reuse_solver->F;
4496:     PetscCall(MatGetFactorType(F, &type));
4497:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4498:     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4499:     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4500:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4501:   } else F = NULL;

4503:   /* determine if we can use a sparse right-hand side */
4504:   sparserhs = PETSC_FALSE;
4505:   if (F && !multi_element) {
4506:     MatSolverType solver;

4508:     PetscCall(MatFactorGetSolverType(F, &solver));
4509:     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4510:   }

4512:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4513:   dummy_vec = NULL;
4514:   if (need_benign_correction && lda_rhs != n_R && F) {
4515:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4516:     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4517:     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4518:   }

4520:   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4521:   PetscCall(MatDestroy(&pcbddc->local_auxmat2));

4523:   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4524:   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4525:   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4526:   PetscCall(ISGetIndices(is_V, &idx_V));
4527:   PetscCall(ISGetIndices(is_C, &idx_C));

4529:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4530:   if (n_constraints) {
4531:     Mat C_B;

4533:     /* Extract constraints on R nodes: C_{CR}  */
4534:     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4535:     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));

4537:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4538:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4539:     if (!sparserhs) {
4540:       PetscScalar *marr;

4542:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4543:       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4544:       for (i = 0; i < n_constraints; i++) {
4545:         const PetscScalar *row_cmat_values;
4546:         const PetscInt    *row_cmat_indices;
4547:         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;

4549:         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4550:         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4551:         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4552:       }
4553:       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4554:     } else {
4555:       Mat tC_CR;

4557:       PetscCall(MatScale(C_CR, -1.0));
4558:       if (lda_rhs != n_R) {
4559:         PetscScalar *aa;
4560:         PetscInt     r, *ii, *jj;
4561:         PetscBool    done;

4563:         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4564:         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4565:         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4566:         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4567:         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4568:         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4569:       } else {
4570:         PetscCall(PetscObjectReference((PetscObject)C_CR));
4571:         tC_CR = C_CR;
4572:       }
4573:       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4574:       PetscCall(MatDestroy(&tC_CR));
4575:     }
4576:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4577:     if (F) {
4578:       if (need_benign_correction) {
4579:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4581:         /* rhs is already zero on interior dofs, no need to change the rhs */
4582:         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4583:       }
4584:       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4585:       if (need_benign_correction) {
4586:         PetscScalar       *marr;
4587:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4589:         /* XXX multi_element? */
4590:         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4591:         if (lda_rhs != n_R) {
4592:           for (i = 0; i < n_eff_constraints; i++) {
4593:             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4594:             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4595:             PetscCall(VecResetArray(dummy_vec));
4596:           }
4597:         } else {
4598:           for (i = 0; i < n_eff_constraints; i++) {
4599:             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4600:             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4601:             PetscCall(VecResetArray(pcbddc->vec1_R));
4602:           }
4603:         }
4604:         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4605:       }
4606:     } else {
4607:       const PetscScalar *barr;
4608:       PetscScalar       *marr;

4610:       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4611:       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4612:       for (i = 0; i < n_eff_constraints; i++) {
4613:         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4614:         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4615:         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4616:         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4617:         PetscCall(VecResetArray(pcbddc->vec1_R));
4618:         PetscCall(VecResetArray(pcbddc->vec2_R));
4619:       }
4620:       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4621:       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4622:     }
4623:     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4624:     PetscCall(MatDestroy(&Brhs));
4625:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4626:     if (!pcbddc->switch_static) {
4627:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4628:       for (i = 0; i < n_eff_constraints; i++) {
4629:         Vec r, b;
4630:         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4631:         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4632:         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4633:         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4634:         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4635:         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4636:       }
4637:       if (multi_element) {
4638:         Mat T;

4640:         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4641:         PetscCall(MatDestroy(&local_auxmat2_R));
4642:         local_auxmat2_R = T;
4643:         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4644:         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4645:         pcbddc->local_auxmat2 = T;
4646:       }
4647:       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4648:     } else {
4649:       if (multi_element) {
4650:         Mat T;

4652:         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4653:         PetscCall(MatDestroy(&local_auxmat2_R));
4654:         local_auxmat2_R = T;
4655:       }
4656:       if (lda_rhs != n_R) {
4657:         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4658:       } else {
4659:         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4660:         pcbddc->local_auxmat2 = local_auxmat2_R;
4661:       }
4662:       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4663:     }
4664:     PetscCall(MatScale(S_CC, m_one));
4665:     if (multi_element) {
4666:       Mat T, T2;
4667:       IS  isp, ispi;

4669:       isp = is_C_perm;

4671:       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4672:       PetscCall(MatPermute(S_CC, isp, isp, &T));
4673:       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4674:       PetscCall(MatDestroy(&T));
4675:       PetscCall(MatDestroy(&S_CC));
4676:       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4677:       PetscCall(MatDestroy(&T2));
4678:       PetscCall(ISDestroy(&ispi));
4679:     } else {
4680:       if (isCHOL) {
4681:         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4682:       } else {
4683:         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4684:       }
4685:       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4686:     }
4687:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4688:     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4689:     PetscCall(MatDestroy(&C_B));
4690:     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4691:   }

4693:   /* Get submatrices from subdomain matrix */
4694:   if (n_vertices) {
4695: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4696:     PetscBool oldpin;
4697: #endif
4698:     IS is_aux;

4700:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4701:       IS tis;

4703:       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4704:       PetscCall(ISSort(tis));
4705:       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4706:       PetscCall(ISDestroy(&tis));
4707:     } else {
4708:       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4709:     }
4710: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4711:     oldpin = pcbddc->local_mat->boundtocpu;
4712: #endif
4713:     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4714:     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4715:     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4716:     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4717:     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4718:     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4719: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4720:     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4721: #endif
4722:     PetscCall(ISDestroy(&is_aux));
4723:   }
4724:   PetscCall(ISDestroy(&is_C_perm));
4725:   PetscCall(PetscFree(C_bss));

4727:   p0_lidx_I = NULL;
4728:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4729:     const PetscInt *idxs;

4731:     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4732:     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4733:     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4734:     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4735:   }

4737:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */

4739:   /* Matrices of coarse basis functions (local) */
4740:   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4741:   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4742:   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4743:   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4744:   if (!multi_element) {
4745:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4746:     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4747:     coarse_phi_multi = NULL;
4748:   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4749:     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4750:     IS is_cols[2] = {is_V, is_C};

4752:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4753:     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4754:     PetscCall(ISDestroy(&is_rows[1]));
4755:   }

4757:   /* vertices */
4758:   if (n_vertices) {
4759:     PetscBool restoreavr = PETSC_FALSE;
4760:     Mat       A_RRmA_RV  = NULL;

4762:     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4763:     PetscCall(MatDestroy(&A_VV));

4765:     if (n_R) {
4766:       Mat A_RV_bcorr = NULL, S_VV;

4768:       PetscCall(MatScale(A_RV, m_one));
4769:       if (need_benign_correction) {
4770:         ISLocalToGlobalMapping RtoN;
4771:         IS                     is_p0;
4772:         PetscInt              *idxs_p0, n;

4774:         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4775:         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4776:         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4777:         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4778:         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4779:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4780:         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4781:         PetscCall(ISDestroy(&is_p0));
4782:       }

4784:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4785:       if (!sparserhs || need_benign_correction) {
4786:         if (lda_rhs == n_R && !multi_element) {
4787:           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4788:         } else {
4789:           Mat             T;
4790:           PetscScalar    *av, *array;
4791:           const PetscInt *xadj, *adjncy;
4792:           PetscInt        n;
4793:           PetscBool       flg_row;

4795:           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4796:           PetscCall(MatDenseGetArrayWrite(T, &array));
4797:           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4798:           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4799:           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4800:           for (i = 0; i < n; i++) {
4801:             PetscInt j;
4802:             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4803:           }
4804:           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4805:           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4806:           PetscCall(MatDestroy(&A_RV));
4807:           A_RV = T;
4808:         }
4809:         if (need_benign_correction) {
4810:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4811:           PetscScalar       *marr;

4813:           /* XXX multi_element */
4814:           PetscCall(MatDenseGetArray(A_RV, &marr));
4815:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4817:                  | 0 0  0 | (V)
4818:              L = | 0 0 -1 | (P-p0)
4819:                  | 0 0 -1 | (p0)

4821:           */
4822:           for (i = 0; i < reuse_solver->benign_n; i++) {
4823:             const PetscScalar *vals;
4824:             const PetscInt    *idxs, *idxs_zero;
4825:             PetscInt           n, j, nz;

4827:             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4828:             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4829:             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4830:             for (j = 0; j < n; j++) {
4831:               PetscScalar val = vals[j];
4832:               PetscInt    k, col = idxs[j];
4833:               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4834:             }
4835:             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4836:             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4837:           }
4838:           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4839:         }
4840:         PetscCall(PetscObjectReference((PetscObject)A_RV));
4841:         Brhs = A_RV;
4842:       } else {
4843:         Mat tA_RVT, A_RVT;

4845:         if (!pcbddc->symmetric_primal) {
4846:           /* A_RV already scaled by -1 */
4847:           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4848:         } else {
4849:           restoreavr = PETSC_TRUE;
4850:           PetscCall(MatScale(A_VR, -1.0));
4851:           PetscCall(PetscObjectReference((PetscObject)A_VR));
4852:           A_RVT = A_VR;
4853:         }
4854:         if (lda_rhs != n_R) {
4855:           PetscScalar *aa;
4856:           PetscInt     r, *ii, *jj;
4857:           PetscBool    done;

4859:           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4860:           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4861:           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4862:           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4863:           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4864:           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4865:         } else {
4866:           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4867:           tA_RVT = A_RVT;
4868:         }
4869:         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4870:         PetscCall(MatDestroy(&tA_RVT));
4871:         PetscCall(MatDestroy(&A_RVT));
4872:       }
4873:       if (F) {
4874:         /* need to correct the rhs */
4875:         if (need_benign_correction) {
4876:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4877:           PetscScalar       *marr;

4879:           PetscCall(MatDenseGetArray(Brhs, &marr));
4880:           if (lda_rhs != n_R) {
4881:             for (i = 0; i < n_eff_vertices; i++) {
4882:               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4883:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4884:               PetscCall(VecResetArray(dummy_vec));
4885:             }
4886:           } else {
4887:             for (i = 0; i < n_eff_vertices; i++) {
4888:               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4889:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4890:               PetscCall(VecResetArray(pcbddc->vec1_R));
4891:             }
4892:           }
4893:           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4894:         }
4895:         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4896:         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4897:         /* need to correct the solution */
4898:         if (need_benign_correction) {
4899:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4900:           PetscScalar       *marr;

4902:           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4903:           if (lda_rhs != n_R) {
4904:             for (i = 0; i < n_eff_vertices; i++) {
4905:               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4906:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4907:               PetscCall(VecResetArray(dummy_vec));
4908:             }
4909:           } else {
4910:             for (i = 0; i < n_eff_vertices; i++) {
4911:               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4912:               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4913:               PetscCall(VecResetArray(pcbddc->vec1_R));
4914:             }
4915:           }
4916:           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4917:         }
4918:       } else {
4919:         const PetscScalar *barr;
4920:         PetscScalar       *marr;

4922:         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4923:         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4924:         for (i = 0; i < n_eff_vertices; i++) {
4925:           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4926:           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4927:           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4928:           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4929:           PetscCall(VecResetArray(pcbddc->vec1_R));
4930:           PetscCall(VecResetArray(pcbddc->vec2_R));
4931:         }
4932:         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4933:         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4934:       }
4935:       PetscCall(MatDestroy(&A_RV));
4936:       PetscCall(MatDestroy(&Brhs));
4937:       /* S_VV and S_CV */
4938:       if (n_constraints) {
4939:         Mat B;

4941:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4942:         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));

4944:         /* S_CV = pcbddc->local_auxmat1 * B */
4945:         if (multi_element) {
4946:           Mat T;

4948:           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4949:           PetscCall(MatDestroy(&B));
4950:           B = T;
4951:         }
4952:         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4953:         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4954:         PetscCall(MatProductSetFromOptions(S_CV));
4955:         PetscCall(MatProductSymbolic(S_CV));
4956:         PetscCall(MatProductNumeric(S_CV));
4957:         PetscCall(MatProductClear(S_CV));
4958:         PetscCall(MatDestroy(&B));

4960:         /* B = local_auxmat2_R * S_CV */
4961:         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4962:         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4963:         PetscCall(MatProductSetFromOptions(B));
4964:         PetscCall(MatProductSymbolic(B));
4965:         PetscCall(MatProductNumeric(B));

4967:         PetscCall(MatScale(S_CV, m_one));
4968:         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));

4970:         if (multi_element) {
4971:           Mat T;

4973:           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4974:           PetscCall(MatDestroy(&A_RRmA_RV));
4975:           A_RRmA_RV = T;
4976:         }
4977:         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4978:         PetscCall(MatDestroy(&B));
4979:       } else if (multi_element) {
4980:         Mat T;

4982:         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4983:         PetscCall(MatDestroy(&A_RRmA_RV));
4984:         A_RRmA_RV = T;
4985:       }

4987:       if (lda_rhs != n_R) {
4988:         Mat T;

4990:         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4991:         PetscCall(MatDestroy(&A_RRmA_RV));
4992:         A_RRmA_RV = T;
4993:       }

4995:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4996:       if (need_benign_correction) { /* XXX SPARSE */
4997:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4998:         PetscScalar       *sums;
4999:         const PetscScalar *marr;

5001:         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
5002:         PetscCall(PetscMalloc1(n_vertices, &sums));
5003:         for (i = 0; i < reuse_solver->benign_n; i++) {
5004:           const PetscScalar *vals;
5005:           const PetscInt    *idxs, *idxs_zero;
5006:           PetscInt           n, j, nz;

5008:           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
5009:           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
5010:           for (j = 0; j < n_vertices; j++) {
5011:             sums[j] = 0.;
5012:             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
5013:           }
5014:           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
5015:           for (j = 0; j < n; j++) {
5016:             PetscScalar val = vals[j];
5017:             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
5018:           }
5019:           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
5020:           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
5021:         }
5022:         PetscCall(PetscFree(sums));
5023:         PetscCall(MatDestroy(&A_RV_bcorr));
5024:         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
5025:       }

5027:       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
5028:       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
5029:       PetscCall(MatDestroy(&S_VV));
5030:     }

5032:     /* coarse basis functions */
5033:     if (coarse_phi_multi) {
5034:       Mat Vid;

5036:       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
5037:       PetscCall(MatShift_Basic(Vid, 1.0));
5038:       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
5039:       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
5040:       PetscCall(MatDestroy(&Vid));
5041:     } else {
5042:       if (A_RRmA_RV) {
5043:         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
5044:         if (pcbddc->switch_static || pcbddc->dbg_flag) {
5045:           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
5046:           if (pcbddc->benign_n) {
5047:             for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5048:             PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5049:             PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5050:           }
5051:         }
5052:       }
5053:       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
5054:       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5055:       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5056:     }
5057:     PetscCall(MatDestroy(&A_RRmA_RV));
5058:   }
5059:   PetscCall(MatDestroy(&A_RV));
5060:   PetscCall(VecDestroy(&dummy_vec));

5062:   if (n_constraints) {
5063:     Mat B, B2;

5065:     PetscCall(MatScale(S_CC, m_one));
5066:     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
5067:     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
5068:     PetscCall(MatProductSetFromOptions(B));
5069:     PetscCall(MatProductSymbolic(B));
5070:     PetscCall(MatProductNumeric(B));

5072:     if (n_vertices) {
5073:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
5074:         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
5075:       } else {
5076:         if (lda_rhs != n_R) {
5077:           Mat tB;

5079:           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
5080:           PetscCall(MatDestroy(&B));
5081:           B = tB;
5082:         }
5083:         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
5084:       }
5085:       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
5086:     }

5088:     /* coarse basis functions */
5089:     if (coarse_phi_multi) {
5090:       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
5091:     } else {
5092:       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5093:       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
5094:       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
5095:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5096:         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5097:         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
5098:         if (pcbddc->benign_n) {
5099:           for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5100:         }
5101:         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
5102:       }
5103:     }
5104:     PetscCall(MatDestroy(&B));
5105:   }

5107:   /* assemble sparse coarse basis functions */
5108:   if (coarse_phi_multi) {
5109:     Mat T;

5111:     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
5112:     PetscCall(MatDestroy(&coarse_phi_multi));
5113:     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
5114:     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
5115:     PetscCall(MatDestroy(&T));
5116:   }
5117:   PetscCall(MatDestroy(&local_auxmat2_R));
5118:   PetscCall(PetscFree(p0_lidx_I));

5120:   /* coarse matrix entries relative to B_0 */
5121:   if (pcbddc->benign_n) {
5122:     Mat                B0_B, B0_BPHI;
5123:     IS                 is_dummy;
5124:     const PetscScalar *data;
5125:     PetscInt           j;

5127:     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5128:     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5129:     PetscCall(ISDestroy(&is_dummy));
5130:     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5131:     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5132:     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5133:     for (j = 0; j < pcbddc->benign_n; j++) {
5134:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5135:       for (i = 0; i < pcbddc->local_primal_size; i++) {
5136:         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5137:         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5138:       }
5139:     }
5140:     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5141:     PetscCall(MatDestroy(&B0_B));
5142:     PetscCall(MatDestroy(&B0_BPHI));
5143:   }

5145:   /* compute other basis functions for non-symmetric problems */
5146:   if (!pcbddc->symmetric_primal) {
5147:     Mat          B_V = NULL, B_C = NULL;
5148:     PetscScalar *marray, *work;

5150:     /* TODO multi_element MatDenseScatter */
5151:     if (n_constraints) {
5152:       Mat S_CCT, C_CRT;

5154:       PetscCall(MatScale(S_CC, m_one));
5155:       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5156:       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5157:       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5158:       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5159:       PetscCall(MatDestroy(&S_CCT));
5160:       if (n_vertices) {
5161:         Mat S_VCT;

5163:         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5164:         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5165:         PetscCall(MatDestroy(&S_VCT));
5166:         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5167:       }
5168:       PetscCall(MatDestroy(&C_CRT));
5169:     } else {
5170:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5171:     }
5172:     if (n_vertices && n_R) {
5173:       PetscScalar    *av, *marray;
5174:       const PetscInt *xadj, *adjncy;
5175:       PetscInt        n;
5176:       PetscBool       flg_row;

5178:       /* B_V = B_V - A_VR^T */
5179:       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5180:       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5181:       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5182:       PetscCall(MatDenseGetArray(B_V, &marray));
5183:       for (i = 0; i < n; i++) {
5184:         PetscInt j;
5185:         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5186:       }
5187:       PetscCall(MatDenseRestoreArray(B_V, &marray));
5188:       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5189:       PetscCall(MatDestroy(&A_VR));
5190:     }

5192:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5193:     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5194:     if (n_vertices) {
5195:       PetscCall(MatDenseGetArray(B_V, &marray));
5196:       for (i = 0; i < n_vertices; i++) {
5197:         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5198:         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5199:         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5200:         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5201:         PetscCall(VecResetArray(pcbddc->vec1_R));
5202:         PetscCall(VecResetArray(pcbddc->vec2_R));
5203:       }
5204:       PetscCall(MatDenseRestoreArray(B_V, &marray));
5205:     }
5206:     if (B_C) {
5207:       PetscCall(MatDenseGetArray(B_C, &marray));
5208:       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5209:         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5210:         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5211:         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5212:         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5213:         PetscCall(VecResetArray(pcbddc->vec1_R));
5214:         PetscCall(VecResetArray(pcbddc->vec2_R));
5215:       }
5216:       PetscCall(MatDenseRestoreArray(B_C, &marray));
5217:     }
5218:     /* coarse basis functions */
5219:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5220:     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5221:     for (i = 0; i < pcbddc->local_primal_size; i++) {
5222:       Vec v;

5224:       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5225:       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5226:       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5227:       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5228:       if (i < n_vertices) {
5229:         PetscScalar one = 1.0;
5230:         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5231:         PetscCall(VecAssemblyBegin(v));
5232:         PetscCall(VecAssemblyEnd(v));
5233:       }
5234:       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));

5236:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5237:         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5238:         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5239:         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5240:         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5241:       }
5242:       PetscCall(VecResetArray(pcbddc->vec1_R));
5243:     }
5244:     PetscCall(MatDestroy(&B_V));
5245:     PetscCall(MatDestroy(&B_C));
5246:     PetscCall(PetscFree(work));
5247:   } else {
5248:     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5249:     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5250:     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5251:     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5252:   }
5253:   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5254:   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));

5256:   /* free memory */
5257:   PetscCall(PetscFree(V_to_eff_V));
5258:   PetscCall(PetscFree(C_to_eff_C));
5259:   PetscCall(PetscFree(R_eff_V_J));
5260:   PetscCall(PetscFree(R_eff_C_J));
5261:   PetscCall(PetscFree(B_eff_V_J));
5262:   PetscCall(PetscFree(B_eff_C_J));
5263:   PetscCall(ISDestroy(&is_R));
5264:   PetscCall(ISRestoreIndices(is_V, &idx_V));
5265:   PetscCall(ISRestoreIndices(is_C, &idx_C));
5266:   PetscCall(ISDestroy(&is_V));
5267:   PetscCall(ISDestroy(&is_C));
5268:   PetscCall(PetscFree(idx_V_B));
5269:   PetscCall(MatDestroy(&S_CV));
5270:   PetscCall(MatDestroy(&S_VC));
5271:   PetscCall(MatDestroy(&S_CC));
5272:   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5273:   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5274:   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));

5276:   /* Checking coarse_sub_mat and coarse basis functions */
5277:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5278:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5279:   if (pcbddc->dbg_flag) {
5280:     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5281:     Mat       coarse_phi_D, coarse_phi_B;
5282:     Mat       coarse_psi_D, coarse_psi_B;
5283:     Mat       A_II, A_BB, A_IB, A_BI;
5284:     Mat       C_B, CPHI;
5285:     IS        is_dummy;
5286:     Vec       mones;
5287:     MatType   checkmattype = MATSEQAIJ;
5288:     PetscReal real_value;

5290:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5291:       Mat A;
5292:       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5293:       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5294:       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5295:       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5296:       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5297:       PetscCall(MatDestroy(&A));
5298:     } else {
5299:       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5300:       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5301:       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5302:       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5303:     }
5304:     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5305:     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5306:     if (!pcbddc->symmetric_primal) {
5307:       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5308:       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5309:     }
5310:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5311:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5312:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5313:     if (!pcbddc->symmetric_primal) {
5314:       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5315:       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5316:       PetscCall(MatDestroy(&AUXMAT));
5317:       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5318:       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5319:       PetscCall(MatDestroy(&AUXMAT));
5320:       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5321:       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5322:       PetscCall(MatDestroy(&AUXMAT));
5323:       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5324:       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5325:       PetscCall(MatDestroy(&AUXMAT));
5326:     } else {
5327:       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5328:       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5329:       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5330:       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5331:       PetscCall(MatDestroy(&AUXMAT));
5332:       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5333:       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5334:       PetscCall(MatDestroy(&AUXMAT));
5335:     }
5336:     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5337:     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5338:     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5339:     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5340:     if (pcbddc->benign_n) {
5341:       Mat                B0_B, B0_BPHI;
5342:       const PetscScalar *data2;
5343:       PetscScalar       *data;
5344:       PetscInt           j;

5346:       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5347:       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5348:       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5349:       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5350:       PetscCall(MatDenseGetArray(TM1, &data));
5351:       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5352:       for (j = 0; j < pcbddc->benign_n; j++) {
5353:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5354:         for (i = 0; i < pcbddc->local_primal_size; i++) {
5355:           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5356:           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5357:         }
5358:       }
5359:       PetscCall(MatDenseRestoreArray(TM1, &data));
5360:       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5361:       PetscCall(MatDestroy(&B0_B));
5362:       PetscCall(ISDestroy(&is_dummy));
5363:       PetscCall(MatDestroy(&B0_BPHI));
5364:     }
5365:     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5366:     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5367:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5368:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));

5370:     /* check constraints */
5371:     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5372:     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5373:     if (!pcbddc->benign_n) { /* TODO: add benign case */
5374:       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5375:     } else {
5376:       PetscScalar *data;
5377:       Mat          tmat;
5378:       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5379:       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5380:       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5381:       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5382:       PetscCall(MatDestroy(&tmat));
5383:     }
5384:     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5385:     PetscCall(VecSet(mones, -1.0));
5386:     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5387:     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5388:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5389:     if (!pcbddc->symmetric_primal) {
5390:       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5391:       PetscCall(VecSet(mones, -1.0));
5392:       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5393:       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5394:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5395:     }
5396:     PetscCall(MatDestroy(&C_B));
5397:     PetscCall(MatDestroy(&CPHI));
5398:     PetscCall(ISDestroy(&is_dummy));
5399:     PetscCall(VecDestroy(&mones));
5400:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5401:     PetscCall(MatDestroy(&A_II));
5402:     PetscCall(MatDestroy(&A_BB));
5403:     PetscCall(MatDestroy(&A_IB));
5404:     PetscCall(MatDestroy(&A_BI));
5405:     PetscCall(MatDestroy(&TM1));
5406:     PetscCall(MatDestroy(&TM2));
5407:     PetscCall(MatDestroy(&TM3));
5408:     PetscCall(MatDestroy(&TM4));
5409:     PetscCall(MatDestroy(&coarse_phi_D));
5410:     PetscCall(MatDestroy(&coarse_phi_B));
5411:     if (!pcbddc->symmetric_primal) {
5412:       PetscCall(MatDestroy(&coarse_psi_D));
5413:       PetscCall(MatDestroy(&coarse_psi_B));
5414:     }
5415:   }

5417: #if 0
5418:   {
5419:     PetscViewer viewer;
5420:     char filename[256];

5422:     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5423:     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5424:     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5425:     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5426:     PetscCall(MatView(*coarse_submat,viewer));
5427:     if (pcbddc->coarse_phi_B) {
5428:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5429:       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5430:     }
5431:     if (pcbddc->coarse_phi_D) {
5432:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5433:       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5434:     }
5435:     if (pcbddc->coarse_psi_B) {
5436:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5437:       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5438:     }
5439:     if (pcbddc->coarse_psi_D) {
5440:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5441:       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5442:     }
5443:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5444:     PetscCall(MatView(pcbddc->local_mat,viewer));
5445:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5446:     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5447:     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5448:     PetscCall(ISView(pcis->is_I_local,viewer));
5449:     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5450:     PetscCall(ISView(pcis->is_B_local,viewer));
5451:     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5452:     PetscCall(ISView(pcbddc->is_R_local,viewer));
5453:     PetscCall(PetscViewerDestroy(&viewer));
5454:   }
5455: #endif

5457:   /* device support */
5458:   {
5459:     PetscBool iscuda, iship, iskokkos;
5460:     MatType   mtype = NULL;

5462:     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5463:     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5464:     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5465:     if (iskokkos) {
5466:       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5467:       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5468:     }
5469:     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5470:     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5471:     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5472:     if (mtype) {
5473:       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5474:       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5475:       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5476:       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5477:       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5478:       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5479:     }
5480:   }
5481:   PetscFunctionReturn(PETSC_SUCCESS);
5482: }

5484: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5485: {
5486:   Mat      *work_mat;
5487:   IS        isrow_s, iscol_s;
5488:   PetscBool rsorted, csorted;
5489:   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;

5491:   PetscFunctionBegin;
5492:   PetscCall(ISSorted(isrow, &rsorted));
5493:   PetscCall(ISSorted(iscol, &csorted));
5494:   PetscCall(ISGetLocalSize(isrow, &rsize));
5495:   PetscCall(ISGetLocalSize(iscol, &csize));

5497:   if (!rsorted) {
5498:     const PetscInt *idxs;
5499:     PetscInt       *idxs_sorted, i;

5501:     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5502:     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5503:     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5504:     PetscCall(ISGetIndices(isrow, &idxs));
5505:     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5506:     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5507:     PetscCall(ISRestoreIndices(isrow, &idxs));
5508:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5509:   } else {
5510:     PetscCall(PetscObjectReference((PetscObject)isrow));
5511:     isrow_s = isrow;
5512:   }

5514:   if (!csorted) {
5515:     if (isrow == iscol) {
5516:       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5517:       iscol_s = isrow_s;
5518:     } else {
5519:       const PetscInt *idxs;
5520:       PetscInt       *idxs_sorted, i;

5522:       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5523:       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5524:       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5525:       PetscCall(ISGetIndices(iscol, &idxs));
5526:       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5527:       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5528:       PetscCall(ISRestoreIndices(iscol, &idxs));
5529:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5530:     }
5531:   } else {
5532:     PetscCall(PetscObjectReference((PetscObject)iscol));
5533:     iscol_s = iscol;
5534:   }

5536:   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));

5538:   if (!rsorted || !csorted) {
5539:     Mat new_mat;
5540:     IS  is_perm_r, is_perm_c;

5542:     if (!rsorted) {
5543:       PetscInt *idxs_r, i;
5544:       PetscCall(PetscMalloc1(rsize, &idxs_r));
5545:       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5546:       PetscCall(PetscFree(idxs_perm_r));
5547:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5548:     } else {
5549:       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5550:     }
5551:     PetscCall(ISSetPermutation(is_perm_r));

5553:     if (!csorted) {
5554:       if (isrow_s == iscol_s) {
5555:         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5556:         is_perm_c = is_perm_r;
5557:       } else {
5558:         PetscInt *idxs_c, i;
5559:         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5560:         PetscCall(PetscMalloc1(csize, &idxs_c));
5561:         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5562:         PetscCall(PetscFree(idxs_perm_c));
5563:         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5564:       }
5565:     } else {
5566:       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5567:     }
5568:     PetscCall(ISSetPermutation(is_perm_c));

5570:     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5571:     PetscCall(MatDestroy(&work_mat[0]));
5572:     work_mat[0] = new_mat;
5573:     PetscCall(ISDestroy(&is_perm_r));
5574:     PetscCall(ISDestroy(&is_perm_c));
5575:   }

5577:   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5578:   *B = work_mat[0];
5579:   PetscCall(MatDestroyMatrices(1, &work_mat));
5580:   PetscCall(ISDestroy(&isrow_s));
5581:   PetscCall(ISDestroy(&iscol_s));
5582:   PetscFunctionReturn(PETSC_SUCCESS);
5583: }

5585: static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C)
5586: {
5587:   PetscFunctionBegin;
5588:   PetscCall(MatProductCreate(A, P, NULL, C));
5589:   PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP));
5590:   PetscCall(MatProductSetAlgorithm(*C, "default"));
5591:   PetscCall(MatProductSetFill(*C, fill));
5592:   PetscCall(MatSetOptionsPrefix(*C, prefix));
5593:   PetscCall(MatProductSetFromOptions(*C));
5594:   PetscCall(MatProductSymbolic(*C));
5595:   PetscCall(MatProductNumeric(*C));
5596:   (*C)->symmetric = A->symmetric;
5597:   (*C)->spd       = A->spd;
5598:   PetscFunctionReturn(PETSC_SUCCESS);
5599: }

5601: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5602: {
5603:   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5604:   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5605:   Mat       new_mat, lA;
5606:   IS        is_local, is_global;
5607:   PetscInt  local_size;
5608:   PetscBool isseqaij, issym, isset;
5609:   char      ptapprefix[256];

5611:   PetscFunctionBegin;
5612:   PetscCall(MatDestroy(&pcbddc->local_mat));
5613:   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5614:   if (pcbddc->mat_graph->multi_element) {
5615:     Mat     *mats, *bdiags;
5616:     IS      *gsubs;
5617:     PetscInt nsubs = pcbddc->n_local_subs;

5619:     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5620: #if 1
5621:     PetscCall(PetscMalloc1(nsubs, &gsubs));
5622:     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5623:     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5624:     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5625:     PetscCall(PetscFree(gsubs));
5626: #else /* this does not work since MatCreateSubMatrices does not support repeated indices */
5627:     Mat *tmats;
5628:     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5629:     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5630:     PetscCall(ISDestroy(&is_local));
5631:     PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
5632:     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats));
5633:     PetscCall(ISDestroy(&is_global));
5634:     PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags));
5635:     PetscCall(MatDestroySubMatrices(1, &tmats));
5636: #endif
5637:     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5638:     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5639:     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5640:     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5641:     PetscCall(PetscFree(mats));
5642:   } else {
5643:     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5644:     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5645:     PetscCall(ISDestroy(&is_local));
5646:     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5647:     PetscCall(ISDestroy(&is_global));
5648:   }
5649:   if (pcbddc->dbg_flag) {
5650:     Vec       x, x_change;
5651:     PetscReal error;

5653:     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5654:     PetscCall(VecSetRandom(x, NULL));
5655:     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5656:     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5657:     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5658:     PetscCall(MatMult(new_mat, matis->x, matis->y));
5659:     if (!pcbddc->change_interior) {
5660:       const PetscScalar *x, *y, *v;
5661:       PetscReal          lerror = 0.;
5662:       PetscInt           i;

5664:       PetscCall(VecGetArrayRead(matis->x, &x));
5665:       PetscCall(VecGetArrayRead(matis->y, &y));
5666:       PetscCall(VecGetArrayRead(matis->counter, &v));
5667:       for (i = 0; i < local_size; i++)
5668:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5669:       PetscCall(VecRestoreArrayRead(matis->x, &x));
5670:       PetscCall(VecRestoreArrayRead(matis->y, &y));
5671:       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5672:       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5673:       if (error > PETSC_SMALL) {
5674:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5675:           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5676:         } else {
5677:           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5678:         }
5679:       }
5680:     }
5681:     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5682:     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5683:     PetscCall(VecAXPY(x, -1.0, x_change));
5684:     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5685:     if (error > PETSC_SMALL) {
5686:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5687:         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5688:       } else {
5689:         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5690:       }
5691:     }
5692:     PetscCall(VecDestroy(&x));
5693:     PetscCall(VecDestroy(&x_change));
5694:   }

5696:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5697:   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));

5699:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5700:   if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix));
5701:   else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_"));
5702:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5703:   if (isseqaij) {
5704:     PetscCall(MatDestroy(&pcbddc->local_mat));
5705:     PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5706:     if (lA) {
5707:       Mat work;
5708:       PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5709:       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5710:       PetscCall(MatDestroy(&work));
5711:     }
5712:   } else {
5713:     Mat work_mat;

5715:     PetscCall(MatDestroy(&pcbddc->local_mat));
5716:     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5717:     PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5718:     PetscCall(MatDestroy(&work_mat));
5719:     if (lA) {
5720:       Mat work;
5721:       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5722:       PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5723:       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5724:       PetscCall(MatDestroy(&work));
5725:     }
5726:   }
5727:   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5728:   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5729:   PetscCall(MatDestroy(&new_mat));
5730:   PetscFunctionReturn(PETSC_SUCCESS);
5731: }

5733: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5734: {
5735:   PC_IS          *pcis        = (PC_IS *)pc->data;
5736:   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5737:   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5738:   PetscInt       *idx_R_local = NULL;
5739:   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5740:   PetscInt        vbs, bs;
5741:   PetscBT         bitmask = NULL;

5743:   PetscFunctionBegin;
5744:   /*
5745:     No need to setup local scatters if
5746:       - primal space is unchanged
5747:         AND
5748:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5749:         AND
5750:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5751:   */
5752:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5753:   /* destroy old objects */
5754:   PetscCall(ISDestroy(&pcbddc->is_R_local));
5755:   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5756:   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5757:   /* Set Non-overlapping dimensions */
5758:   n_B        = pcis->n_B;
5759:   n_D        = pcis->n - n_B;
5760:   n_vertices = pcbddc->n_vertices;

5762:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

5764:   /* create auxiliary bitmask and allocate workspace */
5765:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5766:     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5767:     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5768:     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));

5770:     for (i = 0, n_R = 0; i < pcis->n; i++) {
5771:       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5772:     }
5773:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5774:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5776:     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5777:     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5778:   }

5780:   /* Block code */
5781:   vbs = 1;
5782:   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5783:   if (bs > 1 && !(n_vertices % bs)) {
5784:     PetscBool is_blocked = PETSC_TRUE;
5785:     PetscInt *vary;
5786:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5787:       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5788:       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5789:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5790:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5791:       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5792:       for (i = 0; i < pcis->n / bs; i++) {
5793:         if (vary[i] != 0 && vary[i] != bs) {
5794:           is_blocked = PETSC_FALSE;
5795:           break;
5796:         }
5797:       }
5798:       PetscCall(PetscFree(vary));
5799:     } else {
5800:       /* Verify directly the R set */
5801:       for (i = 0; i < n_R / bs; i++) {
5802:         PetscInt j, node = idx_R_local[bs * i];
5803:         for (j = 1; j < bs; j++) {
5804:           if (node != idx_R_local[bs * i + j] - j) {
5805:             is_blocked = PETSC_FALSE;
5806:             break;
5807:           }
5808:         }
5809:       }
5810:     }
5811:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5812:       vbs = bs;
5813:       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5814:     }
5815:   }
5816:   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5817:   if (sub_schurs && sub_schurs->reuse_solver) {
5818:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5820:     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5821:     PetscCall(ISDestroy(&reuse_solver->is_R));
5822:     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5823:     reuse_solver->is_R = pcbddc->is_R_local;
5824:   } else {
5825:     PetscCall(PetscFree(idx_R_local));
5826:   }

5828:   /* print some info if requested */
5829:   if (pcbddc->dbg_flag) {
5830:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5831:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5832:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5833:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5834:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5835:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5836:                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5837:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5838:   }

5840:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5841:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5842:     IS        is_aux1, is_aux2;
5843:     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;

5845:     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5846:     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5847:     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5848:     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5849:     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5850:     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5851:     for (i = 0, j = 0; i < n_R; i++) {
5852:       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5853:     }
5854:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5855:     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5856:     for (i = 0, j = 0; i < n_B; i++) {
5857:       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5858:     }
5859:     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5860:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5861:     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5862:     PetscCall(ISDestroy(&is_aux1));
5863:     PetscCall(ISDestroy(&is_aux2));

5865:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5866:       PetscCall(PetscMalloc1(n_D, &aux_array1));
5867:       for (i = 0, j = 0; i < n_R; i++) {
5868:         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5869:       }
5870:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5871:       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5872:       PetscCall(ISDestroy(&is_aux1));
5873:     }
5874:     PetscCall(PetscBTDestroy(&bitmask));
5875:     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5876:   } else {
5877:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5878:     IS                 tis;
5879:     PetscInt           schur_size;

5881:     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5882:     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5883:     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5884:     PetscCall(ISDestroy(&tis));
5885:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5886:       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5887:       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5888:       PetscCall(ISDestroy(&tis));
5889:     }
5890:   }
5891:   PetscFunctionReturn(PETSC_SUCCESS);
5892: }

5894: PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5895: {
5896:   MatNullSpace NullSpace;
5897:   Mat          dmat;
5898:   const Vec   *nullvecs;
5899:   Vec          v, v2, *nullvecs2;
5900:   VecScatter   sct = NULL;
5901:   PetscScalar *ddata;
5902:   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5903:   PetscBool    nnsp_has_cnst;

5905:   PetscFunctionBegin;
5906:   if (!is && !B) { /* MATIS */
5907:     Mat_IS *matis = (Mat_IS *)A->data;

5909:     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5910:     sct = matis->cctx;
5911:     PetscCall(PetscObjectReference((PetscObject)sct));
5912:   } else {
5913:     PetscCall(MatGetNullSpace(B, &NullSpace));
5914:     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5915:     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5916:   }
5917:   PetscCall(MatGetNullSpace(A, &NullSpace));
5918:   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5919:   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);

5921:   PetscCall(MatCreateVecs(A, &v, NULL));
5922:   PetscCall(MatCreateVecs(B, &v2, NULL));
5923:   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5924:   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5925:   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5926:   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5927:   PetscCall(VecGetBlockSize(v2, &bs));
5928:   PetscCall(VecGetSize(v2, &N));
5929:   PetscCall(VecGetLocalSize(v2, &n));
5930:   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5931:   for (k = 0; k < nnsp_size; k++) {
5932:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5933:     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5934:     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5935:   }
5936:   if (nnsp_has_cnst) {
5937:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5938:     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5939:   }
5940:   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5941:   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));

5943:   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5944:   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5945:   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5946:   PetscCall(MatDestroy(&dmat));

5948:   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5949:   PetscCall(PetscFree(nullvecs2));
5950:   PetscCall(MatSetNearNullSpace(B, NullSpace));
5951:   PetscCall(MatNullSpaceDestroy(&NullSpace));
5952:   PetscCall(VecDestroy(&v));
5953:   PetscCall(VecDestroy(&v2));
5954:   PetscCall(VecScatterDestroy(&sct));
5955:   PetscFunctionReturn(PETSC_SUCCESS);
5956: }

5958: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5959: {
5960:   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5961:   PC_IS       *pcis   = (PC_IS *)pc->data;
5962:   PC           pc_temp;
5963:   Mat          A_RR;
5964:   MatNullSpace nnsp;
5965:   MatReuse     reuse;
5966:   PetscScalar  m_one = -1.0;
5967:   PetscReal    value;
5968:   PetscInt     n_D, n_R;
5969:   PetscBool    issbaij, opts, isset, issym;
5970:   PetscBool    f = PETSC_FALSE;
5971:   char         dir_prefix[256], neu_prefix[256], str_level[16];
5972:   size_t       len;

5974:   PetscFunctionBegin;
5975:   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5976:   /* approximate solver, propagate NearNullSpace if needed */
5977:   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5978:     MatNullSpace gnnsp1, gnnsp2;
5979:     PetscBool    lhas, ghas;

5981:     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5982:     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5983:     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5984:     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5985:     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5986:     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5987:   }

5989:   /* compute prefixes */
5990:   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5991:   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5992:   if (!pcbddc->current_level) {
5993:     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5994:     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5995:     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5996:     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5997:   } else {
5998:     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5999:     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
6000:     len -= 15;                                /* remove "pc_bddc_coarse_" */
6001:     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
6002:     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
6003:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
6004:     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
6005:     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
6006:     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
6007:     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
6008:     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
6009:     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
6010:   }

6012:   /* DIRICHLET PROBLEM */
6013:   if (dirichlet) {
6014:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6015:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
6016:       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
6017:       if (pcbddc->dbg_flag) {
6018:         Mat A_IIn;

6020:         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
6021:         PetscCall(MatDestroy(&pcis->A_II));
6022:         pcis->A_II = A_IIn;
6023:       }
6024:     }
6025:     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6026:     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));

6028:     /* Matrix for Dirichlet problem is pcis->A_II */
6029:     n_D  = pcis->n - pcis->n_B;
6030:     opts = PETSC_FALSE;
6031:     if (!pcbddc->ksp_D) { /* create object if not yet build */
6032:       opts = PETSC_TRUE;
6033:       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
6034:       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
6035:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
6036:       /* default */
6037:       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
6038:       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
6039:       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
6040:       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6041:       if (issbaij) {
6042:         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6043:       } else {
6044:         PetscCall(PCSetType(pc_temp, PCLU));
6045:       }
6046:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
6047:     }
6048:     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
6049:     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
6050:     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
6051:     /* Allow user's customization */
6052:     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
6053:     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6054:     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6055:       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
6056:     }
6057:     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6058:     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6059:     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6060:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6061:       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6062:       const PetscInt *idxs;
6063:       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;

6065:       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
6066:       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
6067:       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6068:       for (i = 0; i < nl; i++) {
6069:         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6070:       }
6071:       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
6072:       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6073:       PetscCall(PetscFree(scoords));
6074:     }
6075:     if (sub_schurs && sub_schurs->reuse_solver) {
6076:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

6078:       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
6079:     }

6081:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6082:     if (!n_D) {
6083:       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6084:       PetscCall(PCSetType(pc_temp, PCNONE));
6085:     }
6086:     PetscCall(KSPSetUp(pcbddc->ksp_D));
6087:     /* set ksp_D into pcis data */
6088:     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
6089:     PetscCall(KSPDestroy(&pcis->ksp_D));
6090:     pcis->ksp_D = pcbddc->ksp_D;
6091:   }

6093:   /* NEUMANN PROBLEM */
6094:   A_RR = NULL;
6095:   if (neumann) {
6096:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6097:     PetscInt        ibs, mbs;
6098:     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
6099:     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;

6101:     reuse_neumann_solver = PETSC_FALSE;
6102:     if (sub_schurs && sub_schurs->reuse_solver) {
6103:       IS iP;

6105:       reuse_neumann_solver = PETSC_TRUE;
6106:       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
6107:       if (iP) reuse_neumann_solver = PETSC_FALSE;
6108:     }
6109:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
6110:     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
6111:     if (pcbddc->ksp_R) { /* already created ksp */
6112:       PetscInt nn_R;
6113:       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
6114:       PetscCall(PetscObjectReference((PetscObject)A_RR));
6115:       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
6116:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
6117:         PetscCall(KSPReset(pcbddc->ksp_R));
6118:         PetscCall(MatDestroy(&A_RR));
6119:         reuse = MAT_INITIAL_MATRIX;
6120:       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
6121:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
6122:           PetscCall(MatDestroy(&A_RR));
6123:           reuse = MAT_INITIAL_MATRIX;
6124:         } else { /* safe to reuse the matrix */
6125:           reuse = MAT_REUSE_MATRIX;
6126:         }
6127:       }
6128:       /* last check */
6129:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
6130:         PetscCall(MatDestroy(&A_RR));
6131:         reuse = MAT_INITIAL_MATRIX;
6132:       }
6133:     } else { /* first time, so we need to create the matrix */
6134:       reuse = MAT_INITIAL_MATRIX;
6135:     }
6136:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
6137:        TODO: Get Rid of these conversions */
6138:     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
6139:     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
6140:     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
6141:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
6142:       if (matis->A == pcbddc->local_mat) {
6143:         PetscCall(MatDestroy(&pcbddc->local_mat));
6144:         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6145:       } else {
6146:         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6147:       }
6148:     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6149:       if (matis->A == pcbddc->local_mat) {
6150:         PetscCall(MatDestroy(&pcbddc->local_mat));
6151:         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6152:       } else {
6153:         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6154:       }
6155:     }
6156:     /* extract A_RR */
6157:     if (reuse_neumann_solver) {
6158:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

6160:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6161:         PetscCall(MatDestroy(&A_RR));
6162:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6163:           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6164:         } else {
6165:           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6166:         }
6167:       } else {
6168:         PetscCall(MatDestroy(&A_RR));
6169:         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6170:         PetscCall(PetscObjectReference((PetscObject)A_RR));
6171:       }
6172:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6173:       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6174:     }
6175:     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6176:     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6177:     opts = PETSC_FALSE;
6178:     if (!pcbddc->ksp_R) { /* create object if not present */
6179:       opts = PETSC_TRUE;
6180:       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6181:       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6182:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6183:       /* default */
6184:       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6185:       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6186:       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6187:       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6188:       if (issbaij) {
6189:         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6190:       } else {
6191:         PetscCall(PCSetType(pc_temp, PCLU));
6192:       }
6193:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6194:     }
6195:     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6196:     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6197:     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6198:     if (opts) { /* Allow user's customization once */
6199:       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6200:     }
6201:     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6202:     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6203:       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6204:     }
6205:     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6206:     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6207:     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6208:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6209:       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6210:       const PetscInt *idxs;
6211:       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;

6213:       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6214:       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6215:       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6216:       for (i = 0; i < nl; i++) {
6217:         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6218:       }
6219:       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6220:       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6221:       PetscCall(PetscFree(scoords));
6222:     }

6224:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6225:     if (!n_R) {
6226:       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6227:       PetscCall(PCSetType(pc_temp, PCNONE));
6228:     }
6229:     /* Reuse solver if it is present */
6230:     if (reuse_neumann_solver) {
6231:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

6233:       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6234:     }
6235:     PetscCall(KSPSetUp(pcbddc->ksp_R));
6236:   }

6238:   if (pcbddc->dbg_flag) {
6239:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6240:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6241:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6242:   }
6243:   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));

6245:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6246:   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6247:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6248:   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6249:   /* check Dirichlet and Neumann solvers */
6250:   if (pcbddc->dbg_flag) {
6251:     if (dirichlet) { /* Dirichlet */
6252:       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6253:       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6254:       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6255:       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6256:       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6257:       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6258:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6259:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6260:     }
6261:     if (neumann) { /* Neumann */
6262:       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6263:       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6264:       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6265:       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6266:       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6267:       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6268:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6269:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6270:     }
6271:   }
6272:   /* free Neumann problem's matrix */
6273:   PetscCall(MatDestroy(&A_RR));
6274:   PetscFunctionReturn(PETSC_SUCCESS);
6275: }

6277: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6278: {
6279:   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6280:   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6281:   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;

6283:   PetscFunctionBegin;
6284:   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6285:   if (!pcbddc->switch_static) {
6286:     if (applytranspose && pcbddc->local_auxmat1) {
6287:       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6288:       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6289:     }
6290:     if (!reuse_solver) {
6291:       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6292:       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6293:     } else {
6294:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

6296:       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6297:       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6298:     }
6299:   } else {
6300:     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6301:     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6302:     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6303:     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6304:     if (applytranspose && pcbddc->local_auxmat1) {
6305:       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6306:       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6307:       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6308:       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6309:     }
6310:   }
6311:   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6312:   if (!reuse_solver || pcbddc->switch_static) {
6313:     if (applytranspose) {
6314:       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6315:     } else {
6316:       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6317:     }
6318:     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6319:   } else {
6320:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

6322:     if (applytranspose) {
6323:       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6324:     } else {
6325:       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6326:     }
6327:   }
6328:   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6329:   PetscCall(VecSet(inout_B, 0.));
6330:   if (!pcbddc->switch_static) {
6331:     if (!reuse_solver) {
6332:       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6333:       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6334:     } else {
6335:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

6337:       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6338:       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6339:     }
6340:     if (!applytranspose && pcbddc->local_auxmat1) {
6341:       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6342:       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6343:     }
6344:   } else {
6345:     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6346:     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6347:     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6348:     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6349:     if (!applytranspose && pcbddc->local_auxmat1) {
6350:       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6351:       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6352:     }
6353:     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6354:     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6355:     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6356:     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6357:   }
6358:   PetscFunctionReturn(PETSC_SUCCESS);
6359: }

6361: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6362: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6363: {
6364:   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6365:   PC_IS            *pcis   = (PC_IS *)pc->data;
6366:   const PetscScalar zero   = 0.0;

6368:   PetscFunctionBegin;
6369:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6370:   if (!pcbddc->benign_apply_coarse_only) {
6371:     if (applytranspose) {
6372:       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6373:       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6374:     } else {
6375:       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6376:       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6377:     }
6378:   } else {
6379:     PetscCall(VecSet(pcbddc->vec1_P, zero));
6380:   }

6382:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6383:   if (pcbddc->benign_n) {
6384:     PetscScalar *array;
6385:     PetscInt     j;

6387:     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6388:     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6389:     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6390:   }

6392:   /* start communications from local primal nodes to rhs of coarse solver */
6393:   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6394:   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6395:   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));

6397:   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6398:   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6399:   if (pcbddc->coarse_ksp) {
6400:     Mat          coarse_mat;
6401:     Vec          rhs, sol;
6402:     MatNullSpace nullsp;
6403:     PetscBool    isbddc = PETSC_FALSE;

6405:     if (pcbddc->benign_have_null) {
6406:       PC coarse_pc;

6408:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6409:       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6410:       /* we need to propagate to coarser levels the need for a possible benign correction */
6411:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6412:         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6413:         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6414:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6415:       }
6416:     }
6417:     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6418:     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6419:     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6420:     if (applytranspose) {
6421:       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6422:       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6423:       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6424:       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6425:       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6426:     } else {
6427:       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6428:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6429:         PC coarse_pc;

6431:         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6432:         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6433:         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6434:         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6435:         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6436:       } else {
6437:         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6438:         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6439:         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6440:       }
6441:     }
6442:     /* we don't need the benign correction at coarser levels anymore */
6443:     if (pcbddc->benign_have_null && isbddc) {
6444:       PC       coarse_pc;
6445:       PC_BDDC *coarsepcbddc;

6447:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6448:       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6449:       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6450:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6451:     }
6452:   }
6453:   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));

6455:   /* Local solution on R nodes */
6456:   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6457:   /* communications from coarse sol to local primal nodes */
6458:   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6459:   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));

6461:   /* Sum contributions from the two levels */
6462:   if (!pcbddc->benign_apply_coarse_only) {
6463:     if (applytranspose) {
6464:       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6465:       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6466:     } else {
6467:       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6468:       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6469:     }
6470:     /* store p0 */
6471:     if (pcbddc->benign_n) {
6472:       PetscScalar *array;
6473:       PetscInt     j;

6475:       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6476:       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6477:       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6478:     }
6479:   } else { /* expand the coarse solution */
6480:     if (applytranspose) {
6481:       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6482:     } else {
6483:       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6484:     }
6485:   }
6486:   PetscFunctionReturn(PETSC_SUCCESS);
6487: }

6489: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6490: {
6491:   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6492:   Vec                from, to;
6493:   const PetscScalar *array;

6495:   PetscFunctionBegin;
6496:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6497:     from = pcbddc->coarse_vec;
6498:     to   = pcbddc->vec1_P;
6499:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6500:       Vec tvec;

6502:       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6503:       PetscCall(VecResetArray(tvec));
6504:       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6505:       PetscCall(VecGetArrayRead(tvec, &array));
6506:       PetscCall(VecPlaceArray(from, array));
6507:       PetscCall(VecRestoreArrayRead(tvec, &array));
6508:     }
6509:   } else { /* from local to global -> put data in coarse right-hand side */
6510:     from = pcbddc->vec1_P;
6511:     to   = pcbddc->coarse_vec;
6512:   }
6513:   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6514:   PetscFunctionReturn(PETSC_SUCCESS);
6515: }

6517: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6518: {
6519:   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6520:   Vec                from, to;
6521:   const PetscScalar *array;

6523:   PetscFunctionBegin;
6524:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6525:     from = pcbddc->coarse_vec;
6526:     to   = pcbddc->vec1_P;
6527:   } else { /* from local to global -> put data in coarse right-hand side */
6528:     from = pcbddc->vec1_P;
6529:     to   = pcbddc->coarse_vec;
6530:   }
6531:   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6532:   if (smode == SCATTER_FORWARD) {
6533:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6534:       Vec tvec;

6536:       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6537:       PetscCall(VecGetArrayRead(to, &array));
6538:       PetscCall(VecPlaceArray(tvec, array));
6539:       PetscCall(VecRestoreArrayRead(to, &array));
6540:     }
6541:   } else {
6542:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6543:       PetscCall(VecResetArray(from));
6544:     }
6545:   }
6546:   PetscFunctionReturn(PETSC_SUCCESS);
6547: }

6549: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6550: {
6551:   PC_IS   *pcis   = (PC_IS *)pc->data;
6552:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6553:   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6554:   /* one and zero */
6555:   PetscScalar one = 1.0, zero = 0.0;
6556:   /* space to store constraints and their local indices */
6557:   PetscScalar *constraints_data;
6558:   PetscInt    *constraints_idxs, *constraints_idxs_B;
6559:   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6560:   PetscInt    *constraints_n;
6561:   /* iterators */
6562:   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6563:   /* BLAS integers */
6564:   PetscBLASInt lwork, lierr;
6565:   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6566:   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6567:   /* reuse */
6568:   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6569:   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6570:   /* change of basis */
6571:   PetscBool qr_needed;
6572:   PetscBT   change_basis, qr_needed_idx;
6573:   /* auxiliary stuff */
6574:   PetscInt *nnz, *is_indices;
6575:   PetscInt  ncc;
6576:   /* some quantities */
6577:   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6578:   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6579:   PetscReal tol; /* tolerance for retaining eigenmodes */

6581:   PetscFunctionBegin;
6582:   tol = PetscSqrtReal(PETSC_SMALL);
6583:   /* Destroy Mat objects computed previously */
6584:   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6585:   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6586:   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6587:   /* save info on constraints from previous setup (if any) */
6588:   olocal_primal_size    = pcbddc->local_primal_size;
6589:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6590:   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6591:   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6592:   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6593:   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6594:   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));

6596:   if (!pcbddc->adaptive_selection) {
6597:     IS           ISForVertices, *ISForFaces, *ISForEdges;
6598:     MatNullSpace nearnullsp;
6599:     const Vec   *nearnullvecs;
6600:     Vec         *localnearnullsp;
6601:     PetscScalar *array;
6602:     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6603:     PetscBool    nnsp_has_cnst;
6604:     /* LAPACK working arrays for SVD or POD */
6605:     PetscBool    skip_lapack, boolforchange;
6606:     PetscScalar *work;
6607:     PetscReal   *singular_vals;
6608: #if defined(PETSC_USE_COMPLEX)
6609:     PetscReal *rwork;
6610: #endif
6611:     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6612:     PetscBLASInt dummy_int    = 1;
6613:     PetscScalar  dummy_scalar = 1.;
6614:     PetscBool    use_pod      = PETSC_FALSE;

6616:     /* MKL SVD with same input gives different results on different processes! */
6617: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6618:     use_pod = PETSC_TRUE;
6619: #endif
6620:     /* Get index sets for faces, edges and vertices from graph */
6621:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6622:     o_nf       = n_ISForFaces;
6623:     o_ne       = n_ISForEdges;
6624:     n_vertices = 0;
6625:     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6626:     /* print some info */
6627:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6628:       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6629:       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6630:       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6631:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6632:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6633:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6634:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6635:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6636:       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6637:     }

6639:     if (!pcbddc->use_vertices) n_vertices = 0;
6640:     if (!pcbddc->use_edges) n_ISForEdges = 0;
6641:     if (!pcbddc->use_faces) n_ISForFaces = 0;

6643:     /* check if near null space is attached to global mat */
6644:     if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6645:     else nearnullsp = NULL;

6647:     if (nearnullsp) {
6648:       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6649:       /* remove any stored info */
6650:       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6651:       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6652:       /* store information for BDDC solver reuse */
6653:       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6654:       pcbddc->onearnullspace = nearnullsp;
6655:       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6656:       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6657:     } else { /* if near null space is not provided BDDC uses constants by default */
6658:       nnsp_size     = 0;
6659:       nnsp_has_cnst = PETSC_TRUE;
6660:     }
6661:     /* get max number of constraints on a single cc */
6662:     max_constraints = nnsp_size;
6663:     if (nnsp_has_cnst) max_constraints++;

6665:     /*
6666:          Evaluate maximum storage size needed by the procedure
6667:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6668:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6669:          There can be multiple constraints per connected component
6670:                                                                                                                                                            */
6671:     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6672:     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));

6674:     total_counts = n_ISForFaces + n_ISForEdges;
6675:     total_counts *= max_constraints;
6676:     total_counts += n_vertices;
6677:     PetscCall(PetscBTCreate(total_counts, &change_basis));

6679:     total_counts           = 0;
6680:     max_size_of_constraint = 0;
6681:     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6682:       IS used_is;
6683:       if (i < n_ISForEdges) {
6684:         used_is = ISForEdges[i];
6685:       } else {
6686:         used_is = ISForFaces[i - n_ISForEdges];
6687:       }
6688:       PetscCall(ISGetSize(used_is, &j));
6689:       total_counts += j;
6690:       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6691:     }
6692:     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));

6694:     /* get local part of global near null space vectors */
6695:     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6696:     for (k = 0; k < nnsp_size; k++) {
6697:       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6698:       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6699:       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6700:     }

6702:     /* whether or not to skip lapack calls */
6703:     skip_lapack = PETSC_TRUE;
6704:     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

6706:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6707:     if (!skip_lapack) {
6708:       PetscScalar temp_work;

6710:       if (use_pod) {
6711:         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6712:         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6713:         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6714:         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6715: #if defined(PETSC_USE_COMPLEX)
6716:         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6717: #endif
6718:         /* now we evaluate the optimal workspace using query with lwork=-1 */
6719:         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6720:         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6721:         lwork = -1;
6722:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6723: #if !defined(PETSC_USE_COMPLEX)
6724:         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6725: #else
6726:         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6727: #endif
6728:         PetscCall(PetscFPTrapPop());
6729:         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6730:       } else {
6731: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6732:         /* SVD */
6733:         PetscInt max_n, min_n;
6734:         max_n = max_size_of_constraint;
6735:         min_n = max_constraints;
6736:         if (max_size_of_constraint < max_constraints) {
6737:           min_n = max_size_of_constraint;
6738:           max_n = max_constraints;
6739:         }
6740:         PetscCall(PetscMalloc1(min_n, &singular_vals));
6741:   #if defined(PETSC_USE_COMPLEX)
6742:         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6743:   #endif
6744:         /* now we evaluate the optimal workspace using query with lwork=-1 */
6745:         lwork = -1;
6746:         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6747:         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6748:         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6749:         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6750:   #if !defined(PETSC_USE_COMPLEX)
6751:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6752:   #else
6753:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6754:   #endif
6755:         PetscCall(PetscFPTrapPop());
6756:         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6757: #else
6758:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6759: #endif /* on missing GESVD */
6760:       }
6761:       /* Allocate optimal workspace */
6762:       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6763:       PetscCall(PetscMalloc1(lwork, &work));
6764:     }
6765:     /* Now we can loop on constraining sets */
6766:     total_counts            = 0;
6767:     constraints_idxs_ptr[0] = 0;
6768:     constraints_data_ptr[0] = 0;
6769:     /* vertices */
6770:     if (n_vertices) {
6771:       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6772:       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6773:       for (i = 0; i < n_vertices; i++) {
6774:         constraints_n[total_counts]            = 1;
6775:         constraints_data[total_counts]         = 1.0;
6776:         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6777:         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6778:         total_counts++;
6779:       }
6780:       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6781:     }

6783:     /* edges and faces */
6784:     total_counts_cc = total_counts;
6785:     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6786:       IS        used_is;
6787:       PetscBool idxs_copied = PETSC_FALSE;

6789:       if (ncc < n_ISForEdges) {
6790:         used_is       = ISForEdges[ncc];
6791:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6792:       } else {
6793:         used_is       = ISForFaces[ncc - n_ISForEdges];
6794:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6795:       }
6796:       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */

6798:       PetscCall(ISGetSize(used_is, &size_of_constraint));
6799:       if (!size_of_constraint) continue;
6800:       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6801:       if (nnsp_has_cnst) {
6802:         PetscScalar quad_value;

6804:         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6805:         idxs_copied = PETSC_TRUE;

6807:         if (!pcbddc->use_nnsp_true) {
6808:           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6809:         } else {
6810:           quad_value = 1.0;
6811:         }
6812:         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6813:         temp_constraints++;
6814:         total_counts++;
6815:       }
6816:       for (k = 0; k < nnsp_size; k++) {
6817:         PetscReal    real_value;
6818:         PetscScalar *ptr_to_data;

6820:         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6821:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6822:         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6823:         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6824:         /* check if array is null on the connected component */
6825:         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6826:         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6827:         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6828:           temp_constraints++;
6829:           total_counts++;
6830:           if (!idxs_copied) {
6831:             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6832:             idxs_copied = PETSC_TRUE;
6833:           }
6834:         }
6835:       }
6836:       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6837:       valid_constraints = temp_constraints;
6838:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6839:         if (temp_constraints == 1) { /* just normalize the constraint */
6840:           PetscScalar norm, *ptr_to_data;

6842:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6843:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6844:           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6845:           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6846:           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6847:         } else { /* perform SVD */
6848:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6850:           if (use_pod) {
6851:             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6852:                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6853:                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6854:                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6855:                   from that computed using LAPACKgesvd
6856:                -> This is due to a different computation of eigenvectors in LAPACKheev
6857:                -> The quality of the POD-computed basis will be the same */
6858:             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6859:             /* Store upper triangular part of correlation matrix */
6860:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6861:             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6862:             for (j = 0; j < temp_constraints; j++) {
6863:               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6864:             }
6865:             /* compute eigenvalues and eigenvectors of correlation matrix */
6866:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6867:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6868: #if !defined(PETSC_USE_COMPLEX)
6869:             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6870: #else
6871:             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6872: #endif
6873:             PetscCall(PetscFPTrapPop());
6874:             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6875:             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6876:             j = 0;
6877:             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6878:             total_counts      = total_counts - j;
6879:             valid_constraints = temp_constraints - j;
6880:             /* scale and copy POD basis into used quadrature memory */
6881:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6882:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6883:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6884:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6885:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6886:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6887:             if (j < temp_constraints) {
6888:               PetscInt ii;
6889:               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6890:               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6891:               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6892:               PetscCall(PetscFPTrapPop());
6893:               for (k = 0; k < temp_constraints - j; k++) {
6894:                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6895:               }
6896:             }
6897:           } else {
6898: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6899:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6900:             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6901:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6902:             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6903:   #if !defined(PETSC_USE_COMPLEX)
6904:             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6905:   #else
6906:             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6907:   #endif
6908:             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6909:             PetscCall(PetscFPTrapPop());
6910:             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6911:             k = temp_constraints;
6912:             if (k > size_of_constraint) k = size_of_constraint;
6913:             j = 0;
6914:             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6915:             valid_constraints = k - j;
6916:             total_counts      = total_counts - temp_constraints + valid_constraints;
6917: #else
6918:             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6919: #endif /* on missing GESVD */
6920:           }
6921:         }
6922:       }
6923:       /* update pointers information */
6924:       if (valid_constraints) {
6925:         constraints_n[total_counts_cc]            = valid_constraints;
6926:         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6927:         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6928:         /* set change_of_basis flag */
6929:         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6930:         total_counts_cc++;
6931:       }
6932:     }
6933:     /* free workspace */
6934:     if (!skip_lapack) {
6935:       PetscCall(PetscFree(work));
6936: #if defined(PETSC_USE_COMPLEX)
6937:       PetscCall(PetscFree(rwork));
6938: #endif
6939:       PetscCall(PetscFree(singular_vals));
6940:       PetscCall(PetscFree(correlation_mat));
6941:       PetscCall(PetscFree(temp_basis));
6942:     }
6943:     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6944:     PetscCall(PetscFree(localnearnullsp));
6945:     /* free index sets of faces, edges and vertices */
6946:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6947:   } else {
6948:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6950:     total_counts = 0;
6951:     n_vertices   = 0;
6952:     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6953:     max_constraints = 0;
6954:     total_counts_cc = 0;
6955:     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6956:       total_counts += pcbddc->adaptive_constraints_n[i];
6957:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6958:       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6959:     }
6960:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6961:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6962:     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6963:     constraints_data     = pcbddc->adaptive_constraints_data;
6964:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6965:     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6966:     total_counts_cc = 0;
6967:     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6968:       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6969:     }

6971:     max_size_of_constraint = 0;
6972:     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6973:     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6974:     /* Change of basis */
6975:     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6976:     if (pcbddc->use_change_of_basis) {
6977:       for (i = 0; i < sub_schurs->n_subs; i++) {
6978:         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6979:       }
6980:     }
6981:   }
6982:   pcbddc->local_primal_size = total_counts;
6983:   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));

6985:   /* map constraints_idxs in boundary numbering */
6986:   if (pcbddc->use_change_of_basis) {
6987:     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6988:     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6989:   }

6991:   /* Create constraint matrix */
6992:   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6993:   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6994:   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));

6996:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6997:   /* determine if a QR strategy is needed for change of basis */
6998:   qr_needed = pcbddc->use_qr_single;
6999:   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
7000:   total_primal_vertices        = 0;
7001:   pcbddc->local_primal_size_cc = 0;
7002:   for (i = 0; i < total_counts_cc; i++) {
7003:     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7004:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
7005:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
7006:       pcbddc->local_primal_size_cc += 1;
7007:     } else if (PetscBTLookup(change_basis, i)) {
7008:       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
7009:       pcbddc->local_primal_size_cc += constraints_n[i];
7010:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
7011:         PetscCall(PetscBTSet(qr_needed_idx, i));
7012:         qr_needed = PETSC_TRUE;
7013:       }
7014:     } else {
7015:       pcbddc->local_primal_size_cc += 1;
7016:     }
7017:   }
7018:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
7019:   pcbddc->n_vertices = total_primal_vertices;
7020:   /* permute indices in order to have a sorted set of vertices */
7021:   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
7022:   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
7023:   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
7024:   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;

7026:   /* nonzero structure of constraint matrix */
7027:   /* and get reference dof for local constraints */
7028:   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
7029:   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;

7031:   j            = total_primal_vertices;
7032:   total_counts = total_primal_vertices;
7033:   cum          = total_primal_vertices;
7034:   for (i = n_vertices; i < total_counts_cc; i++) {
7035:     if (!PetscBTLookup(change_basis, i)) {
7036:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
7037:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
7038:       cum++;
7039:       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7040:       for (k = 0; k < constraints_n[i]; k++) {
7041:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
7042:         nnz[j + k]                                        = size_of_constraint;
7043:       }
7044:       j += constraints_n[i];
7045:     }
7046:   }
7047:   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
7048:   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7049:   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
7050:   PetscCall(PetscFree(nnz));

7052:   /* set values in constraint matrix */
7053:   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
7054:   total_counts = total_primal_vertices;
7055:   for (i = n_vertices; i < total_counts_cc; i++) {
7056:     if (!PetscBTLookup(change_basis, i)) {
7057:       PetscInt *cols;

7059:       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7060:       cols               = constraints_idxs + constraints_idxs_ptr[i];
7061:       for (k = 0; k < constraints_n[i]; k++) {
7062:         PetscInt     row = total_counts + k;
7063:         PetscScalar *vals;

7065:         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
7066:         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
7067:       }
7068:       total_counts += constraints_n[i];
7069:     }
7070:   }
7071:   /* assembling */
7072:   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7073:   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7074:   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));

7076:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
7077:   if (pcbddc->use_change_of_basis) {
7078:     /* dual and primal dofs on a single cc */
7079:     PetscInt dual_dofs, primal_dofs;
7080:     /* working stuff for GEQRF */
7081:     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
7082:     PetscBLASInt lqr_work;
7083:     /* working stuff for UNGQR */
7084:     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
7085:     PetscBLASInt lgqr_work;
7086:     /* working stuff for TRTRS */
7087:     PetscScalar *trs_rhs = NULL;
7088:     PetscBLASInt Blas_NRHS;
7089:     /* pointers for values insertion into change of basis matrix */
7090:     PetscInt    *start_rows, *start_cols;
7091:     PetscScalar *start_vals;
7092:     /* working stuff for values insertion */
7093:     PetscBT   is_primal;
7094:     PetscInt *aux_primal_numbering_B;
7095:     /* matrix sizes */
7096:     PetscInt global_size, local_size;
7097:     /* temporary change of basis */
7098:     Mat localChangeOfBasisMatrix;
7099:     /* extra space for debugging */
7100:     PetscScalar *dbg_work = NULL;

7102:     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
7103:     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
7104:     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
7105:     /* nonzeros for local mat */
7106:     PetscCall(PetscMalloc1(pcis->n, &nnz));
7107:     if (!pcbddc->benign_change || pcbddc->fake_change) {
7108:       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
7109:     } else {
7110:       const PetscInt *ii;
7111:       PetscInt        n;
7112:       PetscBool       flg_row;
7113:       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7114:       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
7115:       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7116:     }
7117:     for (i = n_vertices; i < total_counts_cc; i++) {
7118:       if (PetscBTLookup(change_basis, i)) {
7119:         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7120:         if (PetscBTLookup(qr_needed_idx, i)) {
7121:           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
7122:         } else {
7123:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
7124:           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
7125:         }
7126:       }
7127:     }
7128:     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
7129:     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7130:     PetscCall(PetscFree(nnz));
7131:     /* Set interior change in the matrix */
7132:     if (!pcbddc->benign_change || pcbddc->fake_change) {
7133:       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
7134:     } else {
7135:       const PetscInt *ii, *jj;
7136:       PetscScalar    *aa;
7137:       PetscInt        n;
7138:       PetscBool       flg_row;
7139:       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7140:       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
7141:       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
7142:       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
7143:       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7144:     }

7146:     if (pcbddc->dbg_flag) {
7147:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7148:       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7149:     }

7151:     /* Now we loop on the constraints which need a change of basis */
7152:     /*
7153:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7154:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

7156:        Basic blocks of change of basis matrix T computed:

7158:           - By using the following block transformation if there is only one primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

7160:             | 1        0   ...        0         s_1/S |
7161:             | 0        1   ...        0         s_2/S |
7162:             |              ...                        |
7163:             | 0        ...            1     s_{n-1}/S |
7164:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

7166:             with S = \sum_{i=1}^n s_i^2
7167:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7168:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

7170:           - QR decomposition of constraints otherwise
7171:     */
7172:     if (qr_needed && max_size_of_constraint) {
7173:       /* space to store Q */
7174:       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7175:       /* array to store scaling factors for reflectors */
7176:       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7177:       /* first we issue queries for optimal work */
7178:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7179:       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7180:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7181:       lqr_work = -1;
7182:       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7183:       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7184:       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7185:       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7186:       lgqr_work = -1;
7187:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7188:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7189:       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7190:       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7191:       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7192:       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7193:       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7194:       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7195:       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7196:       /* array to store rhs and solution of triangular solver */
7197:       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7198:       /* allocating workspace for check */
7199:       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7200:     }
7201:     /* array to store whether a node is primal or not */
7202:     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7203:     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7204:     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7205:     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7206:     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7207:     PetscCall(PetscFree(aux_primal_numbering_B));

7209:     /* loop on constraints and see whether or not they need a change of basis and compute it */
7210:     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7211:       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7212:       if (PetscBTLookup(change_basis, total_counts)) {
7213:         /* get constraint info */
7214:         primal_dofs = constraints_n[total_counts];
7215:         dual_dofs   = size_of_constraint - primal_dofs;

7217:         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));

7219:         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */

7221:           /* copy quadrature constraints for change of basis check */
7222:           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7223:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7224:           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));

7226:           /* compute QR decomposition of constraints */
7227:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7228:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7229:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7230:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7231:           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7232:           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7233:           PetscCall(PetscFPTrapPop());

7235:           /* explicitly compute R^-T */
7236:           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7237:           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7238:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7239:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7240:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7241:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7242:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7243:           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7244:           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7245:           PetscCall(PetscFPTrapPop());

7247:           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7248:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7249:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7250:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7251:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7252:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7253:           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7254:           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7255:           PetscCall(PetscFPTrapPop());

7257:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7258:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7259:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7260:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7261:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7262:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7263:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7264:           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7265:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7266:           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7267:           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7268:           PetscCall(PetscFPTrapPop());
7269:           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));

7271:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7272:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7273:           /* insert cols for primal dofs */
7274:           for (j = 0; j < primal_dofs; j++) {
7275:             start_vals = &qr_basis[j * size_of_constraint];
7276:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7277:             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7278:           }
7279:           /* insert cols for dual dofs */
7280:           for (j = 0, k = 0; j < dual_dofs; k++) {
7281:             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7282:               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7283:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7284:               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7285:               j++;
7286:             }
7287:           }

7289:           /* check change of basis */
7290:           if (pcbddc->dbg_flag) {
7291:             PetscInt  ii, jj;
7292:             PetscBool valid_qr = PETSC_TRUE;
7293:             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7294:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7295:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7296:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7297:             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7298:             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7299:             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7300:             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7301:             PetscCall(PetscFPTrapPop());
7302:             for (jj = 0; jj < size_of_constraint; jj++) {
7303:               for (ii = 0; ii < primal_dofs; ii++) {
7304:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7305:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7306:               }
7307:             }
7308:             if (!valid_qr) {
7309:               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7310:               for (jj = 0; jj < size_of_constraint; jj++) {
7311:                 for (ii = 0; ii < primal_dofs; ii++) {
7312:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7313:                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7314:                   }
7315:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7316:                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7317:                   }
7318:                 }
7319:               }
7320:             } else {
7321:               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7322:             }
7323:           }
7324:         } else { /* simple transformation block */
7325:           PetscInt    row, col;
7326:           PetscScalar val, norm;

7328:           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7329:           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7330:           for (j = 0; j < size_of_constraint; j++) {
7331:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7332:             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7333:             if (!PetscBTLookup(is_primal, row_B)) {
7334:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7335:               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7336:               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7337:             } else {
7338:               for (k = 0; k < size_of_constraint; k++) {
7339:                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7340:                 if (row != col) {
7341:                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7342:                 } else {
7343:                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7344:                 }
7345:                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7346:               }
7347:             }
7348:           }
7349:           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7350:         }
7351:       } else {
7352:         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7353:       }
7354:     }

7356:     /* free workspace */
7357:     if (qr_needed) {
7358:       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7359:       PetscCall(PetscFree(trs_rhs));
7360:       PetscCall(PetscFree(qr_tau));
7361:       PetscCall(PetscFree(qr_work));
7362:       PetscCall(PetscFree(gqr_work));
7363:       PetscCall(PetscFree(qr_basis));
7364:     }
7365:     PetscCall(PetscBTDestroy(&is_primal));
7366:     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7367:     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));

7369:     /* assembling of global change of variable */
7370:     if (!pcbddc->fake_change) {
7371:       Mat tmat;

7373:       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7374:       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7375:       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7376:       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7377:       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7378:       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7379:       PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7380:       PetscCall(MatDestroy(&tmat));
7381:       PetscCall(VecSet(pcis->vec1_global, 0.0));
7382:       PetscCall(VecSet(pcis->vec1_N, 1.0));
7383:       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7384:       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7385:       PetscCall(VecReciprocal(pcis->vec1_global));
7386:       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));

7388:       /* check */
7389:       if (pcbddc->dbg_flag) {
7390:         PetscReal error;
7391:         Vec       x, x_change;

7393:         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7394:         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7395:         PetscCall(VecSetRandom(x, NULL));
7396:         PetscCall(VecCopy(x, pcis->vec1_global));
7397:         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7398:         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7399:         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7400:         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7401:         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7402:         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7403:         PetscCall(VecAXPY(x, -1.0, x_change));
7404:         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7405:         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7406:         PetscCall(VecDestroy(&x));
7407:         PetscCall(VecDestroy(&x_change));
7408:       }
7409:       /* adapt sub_schurs computed (if any) */
7410:       if (pcbddc->use_deluxe_scaling) {
7411:         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

7413:         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7414:         if (sub_schurs && sub_schurs->S_Ej_all) {
7415:           Mat S_new, tmat;
7416:           IS  is_all_N, is_V_Sall = NULL;

7418:           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7419:           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7420:           if (pcbddc->deluxe_zerorows) {
7421:             ISLocalToGlobalMapping NtoSall;
7422:             IS                     is_V;
7423:             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7424:             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7425:             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7426:             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7427:             PetscCall(ISDestroy(&is_V));
7428:           }
7429:           PetscCall(ISDestroy(&is_all_N));
7430:           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7431:           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7432:           PetscCall(PetscObjectReference((PetscObject)S_new));
7433:           if (pcbddc->deluxe_zerorows) {
7434:             const PetscScalar *array;
7435:             const PetscInt    *idxs_V, *idxs_all;
7436:             PetscInt           i, n_V;

7438:             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7439:             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7440:             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7441:             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7442:             PetscCall(VecGetArrayRead(pcis->D, &array));
7443:             for (i = 0; i < n_V; i++) {
7444:               PetscScalar val;
7445:               PetscInt    idx;

7447:               idx = idxs_V[i];
7448:               val = array[idxs_all[idxs_V[i]]];
7449:               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7450:             }
7451:             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7452:             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7453:             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7454:             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7455:             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7456:           }
7457:           sub_schurs->S_Ej_all = S_new;
7458:           PetscCall(MatDestroy(&S_new));
7459:           if (sub_schurs->sum_S_Ej_all) {
7460:             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7461:             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7462:             PetscCall(PetscObjectReference((PetscObject)S_new));
7463:             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7464:             sub_schurs->sum_S_Ej_all = S_new;
7465:             PetscCall(MatDestroy(&S_new));
7466:           }
7467:           PetscCall(ISDestroy(&is_V_Sall));
7468:           PetscCall(MatDestroy(&tmat));
7469:         }
7470:         /* destroy any change of basis context in sub_schurs */
7471:         if (sub_schurs && sub_schurs->change) {
7472:           PetscInt i;

7474:           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7475:           PetscCall(PetscFree(sub_schurs->change));
7476:         }
7477:       }
7478:       if (pcbddc->switch_static) { /* need to save the local change */
7479:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7480:       } else {
7481:         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7482:       }
7483:       /* determine if any process has changed the pressures locally */
7484:       pcbddc->change_interior = pcbddc->benign_have_null;
7485:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7486:       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7487:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7488:       pcbddc->use_qr_single    = qr_needed;
7489:     }
7490:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7491:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7492:       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7493:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7494:     } else {
7495:       Mat benign_global = NULL;
7496:       if (pcbddc->benign_have_null) {
7497:         Mat M;

7499:         pcbddc->change_interior = PETSC_TRUE;
7500:         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7501:         PetscCall(VecReciprocal(pcis->vec1_N));
7502:         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7503:         if (pcbddc->benign_change) {
7504:           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7505:           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7506:         } else {
7507:           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7508:           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7509:         }
7510:         PetscCall(MatISSetLocalMat(benign_global, M));
7511:         PetscCall(MatDestroy(&M));
7512:         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7513:         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7514:       }
7515:       if (pcbddc->user_ChangeOfBasisMatrix) {
7516:         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7517:         PetscCall(MatDestroy(&benign_global));
7518:       } else if (pcbddc->benign_have_null) {
7519:         pcbddc->ChangeOfBasisMatrix = benign_global;
7520:       }
7521:     }
7522:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7523:       IS              is_global;
7524:       const PetscInt *gidxs;

7526:       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7527:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7528:       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7529:       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7530:       PetscCall(ISDestroy(&is_global));
7531:     }
7532:   }
7533:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));

7535:   if (!pcbddc->fake_change) {
7536:     /* add pressure dofs to set of primal nodes for numbering purposes */
7537:     for (i = 0; i < pcbddc->benign_n; i++) {
7538:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7539:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7540:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7541:       pcbddc->local_primal_size_cc++;
7542:       pcbddc->local_primal_size++;
7543:     }

7545:     /* check if a new primal space has been introduced (also take into account benign trick) */
7546:     pcbddc->new_primal_space_local = PETSC_TRUE;
7547:     if (olocal_primal_size == pcbddc->local_primal_size) {
7548:       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7549:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7550:       if (!pcbddc->new_primal_space_local) {
7551:         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7552:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7553:       }
7554:     }
7555:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7556:     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7557:   }
7558:   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));

7560:   /* flush dbg viewer */
7561:   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));

7563:   /* free workspace */
7564:   PetscCall(PetscBTDestroy(&qr_needed_idx));
7565:   PetscCall(PetscBTDestroy(&change_basis));
7566:   if (!pcbddc->adaptive_selection) {
7567:     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7568:     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7569:   } else {
7570:     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7571:     PetscCall(PetscFree(constraints_n));
7572:     PetscCall(PetscFree(constraints_idxs_B));
7573:   }
7574:   PetscFunctionReturn(PETSC_SUCCESS);
7575: }

7577: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7578: {
7579:   ISLocalToGlobalMapping map;
7580:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7581:   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7582:   PetscInt               i, N;
7583:   PetscBool              rcsr = PETSC_FALSE;

7585:   PetscFunctionBegin;
7586:   if (pcbddc->recompute_topography) {
7587:     pcbddc->graphanalyzed = PETSC_FALSE;
7588:     /* Reset previously computed graph */
7589:     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7590:     /* Init local Graph struct */
7591:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7592:     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7593:     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));

7595:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7596:     /* Check validity of the csr graph passed in by the user */
7597:     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7598:                pcbddc->mat_graph->nvtxs);

7600:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7601:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7602:       PetscInt *xadj, *adjncy;
7603:       PetscInt  nvtxs;
7604:       PetscBool flg_row;
7605:       Mat       A;

7607:       PetscCall(PetscObjectReference((PetscObject)matis->A));
7608:       A = matis->A;
7609:       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7610:         Mat AtA;

7612:         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7613:         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7614:         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7615:         PetscCall(MatProductSetFromOptions(AtA));
7616:         PetscCall(MatProductSymbolic(AtA));
7617:         PetscCall(MatProductClear(AtA));
7618:         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7619:         AtA->assembled = PETSC_TRUE;
7620:         PetscCall(MatDestroy(&A));
7621:         A = AtA;
7622:       }
7623:       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7624:       if (flg_row) {
7625:         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7626:         pcbddc->computed_rowadj = PETSC_TRUE;
7627:         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7628:         rcsr = PETSC_TRUE;
7629:       }
7630:       PetscCall(MatDestroy(&A));
7631:     }
7632:     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));

7634:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7635:       PetscReal   *lcoords;
7636:       PetscInt     n;
7637:       MPI_Datatype dimrealtype;
7638:       PetscMPIInt  cdimi;

7640:       /* TODO: support for blocked */
7641:       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7642:       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7643:       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7644:       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7645:       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7646:       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7647:       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7648:       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7649:       PetscCallMPI(MPI_Type_free(&dimrealtype));
7650:       PetscCall(PetscFree(pcbddc->mat_graph->coords));

7652:       pcbddc->mat_graph->coords = lcoords;
7653:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7654:       pcbddc->mat_graph->cnloc  = n;
7655:     }
7656:     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7657:                pcbddc->mat_graph->nvtxs);
7658:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);

7660:     /* attach info on disconnected subdomains if present */
7661:     if (pcbddc->n_local_subs) {
7662:       PetscInt *local_subs, n, totn;

7664:       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7665:       PetscCall(PetscMalloc1(n, &local_subs));
7666:       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7667:       for (i = 0; i < pcbddc->n_local_subs; i++) {
7668:         const PetscInt *idxs;
7669:         PetscInt        nl, j;

7671:         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7672:         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7673:         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7674:         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7675:       }
7676:       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7677:       pcbddc->mat_graph->n_local_subs = totn + 1;
7678:       pcbddc->mat_graph->local_subs   = local_subs;
7679:     }

7681:     /* Setup of Graph */
7682:     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7683:   }

7685:   if (!pcbddc->graphanalyzed) {
7686:     /* Graph's connected components analysis */
7687:     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7688:     pcbddc->graphanalyzed   = PETSC_TRUE;
7689:     pcbddc->corner_selected = pcbddc->corner_selection;
7690:   }
7691:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7692:   PetscFunctionReturn(PETSC_SUCCESS);
7693: }

7695: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7696: {
7697:   PetscInt     i, j, n;
7698:   PetscScalar *alphas;
7699:   PetscReal    norm, *onorms;

7701:   PetscFunctionBegin;
7702:   n = *nio;
7703:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7704:   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7705:   PetscCall(VecNormalize(vecs[0], &norm));
7706:   if (norm < PETSC_SMALL) {
7707:     onorms[0] = 0.0;
7708:     PetscCall(VecSet(vecs[0], 0.0));
7709:   } else {
7710:     onorms[0] = norm;
7711:   }

7713:   for (i = 1; i < n; i++) {
7714:     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7715:     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7716:     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7717:     PetscCall(VecNormalize(vecs[i], &norm));
7718:     if (norm < PETSC_SMALL) {
7719:       onorms[i] = 0.0;
7720:       PetscCall(VecSet(vecs[i], 0.0));
7721:     } else {
7722:       onorms[i] = norm;
7723:     }
7724:   }
7725:   /* push nonzero vectors at the beginning */
7726:   for (i = 0; i < n; i++) {
7727:     if (onorms[i] == 0.0) {
7728:       for (j = i + 1; j < n; j++) {
7729:         if (onorms[j] != 0.0) {
7730:           PetscCall(VecCopy(vecs[j], vecs[i]));
7731:           onorms[i] = onorms[j];
7732:           onorms[j] = 0.0;
7733:           break;
7734:         }
7735:       }
7736:     }
7737:   }
7738:   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7739:   PetscCall(PetscFree2(alphas, onorms));
7740:   PetscFunctionReturn(PETSC_SUCCESS);
7741: }

7743: static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7744: {
7745:   ISLocalToGlobalMapping mapping;
7746:   Mat                    A;
7747:   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7748:   PetscMPIInt            size, rank, color;
7749:   PetscInt              *xadj, *adjncy;
7750:   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7751:   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7752:   PetscInt               void_procs, *procs_candidates = NULL;
7753:   PetscInt               xadj_count, *count;
7754:   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7755:   PetscSubcomm           psubcomm;
7756:   MPI_Comm               subcomm;

7758:   PetscFunctionBegin;
7760:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7761:   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7764:   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);

7766:   if (have_void) *have_void = PETSC_FALSE;
7767:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7768:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7769:   PetscCall(MatISGetLocalMat(mat, &A));
7770:   PetscCall(MatGetLocalSize(A, &n, NULL));
7771:   im_active = !!n;
7772:   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7773:   void_procs = size - active_procs;
7774:   /* get ranks of non-active processes in mat communicator */
7775:   if (void_procs) {
7776:     PetscInt ncand;

7778:     if (have_void) *have_void = PETSC_TRUE;
7779:     PetscCall(PetscMalloc1(size, &procs_candidates));
7780:     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7781:     for (i = 0, ncand = 0; i < size; i++) {
7782:       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7783:     }
7784:     /* force n_subdomains to be not greater that the number of non-active processes */
7785:     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7786:   }

7788:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7789:      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7790:   PetscCall(MatGetSize(mat, &N, NULL));
7791:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7792:     PetscInt  issize, isidx, dest;
7793:     PetscBool default_sub;

7795:     if (*n_subdomains == 1) dest = 0;
7796:     else dest = rank;
7797:     if (im_active) {
7798:       issize = 1;
7799:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7800:         isidx = procs_candidates[dest];
7801:       } else {
7802:         isidx = dest;
7803:       }
7804:     } else {
7805:       issize = 0;
7806:       isidx  = rank;
7807:     }
7808:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7809:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7810:     default_sub = (PetscBool)(isidx == rank);
7811:     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
7812:     if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling"));
7813:     PetscCall(PetscFree(procs_candidates));
7814:     PetscFunctionReturn(PETSC_SUCCESS);
7815:   }
7816:   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7817:   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7818:   threshold = PetscMax(threshold, 2);

7820:   /* Get info on mapping */
7821:   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7822:   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));

7824:   /* build local CSR graph of subdomains' connectivity */
7825:   PetscCall(PetscMalloc1(2, &xadj));
7826:   xadj[0] = 0;
7827:   xadj[1] = PetscMax(n_neighs - 1, 0);
7828:   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7829:   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7830:   PetscCall(PetscCalloc1(n, &count));
7831:   for (i = 1; i < n_neighs; i++)
7832:     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;

7834:   xadj_count = 0;
7835:   for (i = 1; i < n_neighs; i++) {
7836:     for (j = 0; j < n_shared[i]; j++) {
7837:       if (count[shared[i][j]] < threshold) {
7838:         adjncy[xadj_count]     = neighs[i];
7839:         adjncy_wgt[xadj_count] = n_shared[i];
7840:         xadj_count++;
7841:         break;
7842:       }
7843:     }
7844:   }
7845:   xadj[1] = xadj_count;
7846:   PetscCall(PetscFree(count));
7847:   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7848:   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));

7850:   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));

7852:   /* Restrict work on active processes only */
7853:   PetscCall(PetscMPIIntCast(im_active, &color));
7854:   if (void_procs) {
7855:     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7856:     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7857:     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7858:     subcomm = PetscSubcommChild(psubcomm);
7859:   } else {
7860:     psubcomm = NULL;
7861:     subcomm  = PetscObjectComm((PetscObject)mat);
7862:   }

7864:   v_wgt = NULL;
7865:   if (!color) {
7866:     PetscCall(PetscFree(xadj));
7867:     PetscCall(PetscFree(adjncy));
7868:     PetscCall(PetscFree(adjncy_wgt));
7869:   } else {
7870:     Mat             subdomain_adj;
7871:     IS              new_ranks, new_ranks_contig;
7872:     MatPartitioning partitioner;
7873:     PetscInt        rstart, rend;
7874:     PetscMPIInt     irstart = 0, irend = 0;
7875:     PetscInt       *is_indices, *oldranks;
7876:     PetscMPIInt     size;
7877:     PetscBool       aggregate;

7879:     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7880:     if (void_procs) {
7881:       PetscInt prank = rank;
7882:       PetscCall(PetscMalloc1(size, &oldranks));
7883:       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7884:       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7885:       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7886:     } else {
7887:       oldranks = NULL;
7888:     }
7889:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7890:     if (aggregate) { /* TODO: all this part could be made more efficient */
7891:       PetscInt     lrows, row, ncols, *cols;
7892:       PetscMPIInt  nrank;
7893:       PetscScalar *vals;

7895:       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7896:       lrows = 0;
7897:       if (nrank < redprocs) {
7898:         lrows = size / redprocs;
7899:         if (nrank < size % redprocs) lrows++;
7900:       }
7901:       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7902:       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7903:       PetscCall(PetscMPIIntCast(rstart, &irstart));
7904:       PetscCall(PetscMPIIntCast(rend, &irend));
7905:       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7906:       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7907:       row   = nrank;
7908:       ncols = xadj[1] - xadj[0];
7909:       cols  = adjncy;
7910:       PetscCall(PetscMalloc1(ncols, &vals));
7911:       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7912:       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7913:       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7914:       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7915:       PetscCall(PetscFree(xadj));
7916:       PetscCall(PetscFree(adjncy));
7917:       PetscCall(PetscFree(adjncy_wgt));
7918:       PetscCall(PetscFree(vals));
7919:       if (use_vwgt) {
7920:         Vec                v;
7921:         const PetscScalar *array;
7922:         PetscInt           nl;

7924:         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7925:         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7926:         PetscCall(VecAssemblyBegin(v));
7927:         PetscCall(VecAssemblyEnd(v));
7928:         PetscCall(VecGetLocalSize(v, &nl));
7929:         PetscCall(VecGetArrayRead(v, &array));
7930:         PetscCall(PetscMalloc1(nl, &v_wgt));
7931:         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7932:         PetscCall(VecRestoreArrayRead(v, &array));
7933:         PetscCall(VecDestroy(&v));
7934:       }
7935:     } else {
7936:       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7937:       if (use_vwgt) {
7938:         PetscCall(PetscMalloc1(1, &v_wgt));
7939:         v_wgt[0] = n;
7940:       }
7941:     }
7942:     /* PetscCall(MatView(subdomain_adj,0)); */

7944:     /* Partition */
7945:     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7946: #if defined(PETSC_HAVE_PTSCOTCH)
7947:     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7948: #elif defined(PETSC_HAVE_PARMETIS)
7949:     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7950: #else
7951:     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7952: #endif
7953:     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7954:     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7955:     *n_subdomains = PetscMin(size, *n_subdomains);
7956:     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7957:     PetscCall(MatPartitioningSetFromOptions(partitioner));
7958:     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7959:     /* PetscCall(MatPartitioningView(partitioner,0)); */

7961:     /* renumber new_ranks to avoid "holes" in new set of processors */
7962:     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7963:     PetscCall(ISDestroy(&new_ranks));
7964:     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7965:     if (!aggregate) {
7966:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7967:         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7968:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7969:       } else if (oldranks) {
7970:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7971:       } else {
7972:         ranks_send_to_idx[0] = is_indices[0];
7973:       }
7974:     } else {
7975:       PetscInt     idx = 0;
7976:       PetscMPIInt  tag;
7977:       MPI_Request *reqs;

7979:       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7980:       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7981:       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7982:       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7983:       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7984:       PetscCall(PetscFree(reqs));
7985:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7986:         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7987:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7988:       } else if (oldranks) {
7989:         ranks_send_to_idx[0] = oldranks[idx];
7990:       } else {
7991:         ranks_send_to_idx[0] = idx;
7992:       }
7993:     }
7994:     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7995:     /* clean up */
7996:     PetscCall(PetscFree(oldranks));
7997:     PetscCall(ISDestroy(&new_ranks_contig));
7998:     PetscCall(MatDestroy(&subdomain_adj));
7999:     PetscCall(MatPartitioningDestroy(&partitioner));
8000:   }
8001:   PetscCall(PetscSubcommDestroy(&psubcomm));
8002:   PetscCall(PetscFree(procs_candidates));

8004:   /* assemble parallel IS for sends */
8005:   i = 1;
8006:   if (!color) i = 0;
8007:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
8008:   PetscFunctionReturn(PETSC_SUCCESS);
8009: }

/* Private tags for matrix storage formats; going by the names: dense, AIJ, BAIJ and SBAIJ.
   NOTE(review): the code using these tags is not visible from here - confirm the intended use */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE   = 1,
  MATBAIJ_PRIVATE  = 2,
  MATSBAIJ_PRIVATE = 3
} MatTypePrivate;

8018: static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
8019: {
8020:   Mat                    local_mat;
8021:   IS                     is_sends_internal;
8022:   PetscInt               rows, cols, new_local_rows;
8023:   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
8024:   PetscBool              ismatis, isdense, newisdense, destroy_mat;
8025:   ISLocalToGlobalMapping l2gmap;
8026:   PetscInt              *l2gmap_indices;
8027:   const PetscInt        *is_indices;
8028:   MatType                new_local_type;
8029:   /* buffers */
8030:   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
8031:   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
8032:   PetscInt          *recv_buffer_idxs_local;
8033:   PetscScalar       *ptr_vals, *recv_buffer_vals;
8034:   const PetscScalar *send_buffer_vals;
8035:   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
8036:   /* MPI */
8037:   MPI_Comm     comm, comm_n;
8038:   PetscSubcomm subcomm;
8039:   PetscMPIInt  n_sends, n_recvs, size;
8040:   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
8041:   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
8042:   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
8043:   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
8044:   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;

8046:   PetscFunctionBegin;
8048:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
8049:   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
8056:   if (nvecs) {
8057:     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
8059:   }
8060:   /* further checks */
8061:   PetscCall(MatISGetLocalMat(mat, &local_mat));
8062:   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
8063:   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");

8065:   PetscCall(MatGetSize(local_mat, &rows, &cols));
8066:   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
8067:   if (reuse && *mat_n) {
8068:     PetscInt mrows, mcols, mnrows, mncols;
8070:     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
8071:     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
8072:     PetscCall(MatGetSize(mat, &mrows, &mcols));
8073:     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
8074:     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
8075:     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
8076:   }
8077:   PetscCall(MatGetBlockSize(local_mat, &bs));

8080:   /* prepare IS for sending if not provided */
8081:   if (!is_sends) {
8082:     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
8083:     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
8084:   } else {
8085:     PetscCall(PetscObjectReference((PetscObject)is_sends));
8086:     is_sends_internal = is_sends;
8087:   }

8089:   /* get comm */
8090:   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

8092:   /* compute number of sends */
8093:   PetscCall(ISGetLocalSize(is_sends_internal, &i));
8094:   PetscCall(PetscMPIIntCast(i, &n_sends));

8096:   /* compute number of receives */
8097:   PetscCallMPI(MPI_Comm_size(comm, &size));
8098:   PetscCall(PetscMalloc1(size, &iflags));
8099:   PetscCall(PetscArrayzero(iflags, size));
8100:   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
8101:   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
8102:   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
8103:   PetscCall(PetscFree(iflags));

8105:   /* restrict comm if requested */
8106:   subcomm     = NULL;
8107:   destroy_mat = PETSC_FALSE;
8108:   if (restrict_comm) {
8109:     PetscMPIInt color, subcommsize;

8111:     color = 0;
8112:     if (restrict_full) {
8113:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
8114:     } else {
8115:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
8116:     }
8117:     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
8118:     subcommsize = size - subcommsize;
8119:     /* check if reuse has been requested */
8120:     if (reuse) {
8121:       if (*mat_n) {
8122:         PetscMPIInt subcommsize2;
8123:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
8124:         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
8125:         comm_n = PetscObjectComm((PetscObject)*mat_n);
8126:       } else {
8127:         comm_n = PETSC_COMM_SELF;
8128:       }
8129:     } else { /* MAT_INITIAL_MATRIX */
8130:       PetscMPIInt rank;

8132:       PetscCallMPI(MPI_Comm_rank(comm, &rank));
8133:       PetscCall(PetscSubcommCreate(comm, &subcomm));
8134:       PetscCall(PetscSubcommSetNumber(subcomm, 2));
8135:       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
8136:       comm_n = PetscSubcommChild(subcomm);
8137:     }
8138:     /* flag to destroy *mat_n on processes that will not participate in the new comm */
8139:     if (color) destroy_mat = PETSC_TRUE;
8140:   } else {
8141:     comm_n = comm;
8142:   }

8144:   /* prepare send/receive buffers */
8145:   PetscCall(PetscMalloc1(size, &ilengths_idxs));
8146:   PetscCall(PetscArrayzero(ilengths_idxs, size));
8147:   PetscCall(PetscMalloc1(size, &ilengths_vals));
8148:   PetscCall(PetscArrayzero(ilengths_vals, size));
8149:   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));

8151:   /* Get data from local matrices */
8152:   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8153:   /* TODO: See below some guidelines on how to prepare the local buffers */
8154:   /*
8155:        send_buffer_vals should contain the raw values of the local matrix
8156:        send_buffer_idxs should contain:
8157:        - MatType_PRIVATE type
8158:        - PetscInt        size_of_l2gmap
8159:        - PetscInt        global_row_indices[size_of_l2gmap]
8160:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8161:     */
8162:   {
8163:     ISLocalToGlobalMapping mapping;

8165:     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8166:     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8167:     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8168:     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8169:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8170:     send_buffer_idxs[1] = i;
8171:     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8172:     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8173:     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8174:     PetscCall(PetscMPIIntCast(i, &len));
8175:     for (i = 0; i < n_sends; i++) {
8176:       ilengths_vals[is_indices[i]] = len * len;
8177:       ilengths_idxs[is_indices[i]] = len + 2;
8178:     }
8179:   }
8180:   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8181:   /* additional is (if any) */
8182:   if (nis) {
8183:     PetscMPIInt psum;
8184:     PetscInt    j;
8185:     for (j = 0, psum = 0; j < nis; j++) {
8186:       PetscInt plen;
8187:       PetscCall(ISGetLocalSize(isarray[j], &plen));
8188:       PetscCall(PetscMPIIntCast(plen, &len));
8189:       psum += len + 1; /* indices + length */
8190:     }
8191:     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8192:     for (j = 0, psum = 0; j < nis; j++) {
8193:       PetscInt        plen;
8194:       const PetscInt *is_array_idxs;
8195:       PetscCall(ISGetLocalSize(isarray[j], &plen));
8196:       send_buffer_idxs_is[psum] = plen;
8197:       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8198:       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8199:       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8200:       psum += plen + 1; /* indices + length */
8201:     }
8202:     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8203:     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8204:   }
8205:   PetscCall(MatISRestoreLocalMat(mat, &local_mat));

8207:   buf_size_idxs    = 0;
8208:   buf_size_vals    = 0;
8209:   buf_size_idxs_is = 0;
8210:   buf_size_vecs    = 0;
8211:   for (i = 0; i < n_recvs; i++) {
8212:     buf_size_idxs += olengths_idxs[i];
8213:     buf_size_vals += olengths_vals[i];
8214:     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8215:     if (nvecs) buf_size_vecs += olengths_idxs[i];
8216:   }
8217:   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8218:   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8219:   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8220:   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));

8222:   /* get new tags for clean communications */
8223:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8224:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8225:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8226:   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));

8228:   /* allocate for requests */
8229:   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8230:   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8231:   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8232:   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8233:   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8234:   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8235:   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8236:   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));

8238:   /* communications */
8239:   ptr_idxs    = recv_buffer_idxs;
8240:   ptr_vals    = recv_buffer_vals;
8241:   ptr_idxs_is = recv_buffer_idxs_is;
8242:   ptr_vecs    = recv_buffer_vecs;
8243:   for (i = 0; i < n_recvs; i++) {
8244:     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8245:     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8246:     ptr_idxs += olengths_idxs[i];
8247:     ptr_vals += olengths_vals[i];
8248:     if (nis) {
8249:       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8250:       ptr_idxs_is += olengths_idxs_is[i];
8251:     }
8252:     if (nvecs) {
8253:       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8254:       ptr_vecs += olengths_idxs[i] - 2;
8255:     }
8256:   }
8257:   for (i = 0; i < n_sends; i++) {
8258:     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8259:     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8260:     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8261:     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8262:     if (nvecs) {
8263:       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8264:       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8265:     }
8266:   }
8267:   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8268:   PetscCall(ISDestroy(&is_sends_internal));

8270:   /* assemble new l2g map */
8271:   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8272:   ptr_idxs       = recv_buffer_idxs;
8273:   new_local_rows = 0;
8274:   for (i = 0; i < n_recvs; i++) {
8275:     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8276:     ptr_idxs += olengths_idxs[i];
8277:   }
8278:   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8279:   ptr_idxs       = recv_buffer_idxs;
8280:   new_local_rows = 0;
8281:   for (i = 0; i < n_recvs; i++) {
8282:     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8283:     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8284:     ptr_idxs += olengths_idxs[i];
8285:   }
8286:   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8287:   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8288:   PetscCall(PetscFree(l2gmap_indices));

8290:   /* infer new local matrix type from received local matrices type */
8291:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8292:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
8293:   if (n_recvs) {
8294:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8295:     ptr_idxs                              = recv_buffer_idxs;
8296:     for (i = 0; i < n_recvs; i++) {
8297:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8298:         new_local_type_private = MATAIJ_PRIVATE;
8299:         break;
8300:       }
8301:       ptr_idxs += olengths_idxs[i];
8302:     }
8303:     switch (new_local_type_private) {
8304:     case MATDENSE_PRIVATE:
8305:       new_local_type = MATSEQAIJ;
8306:       bs             = 1;
8307:       break;
8308:     case MATAIJ_PRIVATE:
8309:       new_local_type = MATSEQAIJ;
8310:       bs             = 1;
8311:       break;
8312:     case MATBAIJ_PRIVATE:
8313:       new_local_type = MATSEQBAIJ;
8314:       break;
8315:     case MATSBAIJ_PRIVATE:
8316:       new_local_type = MATSEQSBAIJ;
8317:       break;
8318:     default:
8319:       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8320:     }
8321:   } else { /* by default, new_local_type is seqaij */
8322:     new_local_type = MATSEQAIJ;
8323:     bs             = 1;
8324:   }

8326:   /* create MATIS object if needed */
8327:   if (!reuse) {
8328:     PetscCall(MatGetSize(mat, &rows, &cols));
8329:     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8330:   } else {
8331:     /* it also destroys the local matrices */
8332:     if (*mat_n) {
8333:       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8334:     } else { /* this is a fake object */
8335:       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8336:     }
8337:   }
8338:   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8339:   PetscCall(MatSetType(local_mat, new_local_type));

8341:   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));

8343:   /* Global to local map of received indices */
8344:   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8345:   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8346:   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));

8348:   /* restore attributes -> type of incoming data and its size */
8349:   buf_size_idxs = 0;
8350:   for (i = 0; i < n_recvs; i++) {
8351:     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8352:     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8353:     buf_size_idxs += olengths_idxs[i];
8354:   }
8355:   PetscCall(PetscFree(recv_buffer_idxs));

8357:   /* set preallocation */
8358:   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8359:   if (!newisdense) {
8360:     PetscInt *new_local_nnz = NULL;

8362:     ptr_idxs = recv_buffer_idxs_local;
8363:     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8364:     for (i = 0; i < n_recvs; i++) {
8365:       PetscInt j;
8366:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8367:         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8368:       } else {
8369:         /* TODO */
8370:       }
8371:       ptr_idxs += olengths_idxs[i];
8372:     }
8373:     if (new_local_nnz) {
8374:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8375:       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8376:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8377:       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8378:       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8379:       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8380:     } else {
8381:       PetscCall(MatSetUp(local_mat));
8382:     }
8383:     PetscCall(PetscFree(new_local_nnz));
8384:   } else {
8385:     PetscCall(MatSetUp(local_mat));
8386:   }

8388:   /* set values */
8389:   ptr_vals = recv_buffer_vals;
8390:   ptr_idxs = recv_buffer_idxs_local;
8391:   for (i = 0; i < n_recvs; i++) {
8392:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8393:       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8394:       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8395:       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8396:       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8397:       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8398:     } else {
8399:       /* TODO */
8400:     }
8401:     ptr_idxs += olengths_idxs[i];
8402:     ptr_vals += olengths_vals[i];
8403:   }
8404:   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8405:   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8406:   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8407:   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8408:   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8409:   PetscCall(PetscFree(recv_buffer_vals));

8411: #if 0
8412:   if (!restrict_comm) { /* check */
8413:     Vec       lvec,rvec;
8414:     PetscReal infty_error;

8416:     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8417:     PetscCall(VecSetRandom(rvec,NULL));
8418:     PetscCall(MatMult(mat,rvec,lvec));
8419:     PetscCall(VecScale(lvec,-1.0));
8420:     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8421:     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8422:     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8423:     PetscCall(VecDestroy(&rvec));
8424:     PetscCall(VecDestroy(&lvec));
8425:   }
8426: #endif

8428:   /* assemble new additional is (if any) */
8429:   if (nis) {
8430:     PetscInt **temp_idxs, *count_is, j, psum;

8432:     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8433:     PetscCall(PetscCalloc1(nis, &count_is));
8434:     ptr_idxs = recv_buffer_idxs_is;
8435:     psum     = 0;
8436:     for (i = 0; i < n_recvs; i++) {
8437:       for (j = 0; j < nis; j++) {
8438:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8439:         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8440:         psum += plen;
8441:         ptr_idxs += plen + 1; /* shift pointer to received data */
8442:       }
8443:     }
8444:     PetscCall(PetscMalloc1(nis, &temp_idxs));
8445:     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8446:     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8447:     PetscCall(PetscArrayzero(count_is, nis));
8448:     ptr_idxs = recv_buffer_idxs_is;
8449:     for (i = 0; i < n_recvs; i++) {
8450:       for (j = 0; j < nis; j++) {
8451:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8452:         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8453:         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8454:         ptr_idxs += plen + 1; /* shift pointer to received data */
8455:       }
8456:     }
8457:     for (i = 0; i < nis; i++) {
8458:       PetscCall(ISDestroy(&isarray[i]));
8459:       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8460:       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8461:     }
8462:     PetscCall(PetscFree(count_is));
8463:     PetscCall(PetscFree(temp_idxs[0]));
8464:     PetscCall(PetscFree(temp_idxs));
8465:   }
8466:   /* free workspace */
8467:   PetscCall(PetscFree(recv_buffer_idxs_is));
8468:   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8469:   PetscCall(PetscFree(send_buffer_idxs));
8470:   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8471:   if (isdense) {
8472:     PetscCall(MatISGetLocalMat(mat, &local_mat));
8473:     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8474:     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8475:   } else {
8476:     /* PetscCall(PetscFree(send_buffer_vals)); */
8477:   }
8478:   if (nis) {
8479:     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8480:     PetscCall(PetscFree(send_buffer_idxs_is));
8481:   }

8483:   if (nvecs) {
8484:     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8485:     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8486:     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8487:     PetscCall(VecDestroy(&nnsp_vec[0]));
8488:     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8489:     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8490:     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8491:     /* set values */
8492:     ptr_vals = recv_buffer_vecs;
8493:     ptr_idxs = recv_buffer_idxs_local;
8494:     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8495:     for (i = 0; i < n_recvs; i++) {
8496:       PetscInt j;
8497:       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8498:       ptr_idxs += olengths_idxs[i];
8499:       ptr_vals += olengths_idxs[i] - 2;
8500:     }
8501:     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8502:     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8503:     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8504:   }

8506:   PetscCall(PetscFree(recv_buffer_vecs));
8507:   PetscCall(PetscFree(recv_buffer_idxs_local));
8508:   PetscCall(PetscFree(recv_req_idxs));
8509:   PetscCall(PetscFree(recv_req_vals));
8510:   PetscCall(PetscFree(recv_req_vecs));
8511:   PetscCall(PetscFree(recv_req_idxs_is));
8512:   PetscCall(PetscFree(send_req_idxs));
8513:   PetscCall(PetscFree(send_req_vals));
8514:   PetscCall(PetscFree(send_req_vecs));
8515:   PetscCall(PetscFree(send_req_idxs_is));
8516:   PetscCall(PetscFree(ilengths_vals));
8517:   PetscCall(PetscFree(ilengths_idxs));
8518:   PetscCall(PetscFree(olengths_vals));
8519:   PetscCall(PetscFree(olengths_idxs));
8520:   PetscCall(PetscFree(onodes));
8521:   if (nis) {
8522:     PetscCall(PetscFree(ilengths_idxs_is));
8523:     PetscCall(PetscFree(olengths_idxs_is));
8524:     PetscCall(PetscFree(onodes_is));
8525:   }
8526:   PetscCall(PetscSubcommDestroy(&subcomm));
8527:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8528:     PetscCall(MatDestroy(mat_n));
8529:     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8530:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8531:       PetscCall(VecDestroy(&nnsp_vec[0]));
8532:     }
8533:     *mat_n = NULL;
8534:   }
8535:   PetscFunctionReturn(PETSC_SUCCESS);
8536: }

8538: /* temporary hack into ksp private data structure */
8539: #include <petsc/private/kspimpl.h>

8541: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8542: {
8543:   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8544:   PC_IS                 *pcis   = (PC_IS *)pc->data;
8545:   PCBDDCGraph            graph  = pcbddc->mat_graph;
8546:   Mat                    coarse_mat, coarse_mat_is;
8547:   Mat                    coarsedivudotp = NULL;
8548:   Mat                    coarseG, t_coarse_mat_is;
8549:   MatNullSpace           CoarseNullSpace = NULL;
8550:   ISLocalToGlobalMapping coarse_islg;
8551:   IS                     coarse_is, *isarray, corners;
8552:   PetscInt               i, im_active = -1, active_procs = -1;
8553:   PetscInt               nis, nisdofs, nisneu, nisvert;
8554:   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8555:   PC                     pc_temp;
8556:   PCType                 coarse_pc_type;
8557:   KSPType                coarse_ksp_type;
8558:   PetscBool              multilevel_requested, multilevel_allowed;
8559:   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8560:   PetscInt               ncoarse, nedcfield;
8561:   PetscBool              compute_vecs = PETSC_FALSE;
8562:   PetscScalar           *array;
8563:   MatReuse               coarse_mat_reuse;
8564:   PetscBool              restr, full_restr, have_void;
8565:   PetscMPIInt            size;

8567:   PetscFunctionBegin;
8568:   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8569:   /* Assign global numbering to coarse dofs */
8570:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8571:     PetscInt ocoarse_size;
8572:     compute_vecs = PETSC_TRUE;

8574:     pcbddc->new_primal_space = PETSC_TRUE;
8575:     ocoarse_size             = pcbddc->coarse_size;
8576:     PetscCall(PetscFree(pcbddc->global_primal_indices));
8577:     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8578:     /* see if we can avoid some work */
8579:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8580:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8581:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8582:         PetscCall(KSPReset(pcbddc->coarse_ksp));
8583:         coarse_reuse = PETSC_FALSE;
8584:       } else { /* we can safely reuse already computed coarse matrix */
8585:         coarse_reuse = PETSC_TRUE;
8586:       }
8587:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8588:       coarse_reuse = PETSC_FALSE;
8589:     }
8590:     /* reset any subassembling information */
8591:     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8592:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8593:     coarse_reuse = PETSC_TRUE;
8594:   }
8595:   if (coarse_reuse && pcbddc->coarse_ksp) {
8596:     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8597:     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8598:     coarse_mat_reuse = MAT_REUSE_MATRIX;
8599:   } else {
8600:     coarse_mat       = NULL;
8601:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8602:   }

8604:   /* creates temporary l2gmap and IS for coarse indexes */
8605:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8606:   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));

8608:   /* creates temporary MATIS object for coarse matrix */
8609:   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8610:   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8611:   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8612:   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element));
8613:   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8614:   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8615:   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8616:   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8617:   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));

8619:   /* count "active" (i.e. with positive local size) and "void" processes */
8620:   im_active = !!pcis->n;
8621:   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));

8623:   /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8624:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8625:   /* full_restr : just use the receivers from the subassembling pattern */
8626:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8627:   coarse_mat_is        = NULL;
8628:   multilevel_allowed   = PETSC_FALSE;
8629:   multilevel_requested = PETSC_FALSE;
8630:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8631:   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8632:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8633:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8634:   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8635:   if (multilevel_requested) {
8636:     ncoarse    = active_procs / coarsening_ratio;
8637:     restr      = PETSC_FALSE;
8638:     full_restr = PETSC_FALSE;
8639:   } else {
8640:     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8641:     restr      = PETSC_TRUE;
8642:     full_restr = PETSC_TRUE;
8643:   }
8644:   if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8645:   ncoarse = PetscMax(1, ncoarse);
8646:   if (!pcbddc->coarse_subassembling) {
8647:     if (coarsening_ratio > 1) {
8648:       if (multilevel_requested) {
8649:         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8650:       } else {
8651:         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8652:       }
8653:     } else {
8654:       PetscMPIInt rank;

8656:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8657:       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8658:       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8659:       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling"));
8660:     }
8661:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8662:     PetscInt psum;
8663:     if (pcbddc->coarse_ksp) psum = 1;
8664:     else psum = 0;
8665:     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8666:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8667:   }
8668:   /* determine if we can go multilevel */
8669:   if (multilevel_requested) {
8670:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8671:     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8672:   }
8673:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

8675:   /* dump subassembling pattern */
8676:   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8677:   /* compute dofs splitting and neumann boundaries for coarse dofs */
8678:   nedcfield = -1;
8679:   corners   = NULL;
8680:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8681:     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8682:     const PetscInt        *idxs;
8683:     ISLocalToGlobalMapping tmap;

8685:     /* create map between primal indices (in local representative ordering) and local primal numbering */
8686:     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8687:     /* allocate space for temporary storage */
8688:     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8689:     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8690:     /* allocate for IS array */
8691:     nisdofs = pcbddc->n_ISForDofsLocal;
8692:     if (pcbddc->nedclocal) {
8693:       if (pcbddc->nedfield > -1) {
8694:         nedcfield = pcbddc->nedfield;
8695:       } else {
8696:         nedcfield = 0;
8697:         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8698:         nisdofs = 1;
8699:       }
8700:     }
8701:     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8702:     nisvert = 0; /* nisvert is not used */
8703:     nis     = nisdofs + nisneu + nisvert;
8704:     PetscCall(PetscMalloc1(nis, &isarray));
8705:     /* dofs splitting */
8706:     for (i = 0; i < nisdofs; i++) {
8707:       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8708:       if (nedcfield != i) {
8709:         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8710:         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8711:         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8712:         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8713:       } else {
8714:         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8715:         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8716:         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8717:         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8718:         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8719:       }
8720:       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8721:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8722:       /* PetscCall(ISView(isarray[i],0)); */
8723:     }
8724:     /* neumann boundaries */
8725:     if (pcbddc->NeumannBoundariesLocal) {
8726:       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8727:       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8728:       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8729:       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8730:       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8731:       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8732:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8733:       /* PetscCall(ISView(isarray[nisdofs],0)); */
8734:     }
8735:     /* coordinates */
8736:     if (pcbddc->corner_selected) {
8737:       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8738:       PetscCall(ISGetLocalSize(corners, &tsize));
8739:       PetscCall(ISGetIndices(corners, &idxs));
8740:       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8741:       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8742:       PetscCall(ISRestoreIndices(corners, &idxs));
8743:       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8744:       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8745:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8746:     }
8747:     PetscCall(PetscFree(tidxs));
8748:     PetscCall(PetscFree(tidxs2));
8749:     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8750:   } else {
8751:     nis     = 0;
8752:     nisdofs = 0;
8753:     nisneu  = 0;
8754:     nisvert = 0;
8755:     isarray = NULL;
8756:   }
8757:   /* destroy no longer needed map */
8758:   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));

8760:   /* subassemble */
8761:   if (multilevel_allowed) {
8762:     Vec       vp[1];
8763:     PetscInt  nvecs = 0;
8764:     PetscBool reuse;

8766:     vp[0] = NULL;
8767:     /* XXX HDIV also */
8768:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8769:       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8770:       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8771:       PetscCall(VecSetType(vp[0], VECSTANDARD));
8772:       nvecs = 1;

8774:       if (pcbddc->divudotp) {
8775:         Mat      B, loc_divudotp;
8776:         Vec      v, p;
8777:         IS       dummy;
8778:         PetscInt np;

8780:         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8781:         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8782:         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8783:         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8784:         PetscCall(MatCreateVecs(B, &v, &p));
8785:         PetscCall(VecSet(p, 1.));
8786:         PetscCall(MatMultTranspose(B, p, v));
8787:         PetscCall(VecDestroy(&p));
8788:         PetscCall(MatDestroy(&B));
8789:         PetscCall(VecGetArray(vp[0], &array));
8790:         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8791:         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8792:         PetscCall(VecResetArray(pcbddc->vec1_P));
8793:         PetscCall(VecRestoreArray(vp[0], &array));
8794:         PetscCall(ISDestroy(&dummy));
8795:         PetscCall(VecDestroy(&v));
8796:       }
8797:     }
8798:     if (coarse_mat) reuse = PETSC_TRUE;
8799:     else reuse = PETSC_FALSE;
8800:     if (multi_element) {
8801:       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8802:       coarse_mat_is = t_coarse_mat_is;
8803:     } else {
8804:       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8805:       if (reuse) {
8806:         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8807:       } else {
8808:         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8809:       }
8810:       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8811:         PetscScalar       *arraym;
8812:         const PetscScalar *arrayv;
8813:         PetscInt           nl;
8814:         PetscCall(VecGetLocalSize(vp[0], &nl));
8815:         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8816:         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8817:         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8818:         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8819:         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8820:         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8821:         PetscCall(VecDestroy(&vp[0]));
8822:       } else {
8823:         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8824:       }
8825:     }
8826:   } else {
8827:     PetscBool default_sub;

8829:     PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub));
8830:     if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8831:     else {
8832:       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8833:       coarse_mat_is = t_coarse_mat_is;
8834:     }
8835:   }
8836:   if (coarse_mat_is || coarse_mat) {
8837:     if (!multilevel_allowed) {
8838:       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8839:     } else {
8840:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8841:       if (coarse_mat_is) {
8842:         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8843:         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8844:         coarse_mat = coarse_mat_is;
8845:       }
8846:     }
8847:   }
8848:   PetscCall(MatDestroy(&t_coarse_mat_is));
8849:   PetscCall(MatDestroy(&coarse_mat_is));

8851:   /* create local to global scatters for coarse problem */
8852:   if (compute_vecs) {
8853:     PetscInt lrows;
8854:     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8855:     if (coarse_mat) {
8856:       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8857:     } else {
8858:       lrows = 0;
8859:     }
8860:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8861:     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8862:     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8863:     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8864:     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8865:   }
8866:   PetscCall(ISDestroy(&coarse_is));

8868:   /* set defaults for coarse KSP and PC */
8869:   if (multilevel_allowed) {
8870:     coarse_ksp_type = KSPRICHARDSON;
8871:     coarse_pc_type  = PCBDDC;
8872:   } else {
8873:     coarse_ksp_type = KSPPREONLY;
8874:     coarse_pc_type  = PCREDUNDANT;
8875:   }

8877:   /* print some info if requested */
8878:   if (pcbddc->dbg_flag) {
8879:     if (!multilevel_allowed) {
8880:       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8881:       if (multilevel_requested) {
8882:         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8883:       } else if (pcbddc->max_levels) {
8884:         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8885:       }
8886:       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8887:     }
8888:   }

8890:   /* communicate coarse discrete gradient */
8891:   coarseG = NULL;
8892:   if (pcbddc->nedcG && multilevel_allowed) {
8893:     MPI_Comm ccomm;
8894:     if (coarse_mat) {
8895:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8896:     } else {
8897:       ccomm = MPI_COMM_NULL;
8898:     }
8899:     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8900:   }

8902:   /* create the coarse KSP object only once with defaults */
8903:   if (coarse_mat) {
8904:     PetscBool   isredundant, isbddc, force, valid;
8905:     PetscViewer dbg_viewer = NULL;
8906:     PetscBool   isset, issym, isher, isspd;

8908:     if (pcbddc->dbg_flag) {
8909:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8910:       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8911:     }
8912:     if (!pcbddc->coarse_ksp) {
8913:       char   prefix[256], str_level[16];
8914:       size_t len;

8916:       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8917:       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8918:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8919:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8920:       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8921:       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8922:       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8923:       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8924:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8925:       /* TODO is this logic correct? should check for coarse_mat type */
8926:       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8927:       /* prefix */
8928:       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8929:       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8930:       if (!pcbddc->current_level) {
8931:         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8932:         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8933:       } else {
8934:         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8935:         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8936:         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8937:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8938:         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8939:         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8940:         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8941:       }
8942:       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8943:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8944:       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8945:       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8946:       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8947:       /* allow user customization */
8948:       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8949:       /* get some info after set from options */
8950:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8951:       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8952:       force = PETSC_FALSE;
8953:       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8954:       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8955:       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8956:       if (multilevel_allowed && !force && !valid) {
8957:         isbddc = PETSC_TRUE;
8958:         PetscCall(PCSetType(pc_temp, PCBDDC));
8959:         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8960:         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8961:         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8962:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8963:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8964:           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8965:           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8966:           PetscOptionsEnd();
8967:           pc_temp->setfromoptionscalled++;
8968:         }
8969:       }
8970:     }
8971:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8972:     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8973:     if (nisdofs) {
8974:       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8975:       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8976:     }
8977:     if (nisneu) {
8978:       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8979:       PetscCall(ISDestroy(&isarray[nisdofs]));
8980:     }
8981:     if (nisvert) {
8982:       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8983:       PetscCall(ISDestroy(&isarray[nis - 1]));
8984:     }
8985:     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));

8987:     /* get some info after set from options */
8988:     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));

8990:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8991:     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8992:     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8993:     force = PETSC_FALSE;
8994:     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8995:     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8996:     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8997:     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8998:     if (isredundant) {
8999:       KSP inner_ksp;
9000:       PC  inner_pc;

9002:       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
9003:       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
9004:     }

9006:     /* parameters which miss an API */
9007:     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
9008:     if (isbddc) {
9009:       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;

9011:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
9012:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
9013:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
9014:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
9015:       if (pcbddc_coarse->benign_saddle_point) {
9016:         Mat                    coarsedivudotp_is;
9017:         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
9018:         IS                     row, col;
9019:         const PetscInt        *gidxs;
9020:         PetscInt               n, st, M, N;

9022:         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
9023:         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
9024:         st = st - n;
9025:         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
9026:         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
9027:         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
9028:         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
9029:         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
9030:         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
9031:         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
9032:         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
9033:         PetscCall(ISGetSize(row, &M));
9034:         PetscCall(MatGetSize(coarse_mat, &N, NULL));
9035:         PetscCall(ISDestroy(&row));
9036:         PetscCall(ISDestroy(&col));
9037:         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
9038:         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
9039:         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
9040:         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
9041:         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
9042:         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
9043:         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
9044:         PetscCall(MatDestroy(&coarsedivudotp));
9045:         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
9046:         PetscCall(MatDestroy(&coarsedivudotp_is));
9047:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
9048:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
9049:       }
9050:     }

9052:     /* propagate symmetry info of coarse matrix */
9053:     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
9054:     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
9055:     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
9056:     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
9057:     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
9058:     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
9059:     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));

9061:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
9062:     /* set operators */
9063:     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
9064:     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
9065:     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
9066:     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
9067:   }
9068:   PetscCall(MatDestroy(&coarseG));
9069:   PetscCall(PetscFree(isarray));
9070: #if 0
9071:   {
9072:     PetscViewer viewer;
9073:     char filename[256];
9074:     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
9075:     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
9076:     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
9077:     PetscCall(MatView(coarse_mat,viewer));
9078:     PetscCall(PetscViewerPopFormat(viewer));
9079:     PetscCall(PetscViewerDestroy(&viewer));
9080:   }
9081: #endif

9083:   if (corners) {
9084:     Vec             gv;
9085:     IS              is;
9086:     const PetscInt *idxs;
9087:     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
9088:     PetscScalar    *coords;

9090:     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
9091:     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
9092:     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
9093:     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
9094:     PetscCall(VecSetBlockSize(gv, cdim));
9095:     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
9096:     PetscCall(VecSetType(gv, VECSTANDARD));
9097:     PetscCall(VecSetFromOptions(gv));
9098:     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */

9100:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9101:     PetscCall(ISGetLocalSize(is, &n));
9102:     PetscCall(ISGetIndices(is, &idxs));
9103:     PetscCall(PetscMalloc1(n * cdim, &coords));
9104:     for (i = 0; i < n; i++) {
9105:       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
9106:     }
9107:     PetscCall(ISRestoreIndices(is, &idxs));
9108:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));

9110:     PetscCall(ISGetLocalSize(corners, &n));
9111:     PetscCall(ISGetIndices(corners, &idxs));
9112:     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
9113:     PetscCall(ISRestoreIndices(corners, &idxs));
9114:     PetscCall(PetscFree(coords));
9115:     PetscCall(VecAssemblyBegin(gv));
9116:     PetscCall(VecAssemblyEnd(gv));
9117:     PetscCall(VecGetArray(gv, &coords));
9118:     if (pcbddc->coarse_ksp) {
9119:       PC        coarse_pc;
9120:       PetscBool isbddc;

9122:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
9123:       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
9124:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
9125:         PetscReal *realcoords;

9127:         PetscCall(VecGetLocalSize(gv, &n));
9128: #if defined(PETSC_USE_COMPLEX)
9129:         PetscCall(PetscMalloc1(n, &realcoords));
9130:         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
9131: #else
9132:         realcoords = coords;
9133: #endif
9134:         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
9135: #if defined(PETSC_USE_COMPLEX)
9136:         PetscCall(PetscFree(realcoords));
9137: #endif
9138:       }
9139:     }
9140:     PetscCall(VecRestoreArray(gv, &coords));
9141:     PetscCall(VecDestroy(&gv));
9142:   }
9143:   PetscCall(ISDestroy(&corners));

9145:   if (pcbddc->coarse_ksp) {
9146:     Vec crhs, csol;

9148:     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9149:     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9150:     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9151:     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9152:   }
9153:   PetscCall(MatDestroy(&coarsedivudotp));

9155:   /* compute null space for coarse solver if the benign trick has been requested */
9156:   if (pcbddc->benign_null) {
9157:     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9158:     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9159:     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9160:     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9161:     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9162:     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9163:     if (coarse_mat) {
9164:       Vec          nullv;
9165:       PetscScalar *array, *array2;
9166:       PetscInt     nl;

9168:       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9169:       PetscCall(VecGetLocalSize(nullv, &nl));
9170:       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9171:       PetscCall(VecGetArray(nullv, &array2));
9172:       PetscCall(PetscArraycpy(array2, array, nl));
9173:       PetscCall(VecRestoreArray(nullv, &array2));
9174:       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9175:       PetscCall(VecNormalize(nullv, NULL));
9176:       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9177:       PetscCall(VecDestroy(&nullv));
9178:     }
9179:   }
9180:   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));

9182:   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9183:   if (pcbddc->coarse_ksp) {
9184:     PetscBool ispreonly;

9186:     if (CoarseNullSpace) {
9187:       PetscBool isnull;

9189:       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9190:       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9191:       /* TODO: add local nullspaces (if any) */
9192:     }
9193:     /* setup coarse ksp */
9194:     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9195:     /* Check coarse problem if in debug mode or if solving with an iterative method */
9196:     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9197:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9198:       KSP         check_ksp;
9199:       KSPType     check_ksp_type;
9200:       PC          check_pc;
9201:       Vec         check_vec, coarse_vec;
9202:       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9203:       PetscInt    its;
9204:       PetscBool   compute_eigs;
9205:       PetscReal  *eigs_r, *eigs_c;
9206:       PetscInt    neigs;
9207:       const char *prefix;

9209:       /* Create ksp object suitable for estimation of extreme eigenvalues */
9210:       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9211:       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9212:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9213:       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9214:       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9215:       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9216:       /* prevent from setup unneeded object */
9217:       PetscCall(KSPGetPC(check_ksp, &check_pc));
9218:       PetscCall(PCSetType(check_pc, PCNONE));
9219:       if (ispreonly) {
9220:         check_ksp_type = KSPPREONLY;
9221:         compute_eigs   = PETSC_FALSE;
9222:       } else {
9223:         check_ksp_type = KSPGMRES;
9224:         compute_eigs   = PETSC_TRUE;
9225:       }
9226:       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9227:       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9228:       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9229:       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9230:       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9231:       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9232:       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9233:       PetscCall(KSPSetFromOptions(check_ksp));
9234:       PetscCall(KSPSetUp(check_ksp));
9235:       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9236:       PetscCall(KSPSetPC(check_ksp, check_pc));
9237:       /* create random vec */
9238:       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9239:       PetscCall(VecSetRandom(check_vec, NULL));
9240:       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9241:       /* solve coarse problem */
9242:       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9243:       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9244:       /* set eigenvalue estimation if preonly has not been requested */
9245:       if (compute_eigs) {
9246:         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9247:         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9248:         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9249:         if (neigs) {
9250:           lambda_max = eigs_r[neigs - 1];
9251:           lambda_min = eigs_r[0];
9252:           if (pcbddc->use_coarse_estimates) {
9253:             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9254:               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9255:               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9256:             }
9257:           }
9258:         }
9259:       }

9261:       /* check coarse problem residual error */
9262:       if (pcbddc->dbg_flag) {
9263:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9264:         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9265:         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9266:         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9267:         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9268:         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9269:         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9270:         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9271:         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9272:         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9273:         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9274:         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9275:         if (compute_eigs) {
9276:           PetscReal          lambda_max_s, lambda_min_s;
9277:           KSPConvergedReason reason;
9278:           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9279:           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9280:           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9281:           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9282:           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9283:           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9284:         }
9285:         PetscCall(PetscViewerFlush(dbg_viewer));
9286:         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9287:       }
9288:       PetscCall(VecDestroy(&check_vec));
9289:       PetscCall(VecDestroy(&coarse_vec));
9290:       PetscCall(KSPDestroy(&check_ksp));
9291:       if (compute_eigs) {
9292:         PetscCall(PetscFree(eigs_r));
9293:         PetscCall(PetscFree(eigs_c));
9294:       }
9295:     }
9296:   }
9297:   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9298:   /* print additional info */
9299:   if (pcbddc->dbg_flag) {
9300:     /* waits until all processes reaches this point */
9301:     PetscCall(PetscBarrier((PetscObject)pc));
9302:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9303:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9304:   }

9306:   /* free memory */
9307:   PetscCall(MatDestroy(&coarse_mat));
9308:   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9309:   PetscFunctionReturn(PETSC_SUCCESS);
9310: }

9312: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9313: {
9314:   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9315:   PC_IS          *pcis   = (PC_IS *)pc->data;
9316:   IS              subset, subset_mult, subset_n;
9317:   PetscInt        local_size, coarse_size = 0;
9318:   PetscInt       *local_primal_indices = NULL;
9319:   const PetscInt *t_local_primal_indices;

9321:   PetscFunctionBegin;
9322:   /* Compute global number of coarse dofs */
9323:   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9324:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9325:   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9326:   PetscCall(ISDestroy(&subset_n));
9327:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9328:   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9329:   PetscCall(ISDestroy(&subset));
9330:   PetscCall(ISDestroy(&subset_mult));
9331:   PetscCall(ISGetLocalSize(subset_n, &local_size));
9332:   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9333:   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9334:   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9335:   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9336:   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9337:   PetscCall(ISDestroy(&subset_n));

9339:   if (pcbddc->dbg_flag) {
9340:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9341:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9342:     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9343:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9344:   }

9346:   /* get back data */
9347:   *coarse_size_n          = coarse_size;
9348:   *local_primal_indices_n = local_primal_indices;
9349:   PetscFunctionReturn(PETSC_SUCCESS);
9350: }

9352: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9353: {
9354:   IS           localis_t;
9355:   PetscInt     i, lsize, *idxs, n;
9356:   PetscScalar *vals;

9358:   PetscFunctionBegin;
9359:   /* get indices in local ordering exploiting local to global map */
9360:   PetscCall(ISGetLocalSize(globalis, &lsize));
9361:   PetscCall(PetscMalloc1(lsize, &vals));
9362:   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9363:   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9364:   PetscCall(VecSet(gwork, 0.0));
9365:   PetscCall(VecSet(lwork, 0.0));
9366:   if (idxs) { /* multilevel guard */
9367:     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9368:     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9369:   }
9370:   PetscCall(VecAssemblyBegin(gwork));
9371:   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9372:   PetscCall(PetscFree(vals));
9373:   PetscCall(VecAssemblyEnd(gwork));
9374:   /* now compute set in local ordering */
9375:   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9376:   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9377:   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9378:   PetscCall(VecGetSize(lwork, &n));
9379:   for (i = 0, lsize = 0; i < n; i++) {
9380:     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9381:   }
9382:   PetscCall(PetscMalloc1(lsize, &idxs));
9383:   for (i = 0, lsize = 0; i < n; i++) {
9384:     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9385:   }
9386:   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9387:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9388:   *localis = localis_t;
9389:   PetscFunctionReturn(PETSC_SUCCESS);
9390: }

9392: PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9393: {
9394:   PC_IS   *pcis   = (PC_IS *)pc->data;
9395:   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9396:   PC_IS   *pcisf;
9397:   PC_BDDC *pcbddcf;
9398:   PC       pcf;

9400:   PetscFunctionBegin;
9401:   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9402:   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9403:   PetscCall(PCSetType(pcf, PCBDDC));

9405:   pcisf   = (PC_IS *)pcf->data;
9406:   pcbddcf = (PC_BDDC *)pcf->data;

9408:   pcisf->is_B_local = pcis->is_B_local;
9409:   pcisf->vec1_N     = pcis->vec1_N;
9410:   pcisf->BtoNmap    = pcis->BtoNmap;
9411:   pcisf->n          = pcis->n;
9412:   pcisf->n_B        = pcis->n_B;

9414:   PetscCall(PetscFree(pcbddcf->mat_graph));
9415:   PetscCall(PetscFree(pcbddcf->sub_schurs));
9416:   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9417:   pcbddcf->sub_schurs            = schurs;
9418:   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9419:   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9420:   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9421:   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9422:   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9423:   pcbddcf->use_faces             = PETSC_TRUE;
9424:   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9425:   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9426:   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9427:   pcbddcf->fake_change           = PETSC_TRUE;
9428:   pcbddcf->dbg_flag              = pcbddc->dbg_flag;

9430:   PetscCall(PCBDDCAdaptiveSelection(pcf));
9431:   PetscCall(PCBDDCConstraintsSetUp(pcf));

9433:   *change = pcbddcf->ConstraintMatrix;
9434:   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9435:   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9436:   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;

9438:   if (schurs) pcbddcf->sub_schurs = NULL;
9439:   pcbddcf->ConstraintMatrix = NULL;
9440:   pcbddcf->mat_graph        = NULL;
9441:   pcisf->is_B_local         = NULL;
9442:   pcisf->vec1_N             = NULL;
9443:   pcisf->BtoNmap            = NULL;
9444:   PetscCall(PCDestroy(&pcf));
9445:   PetscFunctionReturn(PETSC_SUCCESS);
9446: }

9448: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9449: {
9450:   PC_IS          *pcis       = (PC_IS *)pc->data;
9451:   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9452:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9453:   Mat             S_j;
9454:   PetscInt       *used_xadj, *used_adjncy;
9455:   PetscBool       free_used_adj;

9457:   PetscFunctionBegin;
9458:   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9459:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9460:   free_used_adj = PETSC_FALSE;
9461:   if (pcbddc->sub_schurs_layers == -1) {
9462:     used_xadj   = NULL;
9463:     used_adjncy = NULL;
9464:   } else {
9465:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9466:       used_xadj   = pcbddc->mat_graph->xadj;
9467:       used_adjncy = pcbddc->mat_graph->adjncy;
9468:     } else if (pcbddc->computed_rowadj) {
9469:       used_xadj   = pcbddc->mat_graph->xadj;
9470:       used_adjncy = pcbddc->mat_graph->adjncy;
9471:     } else {
9472:       PetscBool       flg_row = PETSC_FALSE;
9473:       const PetscInt *xadj, *adjncy;
9474:       PetscInt        nvtxs;

9476:       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9477:       if (flg_row) {
9478:         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9479:         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9480:         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9481:         free_used_adj = PETSC_TRUE;
9482:       } else {
9483:         pcbddc->sub_schurs_layers = -1;
9484:         used_xadj                 = NULL;
9485:         used_adjncy               = NULL;
9486:       }
9487:       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9488:     }
9489:   }

9491:   /* setup sub_schurs data */
9492:   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9493:   if (!sub_schurs->schur_explicit) {
9494:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9495:     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9496:     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9497:   } else {
9498:     Mat       change        = NULL;
9499:     Vec       scaling       = NULL;
9500:     IS        change_primal = NULL, iP;
9501:     PetscInt  benign_n;
9502:     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9503:     PetscBool need_change       = PETSC_FALSE;
9504:     PetscBool discrete_harmonic = PETSC_FALSE;

9506:     if (!pcbddc->use_vertices && reuse_solvers) {
9507:       PetscInt n_vertices;

9509:       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9510:       reuse_solvers = (PetscBool)!n_vertices;
9511:     }
9512:     if (!pcbddc->benign_change_explicit) {
9513:       benign_n = pcbddc->benign_n;
9514:     } else {
9515:       benign_n = 0;
9516:     }
9517:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9518:        We need a global reduction to avoid possible deadlocks.
9519:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9520:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9521:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9522:       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9523:       need_change = (PetscBool)(!need_change);
9524:     }
9525:     /* If the user defines additional constraints, we import them here */
9526:     if (need_change) {
9527:       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9528:       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9529:     }
9530:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

9532:     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9533:     if (iP) {
9534:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9535:       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9536:       PetscOptionsEnd();
9537:     }
9538:     if (discrete_harmonic) {
9539:       Mat A;
9540:       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9541:       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9542:       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9543:       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9544:                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9545:       PetscCall(MatDestroy(&A));
9546:     } else {
9547:       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9548:                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9549:     }
9550:     PetscCall(MatDestroy(&change));
9551:     PetscCall(ISDestroy(&change_primal));
9552:   }
9553:   PetscCall(MatDestroy(&S_j));

9555:   /* free adjacency */
9556:   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9557:   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9558:   PetscFunctionReturn(PETSC_SUCCESS);
9559: }

9561: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9562: {
9563:   PC_IS      *pcis   = (PC_IS *)pc->data;
9564:   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9565:   PCBDDCGraph graph;

9567:   PetscFunctionBegin;
9568:   /* attach interface graph for determining subsets */
9569:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9570:     IS       verticesIS, verticescomm;
9571:     PetscInt vsize, *idxs;

9573:     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9574:     PetscCall(ISGetSize(verticesIS, &vsize));
9575:     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9576:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9577:     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9578:     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9579:     PetscCall(PCBDDCGraphCreate(&graph));
9580:     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9581:     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9582:     PetscCall(ISDestroy(&verticescomm));
9583:     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9584:   } else {
9585:     graph = pcbddc->mat_graph;
9586:   }
9587:   /* print some info */
9588:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9589:     IS       vertices;
9590:     PetscInt nv, nedges, nfaces;
9591:     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9592:     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9593:     PetscCall(ISGetSize(vertices, &nv));
9594:     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9595:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9596:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9597:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9598:     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9599:     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9600:     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9601:     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9602:   }

9604:   /* sub_schurs init */
9605:   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9606:   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));

9608:   /* free graph struct */
9609:   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9610:   PetscFunctionReturn(PETSC_SUCCESS);
9611: }

9613: static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9614: {
9615:   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9616:   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9617:   const PetscInt *idxs;
9618:   IS              gis;

9620:   PetscFunctionBegin;
9621:   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9622:   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9623:   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9624:   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9625:   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9626:   PetscCall(ISGetLocalSize(is, &ni));
9627:   PetscCall(ISGetIndices(is, &idxs));
9628:   for (PetscInt i = 0; i < ni; i++) {
9629:     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9630:     matis->sf_leafdata[idxs[i]] = 1;
9631:   }
9632:   PetscCall(ISRestoreIndices(is, &idxs));
9633:   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9634:   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9635:   ln = 0;
9636:   for (PetscInt i = 0; i < n; i++) {
9637:     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9638:   }
9639:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9640:   PetscCall(ISView(gis, viewer));
9641:   PetscCall(ISDestroy(&gis));
9642:   PetscFunctionReturn(PETSC_SUCCESS);
9643: }

9645: PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9646: {
9647:   PetscInt    header[11];
9648:   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9649:   PetscViewer viewer;
9650:   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);

9652:   PetscFunctionBegin;
9653:   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9654:   if (load) {
9655:     IS  is;
9656:     Mat A;

9658:     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9659:     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9660:     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9661:     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9662:     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9663:     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9664:     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9665:     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9666:     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9667:     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9668:     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9669:     if (header[0]) {
9670:       PetscCall(ISCreate(comm, &is));
9671:       PetscCall(ISLoad(is, viewer));
9672:       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9673:       PetscCall(ISDestroy(&is));
9674:     }
9675:     if (header[1]) {
9676:       PetscCall(ISCreate(comm, &is));
9677:       PetscCall(ISLoad(is, viewer));
9678:       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9679:       PetscCall(ISDestroy(&is));
9680:     }
9681:     if (header[2]) {
9682:       IS *isarray;

9684:       PetscCall(PetscMalloc1(header[2], &isarray));
9685:       for (PetscInt i = 0; i < header[2]; i++) {
9686:         PetscCall(ISCreate(comm, &isarray[i]));
9687:         PetscCall(ISLoad(isarray[i], viewer));
9688:       }
9689:       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9690:       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9691:       PetscCall(PetscFree(isarray));
9692:     }
9693:     if (header[3]) {
9694:       PetscCall(ISCreate(comm, &is));
9695:       PetscCall(ISLoad(is, viewer));
9696:       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9697:       PetscCall(ISDestroy(&is));
9698:     }
9699:     if (header[4]) {
9700:       PetscCall(MatCreate(comm, &A));
9701:       PetscCall(MatSetType(A, MATAIJ));
9702:       PetscCall(MatLoad(A, viewer));
9703:       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9704:       PetscCall(MatDestroy(&A));
9705:     }
9706:     if (header[9]) {
9707:       PetscCall(MatCreate(comm, &A));
9708:       PetscCall(MatSetType(A, MATIS));
9709:       PetscCall(MatLoad(A, viewer));
9710:       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9711:       PetscCall(MatDestroy(&A));
9712:     }
9713:   } else {
9714:     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9715:     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9716:     header[2]  = pcbddc->n_ISForDofsLocal;
9717:     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9718:     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9719:     header[5]  = pcbddc->nedorder;
9720:     header[6]  = pcbddc->nedfield;
9721:     header[7]  = (PetscInt)pcbddc->nedglobal;
9722:     header[8]  = (PetscInt)pcbddc->conforming;
9723:     header[9]  = (PetscInt)!!pcbddc->divudotp;
9724:     header[10] = (PetscInt)pcbddc->divudotp_trans;
9725:     if (header[4]) header[3] = 0;

9727:     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9728:     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9729:     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9730:     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9731:     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9732:     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9733:     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9734:   }
9735:   PetscCall(PetscViewerDestroy(&viewer));
9736:   PetscFunctionReturn(PETSC_SUCCESS);
9737: }

9739: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9740: static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9741: {
9742:   Mat         At;
9743:   IS          rows;
9744:   PetscInt    rst, ren;
9745:   PetscLayout rmap;

9747:   PetscFunctionBegin;
9748:   rst = ren = 0;
9749:   if (ccomm != MPI_COMM_NULL) {
9750:     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9751:     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9752:     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9753:     PetscCall(PetscLayoutSetUp(rmap));
9754:     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9755:   }
9756:   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9757:   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9758:   PetscCall(ISDestroy(&rows));

9760:   if (ccomm != MPI_COMM_NULL) {
9761:     Mat_MPIAIJ *a, *b;
9762:     IS          from, to;
9763:     Vec         gvec;
9764:     PetscInt    lsize;

9766:     PetscCall(MatCreate(ccomm, B));
9767:     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9768:     PetscCall(MatSetType(*B, MATAIJ));
9769:     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9770:     PetscCall(PetscLayoutSetUp((*B)->cmap));
9771:     a = (Mat_MPIAIJ *)At->data;
9772:     b = (Mat_MPIAIJ *)(*B)->data;
9773:     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9774:     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9775:     PetscCall(PetscObjectReference((PetscObject)a->A));
9776:     PetscCall(PetscObjectReference((PetscObject)a->B));
9777:     b->A = a->A;
9778:     b->B = a->B;

9780:     b->donotstash   = a->donotstash;
9781:     b->roworiented  = a->roworiented;
9782:     b->rowindices   = NULL;
9783:     b->rowvalues    = NULL;
9784:     b->getrowactive = PETSC_FALSE;

9786:     (*B)->rmap         = rmap;
9787:     (*B)->factortype   = A->factortype;
9788:     (*B)->assembled    = PETSC_TRUE;
9789:     (*B)->insertmode   = NOT_SET_VALUES;
9790:     (*B)->preallocated = PETSC_TRUE;

9792:     if (a->colmap) {
9793: #if defined(PETSC_USE_CTABLE)
9794:       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9795: #else
9796:       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9797:       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9798: #endif
9799:     } else b->colmap = NULL;
9800:     if (a->garray) {
9801:       PetscInt len;
9802:       len = a->B->cmap->n;
9803:       PetscCall(PetscMalloc1(len + 1, &b->garray));
9804:       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9805:     } else b->garray = NULL;

9807:     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9808:     b->lvec = a->lvec;

9810:     /* cannot use VecScatterCopy */
9811:     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9812:     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9813:     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9814:     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9815:     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9816:     PetscCall(ISDestroy(&from));
9817:     PetscCall(ISDestroy(&to));
9818:     PetscCall(VecDestroy(&gvec));
9819:   }
9820:   PetscCall(MatDestroy(&At));
9821:   PetscFunctionReturn(PETSC_SUCCESS);
9822: }

9824: /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9825: static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9826: {
9827:   PetscBool isaij;
9828:   MPI_Comm  comm;

9830:   PetscFunctionBegin;
9831:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9832:   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9833:   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9834:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9835:   if (isaij) { /* SeqAIJ supports repeated rows */
9836:     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9837:   } else {
9838:     Mat                A_loc;
9839:     Mat_SeqAIJ        *da;
9840:     PetscSF            sf;
9841:     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9842:     PetscScalar       *daa;
9843:     const PetscInt    *idxs;
9844:     const PetscSFNode *iremotes;
9845:     PetscSFNode       *remotes;

9847:     /* SF for incoming rows */
9848:     PetscCall(PetscSFCreate(comm, &sf));
9849:     PetscCall(ISGetLocalSize(rows, &ni));
9850:     PetscCall(ISGetIndices(rows, &idxs));
9851:     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9852:     PetscCall(ISRestoreIndices(rows, &idxs));

9854:     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9855:     da = (Mat_SeqAIJ *)A_loc->data;
9856:     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9857:     for (PetscInt i = 0; i < m; i++) {
9858:       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9859:       rdata[2 * i + 1] = da->i[i];
9860:     }
9861:     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9862:     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9863:     PetscCall(PetscMalloc1(ni + 1, &di));
9864:     di[0] = 0;
9865:     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9866:     PetscCall(PetscMalloc1(di[ni], &dj));
9867:     PetscCall(PetscMalloc1(di[ni], &daa));
9868:     PetscCall(PetscMalloc1(di[ni], &remotes));

9870:     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));

9872:     /* SF graph for nonzeros */
9873:     c = 0;
9874:     for (PetscInt i = 0; i < ni; i++) {
9875:       const PetscInt rank  = iremotes[i].rank;
9876:       const PetscInt rsize = ldata[2 * i];
9877:       for (PetscInt j = 0; j < rsize; j++) {
9878:         remotes[c].rank  = rank;
9879:         remotes[c].index = ldata[2 * i + 1] + j;
9880:         c++;
9881:       }
9882:     }
9883:     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9884:     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9885:     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9886:     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9887:     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9888:     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));

9890:     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9891:     PetscCall(MatDestroy(&A_loc));
9892:     PetscCall(PetscSFDestroy(&sf));
9893:     PetscCall(PetscFree(di));
9894:     PetscCall(PetscFree(dj));
9895:     PetscCall(PetscFree(daa));
9896:     PetscCall(PetscFree(remotes));
9897:     PetscCall(PetscFree2(ldata, rdata));
9898:   }
9899:   PetscFunctionReturn(PETSC_SUCCESS);
9900: }