Actual source code: bddcprivate.c

petsc-3.10.5 2019-03-28
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>
  5:  #include <petscdmplex.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/sfimpl.h>
  8:  #include <petsc/private/dmpleximpl.h>
  9:  #include <petscdmda.h>

 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17: #if !defined(PETSC_USE_COMPLEX)
 18:   PetscScalar    *uwork,*data,*U, ds = 0.;
 19:   PetscReal      *sing;
 20:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 21:   PetscInt       ulw,i,nr,nc,n;

 25: #if defined(PETSC_MISSING_LAPACK_GESVD)
 26:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 27: #else
 28:   MatGetSize(A,&nr,&nc);
 29:   if (!nr || !nc) return(0);

 31:   /* workspace */
 32:   if (!work) {
 33:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 34:     PetscMalloc1(ulw,&uwork);
 35:   } else {
 36:     ulw   = lw;
 37:     uwork = work;
 38:   }
 39:   n = PetscMin(nr,nc);
 40:   if (!rwork) {
 41:     PetscMalloc1(n,&sing);
 42:   } else {
 43:     sing = rwork;
 44:   }

 46:   /* SVD */
 47:   PetscMalloc1(nr*nr,&U);
 48:   PetscBLASIntCast(nr,&bM);
 49:   PetscBLASIntCast(nc,&bN);
 50:   PetscBLASIntCast(ulw,&lwork);
 51:   MatDenseGetArray(A,&data);
 52:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 53:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 54:   PetscFPTrapPop();
 55:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 56:   MatDenseRestoreArray(A,&data);
 57:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 58:   if (!rwork) {
 59:     PetscFree(sing);
 60:   }
 61:   if (!work) {
 62:     PetscFree(uwork);
 63:   }
 64:   /* create B */
 65:   if (!range) {
 66:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 67:     MatDenseGetArray(*B,&data);
 68:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 69:   } else {
 70:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 71:     MatDenseGetArray(*B,&data);
 72:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 73:   }
 74:   MatDenseRestoreArray(*B,&data);
 75:   PetscFree(U);
 76: #endif
 77: #else /* PETSC_USE_COMPLEX */
 79:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 80: #endif
 81:   return(0);
 82: }

 /* Debug-only counters, compiled in only when PRINT_GDET is defined.
   They are read by the PRINT_GDET dump in PCBDDCComputeNedelecChangeEdge to
   build the output file name "Gdet_l<lev>_r<rank>_cc<inc>.m"; inc is bumped
   once per dump.
   TODO: remove together with the PRINT_GDET code. */
#ifdef PRINT_GDET
static int lev = 0;
static int inc = 0;
#endif

 90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 91: {
 93:   Mat            GE,GEd;
 94:   PetscInt       rsize,csize,esize;
 95:   PetscScalar    *ptr;

 98:   ISGetSize(edge,&esize);
 99:   if (!esize) return(0);
100:   ISGetSize(extrow,&rsize);
101:   ISGetSize(extcol,&csize);

103:   /* gradients */
104:   ptr  = work + 5*esize;
105:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108:   MatDestroy(&GE);

110:   /* constants */
111:   ptr += rsize*csize;
112:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115:   MatDestroy(&GE);
116:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117:   MatDestroy(&GEd);

119:   if (corners) {
120:     Mat            GEc;
121:     PetscScalar    *vals,v;

123:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125:     MatDenseGetArray(GEd,&vals);
126:     /* v    = PetscAbsScalar(vals[0]) */;
127:     v    = 1.;
128:     cvals[0] = vals[0]/v;
129:     cvals[1] = vals[1]/v;
130:     MatDenseRestoreArray(GEd,&vals);
131:     MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133:     {
134:       PetscViewer viewer;
135:       char filename[256];
136:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139:       PetscObjectSetName((PetscObject)GEc,"GEc");
140:       MatView(GEc,viewer);
141:       PetscObjectSetName((PetscObject)(*GKins),"GK");
142:       MatView(*GKins,viewer);
143:       PetscObjectSetName((PetscObject)GEd,"Gproj");
144:       MatView(GEd,viewer);
145:       PetscViewerDestroy(&viewer);
146:     }
147: #endif
148:     MatDestroy(&GEd);
149:     MatDestroy(&GEc);
150:   }

152:   return(0);
153: }

155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
158:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
159:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160:   Vec                    tvec;
161:   PetscSF                sfv;
162:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163:   MPI_Comm               comm;
164:   IS                     lned,primals,allprimals,nedfieldlocal;
165:   IS                     *eedges,*extrows,*extcols,*alleedges;
166:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167:   PetscScalar            *vals,*work;
168:   PetscReal              *rwork;
169:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
170:   PetscInt               ne,nv,Lv,order,n,field;
171:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
172:   PetscInt               i,j,extmem,cum,maxsize,nee;
173:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174:   PetscInt               *sfvleaves,*sfvroots;
175:   PetscInt               *corners,*cedges;
176:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178:   PetscInt               *emarks;
179: #endif
180:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181:   PetscErrorCode         ierr;

184:   /* If the discrete gradient is defined for a subset of dofs and global is true,
185:      it assumes G is given in global ordering for all the dofs.
186:      Otherwise, the ordering is global for the Nedelec field */
187:   order      = pcbddc->nedorder;
188:   conforming = pcbddc->conforming;
189:   field      = pcbddc->nedfield;
190:   global     = pcbddc->nedglobal;
191:   setprimal  = PETSC_FALSE;
192:   print      = PETSC_FALSE;
193:   singular   = PETSC_FALSE;

195:   /* Command line customization */
196:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200:   /* print debug info TODO: to be removed */
201:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202:   PetscOptionsEnd();

204:   /* Return if there are no edges in the decomposition and the problem is not singular */
205:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206:   ISLocalToGlobalMappingGetSize(al2g,&n);
207:   PetscObjectGetComm((PetscObject)pc,&comm);
208:   if (!singular) {
209:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210:     lrc[0] = PETSC_FALSE;
211:     for (i=0;i<n;i++) {
212:       if (PetscRealPart(vals[i]) > 2.) {
213:         lrc[0] = PETSC_TRUE;
214:         break;
215:       }
216:     }
217:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219:     if (!lrc[1]) return(0);
220:   }

222:   /* Get Nedelec field */
223:   MatISSetUpSF(pc->pmat);
224:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
225:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
226:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
227:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
228:     ISGetLocalSize(nedfieldlocal,&ne);
229:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
230:     ne            = n;
231:     nedfieldlocal = NULL;
232:     global        = PETSC_TRUE;
233:   } else if (field == PETSC_DECIDE) {
234:     PetscInt rst,ren,*idx;

236:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
237:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
238:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
239:     for (i=rst;i<ren;i++) {
240:       PetscInt nc;

242:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
244:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
245:     }
246:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
248:     PetscMalloc1(n,&idx);
249:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
250:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
251:   } else {
252:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
253:   }

255:   /* Sanity checks */
256:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
257:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
258:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);

260:   /* Just set primal dofs and return */
261:   if (setprimal) {
262:     IS       enedfieldlocal;
263:     PetscInt *eidxs;

265:     PetscMalloc1(ne,&eidxs);
266:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
267:     if (nedfieldlocal) {
268:       ISGetIndices(nedfieldlocal,&idxs);
269:       for (i=0,cum=0;i<ne;i++) {
270:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
271:           eidxs[cum++] = idxs[i];
272:         }
273:       }
274:       ISRestoreIndices(nedfieldlocal,&idxs);
275:     } else {
276:       for (i=0,cum=0;i<ne;i++) {
277:         if (PetscRealPart(vals[i]) > 2.) {
278:           eidxs[cum++] = i;
279:         }
280:       }
281:     }
282:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
283:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
284:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
285:     PetscFree(eidxs);
286:     ISDestroy(&nedfieldlocal);
287:     ISDestroy(&enedfieldlocal);
288:     return(0);
289:   }

291:   /* Compute some l2g maps */
292:   if (nedfieldlocal) {
293:     IS is;

295:     /* need to map from the local Nedelec field to local numbering */
296:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
297:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
298:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
299:     ISLocalToGlobalMappingCreateIS(is,&al2g);
300:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
301:     if (global) {
302:       PetscObjectReference((PetscObject)al2g);
303:       el2g = al2g;
304:     } else {
305:       IS gis;

307:       ISRenumber(is,NULL,NULL,&gis);
308:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
309:       ISDestroy(&gis);
310:     }
311:     ISDestroy(&is);
312:   } else {
313:     /* restore default */
314:     pcbddc->nedfield = -1;
315:     /* one ref for the destruction of al2g, one for el2g */
316:     PetscObjectReference((PetscObject)al2g);
317:     PetscObjectReference((PetscObject)al2g);
318:     el2g = al2g;
319:     fl2g = NULL;
320:   }

322:   /* Start communication to drop connections for interior edges (for cc analysis only) */
323:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
324:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
325:   if (nedfieldlocal) {
326:     ISGetIndices(nedfieldlocal,&idxs);
327:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
328:     ISRestoreIndices(nedfieldlocal,&idxs);
329:   } else {
330:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
331:   }
332:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

335:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
336:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
337:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
338:     if (global) {
339:       PetscInt rst;

341:       MatGetOwnershipRange(G,&rst,NULL);
342:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
343:         if (matis->sf_rootdata[i] < 2) {
344:           matis->sf_rootdata[cum++] = i + rst;
345:         }
346:       }
347:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
348:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
349:     } else {
350:       PetscInt *tbz;

352:       PetscMalloc1(ne,&tbz);
353:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
355:       ISGetIndices(nedfieldlocal,&idxs);
356:       for (i=0,cum=0;i<ne;i++)
357:         if (matis->sf_leafdata[idxs[i]] == 1)
358:           tbz[cum++] = i;
359:       ISRestoreIndices(nedfieldlocal,&idxs);
360:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
361:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
362:       PetscFree(tbz);
363:     }
364:   } else { /* we need the entire G to infer the nullspace */
365:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
366:     G    = pcbddc->discretegradient;
367:   }

369:   /* Extract subdomain relevant rows of G */
370:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
371:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
372:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
373:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
374:   ISDestroy(&lned);
375:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
376:   MatDestroy(&lGall);
377:   MatISGetLocalMat(lGis,&lG);

379:   /* SF for nodal dofs communications */
380:   MatGetLocalSize(G,NULL,&Lv);
381:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
382:   PetscObjectReference((PetscObject)vl2g);
383:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
384:   PetscSFCreate(comm,&sfv);
385:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
386:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
387:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
388:   i    = singular ? 2 : 1;
389:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

391:   /* Destroy temporary G created in MATIS format and modified G */
392:   PetscObjectReference((PetscObject)lG);
393:   MatDestroy(&lGis);
394:   MatDestroy(&G);

396:   if (print) {
397:     PetscObjectSetName((PetscObject)lG,"initial_lG");
398:     MatView(lG,NULL);
399:   }

401:   /* Save lG for values insertion in change of basis */
402:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

404:   /* Analyze the edge-nodes connections (duplicate lG) */
405:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
406:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
407:   PetscBTCreate(nv,&btv);
408:   PetscBTCreate(ne,&bte);
409:   PetscBTCreate(ne,&btb);
410:   PetscBTCreate(ne,&btbd);
411:   PetscBTCreate(nv,&btvcand);
412:   /* need to import the boundary specification to ensure the
413:      proper detection of coarse edges' endpoints */
414:   if (pcbddc->DirichletBoundariesLocal) {
415:     IS is;

417:     if (fl2g) {
418:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
419:     } else {
420:       is = pcbddc->DirichletBoundariesLocal;
421:     }
422:     ISGetLocalSize(is,&cum);
423:     ISGetIndices(is,&idxs);
424:     for (i=0;i<cum;i++) {
425:       if (idxs[i] >= 0) {
426:         PetscBTSet(btb,idxs[i]);
427:         PetscBTSet(btbd,idxs[i]);
428:       }
429:     }
430:     ISRestoreIndices(is,&idxs);
431:     if (fl2g) {
432:       ISDestroy(&is);
433:     }
434:   }
435:   if (pcbddc->NeumannBoundariesLocal) {
436:     IS is;

438:     if (fl2g) {
439:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
440:     } else {
441:       is = pcbddc->NeumannBoundariesLocal;
442:     }
443:     ISGetLocalSize(is,&cum);
444:     ISGetIndices(is,&idxs);
445:     for (i=0;i<cum;i++) {
446:       if (idxs[i] >= 0) {
447:         PetscBTSet(btb,idxs[i]);
448:       }
449:     }
450:     ISRestoreIndices(is,&idxs);
451:     if (fl2g) {
452:       ISDestroy(&is);
453:     }
454:   }

456:   /* Count neighs per dof */
457:   ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
458:   ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);

460:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
461:      for proper detection of coarse edges' endpoints */
462:   PetscBTCreate(ne,&btee);
463:   for (i=0;i<ne;i++) {
464:     if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
465:       PetscBTSet(btee,i);
466:     }
467:   }
468:   PetscMalloc1(ne,&marks);
469:   if (!conforming) {
470:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
471:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
472:   }
473:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
474:   MatSeqAIJGetArray(lGe,&vals);
475:   cum  = 0;
476:   for (i=0;i<ne;i++) {
477:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
478:     if (!PetscBTLookup(btee,i)) {
479:       marks[cum++] = i;
480:       continue;
481:     }
482:     /* set badly connected edge dofs as primal */
483:     if (!conforming) {
484:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
485:         marks[cum++] = i;
486:         PetscBTSet(bte,i);
487:         for (j=ii[i];j<ii[i+1];j++) {
488:           PetscBTSet(btv,jj[j]);
489:         }
490:       } else {
 491:         /* each edge dof should be connected through a certain number of nodal dofs
492:            to other edge dofs belonging to coarse edges
493:            - at most 2 endpoints
494:            - order-1 interior nodal dofs
495:            - no undefined nodal dofs (nconn < order)
496:         */
497:         PetscInt ends = 0,ints = 0, undef = 0;
498:         for (j=ii[i];j<ii[i+1];j++) {
499:           PetscInt v = jj[j],k;
500:           PetscInt nconn = iit[v+1]-iit[v];
501:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
502:           if (nconn > order) ends++;
503:           else if (nconn == order) ints++;
504:           else undef++;
505:         }
506:         if (undef || ends > 2 || ints != order -1) {
507:           marks[cum++] = i;
508:           PetscBTSet(bte,i);
509:           for (j=ii[i];j<ii[i+1];j++) {
510:             PetscBTSet(btv,jj[j]);
511:           }
512:         }
513:       }
514:     }
515:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
516:     if (!order && ii[i+1] != ii[i]) {
517:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
518:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
519:     }
520:   }
521:   PetscBTDestroy(&btee);
522:   MatSeqAIJRestoreArray(lGe,&vals);
523:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
524:   if (!conforming) {
525:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
526:     MatDestroy(&lGt);
527:   }
528:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

530:   /* identify splitpoints and corner candidates */
531:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
532:   if (print) {
533:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
534:     MatView(lGe,NULL);
535:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
536:     MatView(lGt,NULL);
537:   }
538:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
539:   MatSeqAIJGetArray(lGt,&vals);
540:   for (i=0;i<nv;i++) {
541:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
542:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
543:     if (!order) { /* variable order */
544:       PetscReal vorder = 0.;

546:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
547:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
548:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
549:       ord  = 1;
550:     }
551: #if defined(PETSC_USE_DEBUG)
552:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
553: #endif
554:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
555:       if (PetscBTLookup(btbd,jj[j])) {
556:         bdir = PETSC_TRUE;
557:         break;
558:       }
559:       if (vc != ecount[jj[j]]) {
560:         sneighs = PETSC_FALSE;
561:       } else {
562:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
563:         for (k=0;k<vc;k++) {
564:           if (vn[k] != en[k]) {
565:             sneighs = PETSC_FALSE;
566:             break;
567:           }
568:         }
569:       }
570:     }
571:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
572:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
573:       PetscBTSet(btv,i);
574:     } else if (test == ord) {
575:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
576:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
577:         PetscBTSet(btv,i);
578:       } else {
579:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
580:         PetscBTSet(btvcand,i);
581:       }
582:     }
583:   }
584:   ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
585:   ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
586:   PetscBTDestroy(&btbd);

588:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
589:   if (order != 1) {
590:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
591:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
592:     for (i=0;i<nv;i++) {
593:       if (PetscBTLookup(btvcand,i)) {
594:         PetscBool found = PETSC_FALSE;
595:         for (j=ii[i];j<ii[i+1] && !found;j++) {
596:           PetscInt k,e = jj[j];
597:           if (PetscBTLookup(bte,e)) continue;
598:           for (k=iit[e];k<iit[e+1];k++) {
599:             PetscInt v = jjt[k];
600:             if (v != i && PetscBTLookup(btvcand,v)) {
601:               found = PETSC_TRUE;
602:               break;
603:             }
604:           }
605:         }
606:         if (!found) {
607:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D CLEARED\n",i);
608:           PetscBTClear(btvcand,i);
609:         } else {
610:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D ACCEPTED\n",i);
611:         }
612:       }
613:     }
614:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
615:   }
616:   MatSeqAIJRestoreArray(lGt,&vals);
617:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
618:   MatDestroy(&lGe);

620:   /* Get the local G^T explicitly */
621:   MatDestroy(&lGt);
622:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
623:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

625:   /* Mark interior nodal dofs */
626:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
627:   PetscBTCreate(nv,&btvi);
628:   for (i=1;i<n_neigh;i++) {
629:     for (j=0;j<n_shared[i];j++) {
630:       PetscBTSet(btvi,shared[i][j]);
631:     }
632:   }
633:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

635:   /* communicate corners and splitpoints */
636:   PetscMalloc1(nv,&vmarks);
637:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
638:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
639:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

641:   if (print) {
642:     IS tbz;

644:     cum = 0;
645:     for (i=0;i<nv;i++)
646:       if (sfvleaves[i])
647:         vmarks[cum++] = i;

649:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
650:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
651:     ISView(tbz,NULL);
652:     ISDestroy(&tbz);
653:   }

655:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
656:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
657:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
658:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

660:   /* Zero rows of lGt corresponding to identified corners
661:      and interior nodal dofs */
662:   cum = 0;
663:   for (i=0;i<nv;i++) {
664:     if (sfvleaves[i]) {
665:       vmarks[cum++] = i;
666:       PetscBTSet(btv,i);
667:     }
668:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
669:   }
670:   PetscBTDestroy(&btvi);
671:   if (print) {
672:     IS tbz;

674:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
675:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
676:     ISView(tbz,NULL);
677:     ISDestroy(&tbz);
678:   }
679:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
680:   PetscFree(vmarks);
681:   PetscSFDestroy(&sfv);
682:   PetscFree2(sfvleaves,sfvroots);

684:   /* Recompute G */
685:   MatDestroy(&lG);
686:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
687:   if (print) {
688:     PetscObjectSetName((PetscObject)lG,"used_lG");
689:     MatView(lG,NULL);
690:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
691:     MatView(lGt,NULL);
692:   }

694:   /* Get primal dofs (if any) */
695:   cum = 0;
696:   for (i=0;i<ne;i++) {
697:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
698:   }
699:   if (fl2g) {
700:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
701:   }
702:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
703:   if (print) {
704:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
705:     ISView(primals,NULL);
706:   }
707:   PetscBTDestroy(&bte);
708:   /* TODO: what if the user passed in some of them ?  */
709:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
710:   ISDestroy(&primals);

712:   /* Compute edge connectivity */
713:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
714:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
715:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
716:   if (fl2g) {
717:     PetscBT   btf;
718:     PetscInt  *iia,*jja,*iiu,*jju;
719:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

721:     /* create CSR for all local dofs */
722:     PetscMalloc1(n+1,&iia);
723:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
724:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
725:       iiu = pcbddc->mat_graph->xadj;
726:       jju = pcbddc->mat_graph->adjncy;
727:     } else if (pcbddc->use_local_adj) {
728:       rest = PETSC_TRUE;
729:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
730:     } else {
731:       free   = PETSC_TRUE;
732:       PetscMalloc2(n+1,&iiu,n,&jju);
733:       iiu[0] = 0;
734:       for (i=0;i<n;i++) {
735:         iiu[i+1] = i+1;
736:         jju[i]   = -1;
737:       }
738:     }

740:     /* import sizes of CSR */
741:     iia[0] = 0;
742:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

744:     /* overwrite entries corresponding to the Nedelec field */
745:     PetscBTCreate(n,&btf);
746:     ISGetIndices(nedfieldlocal,&idxs);
747:     for (i=0;i<ne;i++) {
748:       PetscBTSet(btf,idxs[i]);
749:       iia[idxs[i]+1] = ii[i+1]-ii[i];
750:     }

752:     /* iia in CSR */
753:     for (i=0;i<n;i++) iia[i+1] += iia[i];

755:     /* jja in CSR */
756:     PetscMalloc1(iia[n],&jja);
757:     for (i=0;i<n;i++)
758:       if (!PetscBTLookup(btf,i))
759:         for (j=0;j<iiu[i+1]-iiu[i];j++)
760:           jja[iia[i]+j] = jju[iiu[i]+j];

762:     /* map edge dofs connectivity */
763:     if (jj) {
764:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
765:       for (i=0;i<ne;i++) {
766:         PetscInt e = idxs[i];
767:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
768:       }
769:     }
770:     ISRestoreIndices(nedfieldlocal,&idxs);
771:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
772:     if (rest) {
773:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
774:     }
775:     if (free) {
776:       PetscFree2(iiu,jju);
777:     }
778:     PetscBTDestroy(&btf);
779:   } else {
780:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
781:   }

783:   /* Analyze interface for edge dofs */
784:   PCBDDCAnalyzeInterface(pc);
785:   pcbddc->mat_graph->twodim = PETSC_FALSE;

787:   /* Get coarse edges in the edge space */
788:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
789:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

791:   if (fl2g) {
792:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
793:     PetscMalloc1(nee,&eedges);
794:     for (i=0;i<nee;i++) {
795:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
796:     }
797:   } else {
798:     eedges  = alleedges;
799:     primals = allprimals;
800:   }

802:   /* Mark fine edge dofs with their coarse edge id */
803:   PetscMemzero(marks,ne*sizeof(PetscInt));
804:   ISGetLocalSize(primals,&cum);
805:   ISGetIndices(primals,&idxs);
806:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
807:   ISRestoreIndices(primals,&idxs);
808:   if (print) {
809:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
810:     ISView(primals,NULL);
811:   }

813:   maxsize = 0;
814:   for (i=0;i<nee;i++) {
815:     PetscInt size,mark = i+1;

817:     ISGetLocalSize(eedges[i],&size);
818:     ISGetIndices(eedges[i],&idxs);
819:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
820:     ISRestoreIndices(eedges[i],&idxs);
821:     maxsize = PetscMax(maxsize,size);
822:   }

824:   /* Find coarse edge endpoints */
825:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
826:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
827:   for (i=0;i<nee;i++) {
828:     PetscInt mark = i+1,size;

830:     ISGetLocalSize(eedges[i],&size);
831:     if (!size && nedfieldlocal) continue;
832:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
833:     ISGetIndices(eedges[i],&idxs);
834:     if (print) {
835:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
836:       ISView(eedges[i],NULL);
837:     }
838:     for (j=0;j<size;j++) {
839:       PetscInt k, ee = idxs[j];
840:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %D\n",ee);
841:       for (k=ii[ee];k<ii[ee+1];k++) {
842:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %D\n",jj[k]);
843:         if (PetscBTLookup(btv,jj[k])) {
844:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %D\n",jj[k]);
845:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
846:           PetscInt  k2;
847:           PetscBool corner = PETSC_FALSE;
848:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
849:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
850:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
851:                if the edge dof lie on the natural part of the boundary */
852:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
853:               corner = PETSC_TRUE;
854:               break;
855:             }
856:           }
857:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
858:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %D\n",jj[k]);
859:             PetscBTSet(btv,jj[k]);
860:           } else {
861:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
862:           }
863:         }
864:       }
865:     }
866:     ISRestoreIndices(eedges[i],&idxs);
867:   }
868:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
869:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
870:   PetscBTDestroy(&btb);

872:   /* Reset marked primal dofs */
873:   ISGetLocalSize(primals,&cum);
874:   ISGetIndices(primals,&idxs);
875:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
876:   ISRestoreIndices(primals,&idxs);

878:   /* Now use the initial lG */
879:   MatDestroy(&lG);
880:   MatDestroy(&lGt);
881:   lG   = lGinit;
882:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

884:   /* Compute extended cols indices */
885:   PetscBTCreate(nv,&btvc);
886:   PetscBTCreate(nee,&bter);
887:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
888:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
889:   i   *= maxsize;
890:   PetscCalloc1(nee,&extcols);
891:   PetscMalloc2(i,&extrow,i,&gidxs);
892:   eerr = PETSC_FALSE;
893:   for (i=0;i<nee;i++) {
894:     PetscInt size,found = 0;

896:     cum  = 0;
897:     ISGetLocalSize(eedges[i],&size);
898:     if (!size && nedfieldlocal) continue;
899:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
900:     ISGetIndices(eedges[i],&idxs);
901:     PetscBTMemzero(nv,btvc);
902:     for (j=0;j<size;j++) {
903:       PetscInt k,ee = idxs[j];
904:       for (k=ii[ee];k<ii[ee+1];k++) {
905:         PetscInt vv = jj[k];
906:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
907:         else if (!PetscBTLookupSet(btvc,vv)) found++;
908:       }
909:     }
910:     ISRestoreIndices(eedges[i],&idxs);
911:     PetscSortRemoveDupsInt(&cum,extrow);
912:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
913:     PetscSortIntWithArray(cum,gidxs,extrow);
914:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
915:     /* it may happen that endpoints are not defined at this point
916:        if it is the case, mark this edge for a second pass */
917:     if (cum != size -1 || found != 2) {
918:       PetscBTSet(bter,i);
919:       if (print) {
920:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
921:         ISView(eedges[i],NULL);
922:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
923:         ISView(extcols[i],NULL);
924:       }
925:       eerr = PETSC_TRUE;
926:     }
927:   }
928:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
929:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
930:   if (done) {
931:     PetscInt *newprimals;

933:     PetscMalloc1(ne,&newprimals);
934:     ISGetLocalSize(primals,&cum);
935:     ISGetIndices(primals,&idxs);
936:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
937:     ISRestoreIndices(primals,&idxs);
938:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
939:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
940:     for (i=0;i<nee;i++) {
941:       PetscBool has_candidates = PETSC_FALSE;
942:       if (PetscBTLookup(bter,i)) {
943:         PetscInt size,mark = i+1;

945:         ISGetLocalSize(eedges[i],&size);
946:         ISGetIndices(eedges[i],&idxs);
947:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
948:         for (j=0;j<size;j++) {
949:           PetscInt k,ee = idxs[j];
950:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
951:           for (k=ii[ee];k<ii[ee+1];k++) {
952:             /* set all candidates located on the edge as corners */
953:             if (PetscBTLookup(btvcand,jj[k])) {
954:               PetscInt k2,vv = jj[k];
955:               has_candidates = PETSC_TRUE;
956:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %D\n",vv);
957:               PetscBTSet(btv,vv);
958:               /* set all edge dofs connected to candidate as primals */
959:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
960:                 if (marks[jjt[k2]] == mark) {
961:                   PetscInt k3,ee2 = jjt[k2];
962:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %D\n",ee2);
963:                   newprimals[cum++] = ee2;
964:                   /* finally set the new corners */
965:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
966:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %D\n",jj[k3]);
967:                     PetscBTSet(btv,jj[k3]);
968:                   }
969:                 }
970:               }
971:             } else {
972:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %D\n",jj[k]);
973:             }
974:           }
975:         }
976:         if (!has_candidates) { /* circular edge */
977:           PetscInt k, ee = idxs[0],*tmarks;

979:           PetscCalloc1(ne,&tmarks);
980:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %D\n",i);
981:           for (k=ii[ee];k<ii[ee+1];k++) {
982:             PetscInt k2;
983:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %D\n",jj[k]);
984:             PetscBTSet(btv,jj[k]);
985:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
986:           }
987:           for (j=0;j<size;j++) {
988:             if (tmarks[idxs[j]] > 1) {
989:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %D\n",idxs[j]);
990:               newprimals[cum++] = idxs[j];
991:             }
992:           }
993:           PetscFree(tmarks);
994:         }
995:         ISRestoreIndices(eedges[i],&idxs);
996:       }
997:       ISDestroy(&extcols[i]);
998:     }
999:     PetscFree(extcols);
1000:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1001:     PetscSortRemoveDupsInt(&cum,newprimals);
1002:     if (fl2g) {
1003:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1004:       ISDestroy(&primals);
1005:       for (i=0;i<nee;i++) {
1006:         ISDestroy(&eedges[i]);
1007:       }
1008:       PetscFree(eedges);
1009:     }
1010:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1011:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1012:     PetscFree(newprimals);
1013:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1014:     ISDestroy(&primals);
1015:     PCBDDCAnalyzeInterface(pc);
1016:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1017:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1018:     if (fl2g) {
1019:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1020:       PetscMalloc1(nee,&eedges);
1021:       for (i=0;i<nee;i++) {
1022:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1023:       }
1024:     } else {
1025:       eedges  = alleedges;
1026:       primals = allprimals;
1027:     }
1028:     PetscCalloc1(nee,&extcols);

1030:     /* Mark again */
1031:     PetscMemzero(marks,ne*sizeof(PetscInt));
1032:     for (i=0;i<nee;i++) {
1033:       PetscInt size,mark = i+1;

1035:       ISGetLocalSize(eedges[i],&size);
1036:       ISGetIndices(eedges[i],&idxs);
1037:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1038:       ISRestoreIndices(eedges[i],&idxs);
1039:     }
1040:     if (print) {
1041:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1042:       ISView(primals,NULL);
1043:     }

1045:     /* Recompute extended cols */
1046:     eerr = PETSC_FALSE;
1047:     for (i=0;i<nee;i++) {
1048:       PetscInt size;

1050:       cum  = 0;
1051:       ISGetLocalSize(eedges[i],&size);
1052:       if (!size && nedfieldlocal) continue;
1053:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1054:       ISGetIndices(eedges[i],&idxs);
1055:       for (j=0;j<size;j++) {
1056:         PetscInt k,ee = idxs[j];
1057:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1058:       }
1059:       ISRestoreIndices(eedges[i],&idxs);
1060:       PetscSortRemoveDupsInt(&cum,extrow);
1061:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1062:       PetscSortIntWithArray(cum,gidxs,extrow);
1063:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1064:       if (cum != size -1) {
1065:         if (print) {
1066:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1067:           ISView(eedges[i],NULL);
1068:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1069:           ISView(extcols[i],NULL);
1070:         }
1071:         eerr = PETSC_TRUE;
1072:       }
1073:     }
1074:   }
1075:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1076:   PetscFree2(extrow,gidxs);
1077:   PetscBTDestroy(&bter);
1078:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1079:   /* an error should not occur at this point */
1080:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1082:   /* Check the number of endpoints */
1083:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1084:   PetscMalloc1(2*nee,&corners);
1085:   PetscMalloc1(nee,&cedges);
1086:   for (i=0;i<nee;i++) {
1087:     PetscInt size, found = 0, gc[2];

1089:     /* init with defaults */
1090:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1091:     ISGetLocalSize(eedges[i],&size);
1092:     if (!size && nedfieldlocal) continue;
1093:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1094:     ISGetIndices(eedges[i],&idxs);
1095:     PetscBTMemzero(nv,btvc);
1096:     for (j=0;j<size;j++) {
1097:       PetscInt k,ee = idxs[j];
1098:       for (k=ii[ee];k<ii[ee+1];k++) {
1099:         PetscInt vv = jj[k];
1100:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1101:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1102:           corners[i*2+found++] = vv;
1103:         }
1104:       }
1105:     }
1106:     if (found != 2) {
1107:       PetscInt e;
1108:       if (fl2g) {
1109:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1110:       } else {
1111:         e = idxs[0];
1112:       }
1113:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1114:     }

1116:     /* get primal dof index on this coarse edge */
1117:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1118:     if (gc[0] > gc[1]) {
1119:       PetscInt swap  = corners[2*i];
1120:       corners[2*i]   = corners[2*i+1];
1121:       corners[2*i+1] = swap;
1122:     }
1123:     cedges[i] = idxs[size-1];
1124:     ISRestoreIndices(eedges[i],&idxs);
1125:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1126:   }
1127:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1128:   PetscBTDestroy(&btvc);

1130: #if defined(PETSC_USE_DEBUG)
1131:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1132:      not interfere with neighbouring coarse edges */
1133:   PetscMalloc1(nee+1,&emarks);
1134:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1135:   for (i=0;i<nv;i++) {
1136:     PetscInt emax = 0,eemax = 0;

1138:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1139:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1140:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1141:     for (j=1;j<nee+1;j++) {
1142:       if (emax < emarks[j]) {
1143:         emax = emarks[j];
1144:         eemax = j;
1145:       }
1146:     }
1147:     /* not relevant for edges */
1148:     if (!eemax) continue;

1150:     for (j=ii[i];j<ii[i+1];j++) {
1151:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1152:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1153:       }
1154:     }
1155:   }
1156:   PetscFree(emarks);
1157:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1158: #endif

1160:   /* Compute extended rows indices for edge blocks of the change of basis */
1161:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1162:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1163:   extmem *= maxsize;
1164:   PetscMalloc1(extmem*nee,&extrow);
1165:   PetscMalloc1(nee,&extrows);
1166:   PetscCalloc1(nee,&extrowcum);
1167:   for (i=0;i<nv;i++) {
1168:     PetscInt mark = 0,size,start;

1170:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1171:     for (j=ii[i];j<ii[i+1];j++)
1172:       if (marks[jj[j]] && !mark)
1173:         mark = marks[jj[j]];

1175:     /* not relevant */
1176:     if (!mark) continue;

1178:     /* import extended row */
1179:     mark--;
1180:     start = mark*extmem+extrowcum[mark];
1181:     size = ii[i+1]-ii[i];
1182:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1183:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1184:     extrowcum[mark] += size;
1185:   }
1186:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1187:   MatDestroy(&lGt);
1188:   PetscFree(marks);

1190:   /* Compress extrows */
1191:   cum  = 0;
1192:   for (i=0;i<nee;i++) {
1193:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1194:     PetscSortRemoveDupsInt(&size,start);
1195:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1196:     cum  = PetscMax(cum,size);
1197:   }
1198:   PetscFree(extrowcum);
1199:   PetscBTDestroy(&btv);
1200:   PetscBTDestroy(&btvcand);

1202:   /* Workspace for lapack inner calls and VecSetValues */
1203:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1205:   /* Create change of basis matrix (preallocation can be improved) */
1206:   MatCreate(comm,&T);
1207:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1208:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1209:   MatSetType(T,MATAIJ);
1210:   MatSeqAIJSetPreallocation(T,10,NULL);
1211:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1212:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1213:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1214:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1215:   ISLocalToGlobalMappingDestroy(&al2g);

1217:   /* Defaults to identity */
1218:   MatCreateVecs(pc->pmat,&tvec,NULL);
1219:   VecSet(tvec,1.0);
1220:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1221:   VecDestroy(&tvec);

1223:   /* Create discrete gradient for the coarser level if needed */
1224:   MatDestroy(&pcbddc->nedcG);
1225:   ISDestroy(&pcbddc->nedclocal);
1226:   if (pcbddc->current_level < pcbddc->max_levels) {
1227:     ISLocalToGlobalMapping cel2g,cvl2g;
1228:     IS                     wis,gwis;
1229:     PetscInt               cnv,cne;

1231:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1232:     if (fl2g) {
1233:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1234:     } else {
1235:       PetscObjectReference((PetscObject)wis);
1236:       pcbddc->nedclocal = wis;
1237:     }
1238:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1239:     ISDestroy(&wis);
1240:     ISRenumber(gwis,NULL,&cne,&wis);
1241:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1242:     ISDestroy(&wis);
1243:     ISDestroy(&gwis);

1245:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1246:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1247:     ISDestroy(&wis);
1248:     ISRenumber(gwis,NULL,&cnv,&wis);
1249:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1250:     ISDestroy(&wis);
1251:     ISDestroy(&gwis);

1253:     MatCreate(comm,&pcbddc->nedcG);
1254:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1255:     MatSetType(pcbddc->nedcG,MATAIJ);
1256:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1257:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1258:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1259:     ISLocalToGlobalMappingDestroy(&cel2g);
1260:     ISLocalToGlobalMappingDestroy(&cvl2g);
1261:   }
1262:   ISLocalToGlobalMappingDestroy(&vl2g);

1264: #if defined(PRINT_GDET)
1265:   inc = 0;
1266:   lev = pcbddc->current_level;
1267: #endif

1269:   /* Insert values in the change of basis matrix */
1270:   for (i=0;i<nee;i++) {
1271:     Mat         Gins = NULL, GKins = NULL;
1272:     IS          cornersis = NULL;
1273:     PetscScalar cvals[2];

1275:     if (pcbddc->nedcG) {
1276:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1277:     }
1278:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1279:     if (Gins && GKins) {
1280:       PetscScalar    *data;
1281:       const PetscInt *rows,*cols;
1282:       PetscInt       nrh,nch,nrc,ncc;

1284:       ISGetIndices(eedges[i],&cols);
1285:       /* H1 */
1286:       ISGetIndices(extrows[i],&rows);
1287:       MatGetSize(Gins,&nrh,&nch);
1288:       MatDenseGetArray(Gins,&data);
1289:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1290:       MatDenseRestoreArray(Gins,&data);
1291:       ISRestoreIndices(extrows[i],&rows);
1292:       /* complement */
1293:       MatGetSize(GKins,&nrc,&ncc);
1294:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1295:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1296:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1297:       MatDenseGetArray(GKins,&data);
1298:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1299:       MatDenseRestoreArray(GKins,&data);

1301:       /* coarse discrete gradient */
1302:       if (pcbddc->nedcG) {
1303:         PetscInt cols[2];

1305:         cols[0] = 2*i;
1306:         cols[1] = 2*i+1;
1307:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1308:       }
1309:       ISRestoreIndices(eedges[i],&cols);
1310:     }
1311:     ISDestroy(&extrows[i]);
1312:     ISDestroy(&extcols[i]);
1313:     ISDestroy(&cornersis);
1314:     MatDestroy(&Gins);
1315:     MatDestroy(&GKins);
1316:   }
1317:   ISLocalToGlobalMappingDestroy(&el2g);

1319:   /* Start assembling */
1320:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1321:   if (pcbddc->nedcG) {
1322:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1323:   }

1325:   /* Free */
1326:   if (fl2g) {
1327:     ISDestroy(&primals);
1328:     for (i=0;i<nee;i++) {
1329:       ISDestroy(&eedges[i]);
1330:     }
1331:     PetscFree(eedges);
1332:   }

1334:   /* hack mat_graph with primal dofs on the coarse edges */
1335:   {
1336:     PCBDDCGraph graph   = pcbddc->mat_graph;
1337:     PetscInt    *oqueue = graph->queue;
1338:     PetscInt    *ocptr  = graph->cptr;
1339:     PetscInt    ncc,*idxs;

1341:     /* find first primal edge */
1342:     if (pcbddc->nedclocal) {
1343:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1344:     } else {
1345:       if (fl2g) {
1346:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1347:       }
1348:       idxs = cedges;
1349:     }
1350:     cum = 0;
1351:     while (cum < nee && cedges[cum] < 0) cum++;

1353:     /* adapt connected components */
1354:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1355:     graph->cptr[0] = 0;
1356:     for (i=0,ncc=0;i<graph->ncc;i++) {
1357:       PetscInt lc = ocptr[i+1]-ocptr[i];
1358:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1359:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1360:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1361:         ncc++;
1362:         lc--;
1363:         cum++;
1364:         while (cum < nee && cedges[cum] < 0) cum++;
1365:       }
1366:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1367:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1368:       ncc++;
1369:     }
1370:     graph->ncc = ncc;
1371:     if (pcbddc->nedclocal) {
1372:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1373:     }
1374:     PetscFree2(ocptr,oqueue);
1375:   }
1376:   ISLocalToGlobalMappingDestroy(&fl2g);
1377:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1378:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1379:   MatDestroy(&conn);

1381:   ISDestroy(&nedfieldlocal);
1382:   PetscFree(extrow);
1383:   PetscFree2(work,rwork);
1384:   PetscFree(corners);
1385:   PetscFree(cedges);
1386:   PetscFree(extrows);
1387:   PetscFree(extcols);
1388:   MatDestroy(&lG);

1390:   /* Complete assembling */
1391:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1392:   if (pcbddc->nedcG) {
1393:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1394: #if 0
1395:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1396:     MatView(pcbddc->nedcG,NULL);
1397: #endif
1398:   }

1400:   /* set change of basis */
1401:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1402:   MatDestroy(&T);

1404:   return(0);
1405: }

1407: /* the near-null space of BDDC carries information on quadrature weights,
1408:    and these can be collinear -> so cheat with MatNullSpaceCreate
1409:    and create a suitable set of basis vectors first */
/* PCBDDCNullSpaceCreate - wraps MatNullSpaceCreate around the caller's vectors.

   Before calling MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp), a few
   entries of each quad_vecs[i] are temporarily overwritten on the process whose
   ownership range contains index i:
     - has_const false: local entry i-first is set to 1
     - has_const true : local entries 2*i-first and 2*i-first+1 are set to
                        1/sqrt(2) and -1/sqrt(2)
   After the null space object has been created the same entries are set back
   to zero.
   NOTE(review): this presumably assumes the vectors are otherwise zero on
   entry, so that the temporary vectors form a valid basis for
   MatNullSpaceCreate - confirm with the callers.

   Input:
     comm      - communicator handed to MatNullSpaceCreate
     has_const - forwarded to MatNullSpaceCreate; also selects the entry pattern
     nvecs     - number of vectors in quad_vecs
     quad_vecs - vectors to be attached to the null space (modified temporarily)
   Output:
     nnsp      - the object created by MatNullSpaceCreate

   Raises PETSC_ERR_SUP ("Not implemented") when has_const is true and the
   local size of a vector is smaller than 2*nvecs. */
1410: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1411: {
1413:   PetscInt       i;

        /* first pass: write the temporary entries */
1416:   for (i=0;i<nvecs;i++) {
1417:     PetscInt first,last;

1419:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1420:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
          /* only the process whose range [first,last) contains i touches the data */
1421:     if (i>=first && i < last) {
1422:       PetscScalar *data;
1423:       VecGetArray(quad_vecs[i],&data);
1424:       if (!has_const) {
1425:         data[i-first] = 1.;
1426:       } else {
              /* two entries of opposite sign and equal magnitude */
1427:         data[2*i-first] = 1./PetscSqrtReal(2.);
1428:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1429:       }
1430:       VecRestoreArray(quad_vecs[i],&data);
1431:     }
          /* executed for every vector, whether or not this process modified it */
1432:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1433:   }
1434:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
        /* second pass: zero the temporary entries; each vector is unlocked with
           VecLockPop before the write and locked again with VecLockPush after */
1435:   for (i=0;i<nvecs;i++) { /* reset vectors */
1436:     PetscInt first,last;
1437:     VecLockPop(quad_vecs[i]);
1438:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1439:     if (i>=first && i < last) {
1440:       PetscScalar *data;
1441:       VecGetArray(quad_vecs[i],&data);
1442:       if (!has_const) {
1443:         data[i-first] = 0.;
1444:       } else {
1445:         data[2*i-first] = 0.;
1446:         data[2*i-first+1] = 0.;
1447:       }
1448:       VecRestoreArray(quad_vecs[i],&data);
1449:     }
1450:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1451:     VecLockPush(quad_vecs[i]);
1452:   }
1453:   return(0);
1454: }

/* PCBDDCComputeNoNetFlux - builds a MatNullSpace whose vectors are filled with
   values taken from the local part of divudotp, one vector per neighbour slot.

   Steps performed:
     1. query the neighbour/shared-index information of graph->l2gmap and take
        the maximum number of neighbours over the communicator of A; if that
        maximum is zero, *nnsp is set to NULL and the function returns
     2. create maxneighs vectors from A and pass them to PCBDDCNullSpaceCreate
        (has_const = PETSC_FALSE)
     3. apply the local matrix of divudotp (or its transpose) to a vector of
        ones, optionally scatter the result through vl2l
     4. for every neighbour, insert the values at the shared local indices into
        one of the vectors, then assemble

   Input:
     A         - matrix providing the communicator, the vector layout and the
                 local-to-global mapping (row mapping if !transpose, column
                 mapping otherwise); its local matrix is read when vl2l is set
     divudotp  - matrix whose local part is fetched with MatISGetLocalMat
     transpose - selects MatMult instead of MatMultTranspose, and which side of
                 the local matrix the vectors are created for
     vl2l      - optional index set used as destination indices of the scatter
                 into a vector of A's local matrix; may be NULL
     graph     - supplies l2gmap, count and neighbours_set
   Output:
     nnsp      - the created null space, or NULL when no process has neighbours */
1456: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1457: {
1458:   Mat                    loc_divudotp;
1459:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1460:   ISLocalToGlobalMapping map;
1461:   PetscScalar            *vals;
1462:   const PetscScalar      *array;
1463:   PetscInt               i,maxneighs,maxsize;
1464:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1465:   PetscMPIInt            rank;
1466:   PetscErrorCode         ierr;

1469:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
        /* collective maximum, so every process takes the same branch below */
1470:   MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1471:   if (!maxneighs) {
1472:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1473:     *nnsp = NULL;
1474:     return(0);
1475:   }
        /* vals is sized for the largest shared-index list of this process */
1476:   maxsize = 0;
1477:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1478:   PetscMalloc1(maxsize,&vals);
1479:   /* create vectors to hold quadrature weights */
1480:   MatCreateVecs(A,&quad_vec,NULL);
1481:   if (!transpose) {
1482:     MatGetLocalToGlobalMapping(A,&map,NULL);
1483:   } else {
1484:     MatGetLocalToGlobalMapping(A,NULL,&map);
1485:   }
1486:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1487:   VecDestroy(&quad_vec);
1488:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
        /* unlock the vectors so they can be filled below; they are locked again
           with VecLockPush after assembly */
1489:   for (i=0;i<maxneighs;i++) {
1490:     VecLockPop(quad_vecs[i]);
1491:     VecSetLocalToGlobalMapping(quad_vecs[i],map);
1492:   }

1494:   /* compute local quad vec */
1495:   MatISGetLocalMat(divudotp,&loc_divudotp);
1496:   if (!transpose) {
1497:     MatCreateVecs(loc_divudotp,&v,&p);
1498:   } else {
1499:     MatCreateVecs(loc_divudotp,&p,&v);
1500:   }
        /* v = loc_divudotp^T * 1 (or loc_divudotp * 1 when transpose is set) */
1501:   VecSet(p,1.);
1502:   if (!transpose) {
1503:     MatMultTranspose(loc_divudotp,p,v);
1504:   } else {
1505:     MatMult(loc_divudotp,p,v);
1506:   }
        /* with vl2l, v is scattered into a vector of A's local matrix at the
           indices given by vl2l; otherwise v is used directly */
1507:   if (vl2l) {
1508:     Mat        lA;
1509:     VecScatter sc;

1511:     MatISGetLocalMat(A,&lA);
1512:     MatCreateVecs(lA,&vins,NULL);
1513:     VecScatterCreate(v,NULL,vins,vl2l,&sc);
1514:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1515:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1516:     VecScatterDestroy(&sc);
1517:   } else {
1518:     vins = v;
1519:   }
1520:   VecGetArrayRead(vins,&array);
1521:   VecDestroy(&p);

1523:   /* insert in global quadrature vecs */
1524:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1525:   for (i=0;i<n_neigh;i++) {
1526:     const PetscInt    *idxs;
1527:     PetscInt          idx,nn,j;

1529:     idxs = shared[i];
1530:     nn   = n_shared[i];
1531:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* the target vector is chosen from the position of this rank relative
             to the neighbour set of the first shared index.
             NOTE(review): the -(idx+1) decoding presumably relies on
             PetscFindInt returning an encoded insertion position when rank is
             not in the set - confirm against the PetscFindInt documentation */
1532:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1533:     idx  = -(idx+1);
1534:     VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1535:   }
1536:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1537:   VecRestoreArrayRead(vins,&array);
        /* vins is a separate vector only when it was created for the scatter */
1538:   if (vl2l) {
1539:     VecDestroy(&vins);
1540:   }
1541:   VecDestroy(&v);
1542:   PetscFree(vals);

1544:   /* assemble near null space */
1545:   for (i=0;i<maxneighs;i++) {
1546:     VecAssemblyBegin(quad_vecs[i]);
1547:   }
1548:   for (i=0;i<maxneighs;i++) {
1549:     VecAssemblyEnd(quad_vecs[i]);
1550:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1551:     VecLockPush(quad_vecs[i]);
1552:   }
        /* drop the local array of vectors; *nnsp was created from them above */
1553:   VecDestroyVecs(maxneighs,&quad_vecs);
1554:   return(0);
1555: }

/* PCBDDCAddPrimalVerticesLocalIS - adds the indices in primalv to the primal
   vertices stored in pcbddc->user_primal_vertices_local.

   - primalv NULL: nothing is done
   - a list is already stored: primalv and the stored list are concatenated,
     the result is sorted with duplicates removed, the previously stored IS is
     destroyed and the new IS is stored in its place
   - no list stored yet: the call is forwarded to PCBDDCSetPrimalVerticesLocalIS

   Input:
     pc      - preconditioner whose data pointer is read as a PC_BDDC
     primalv - index set to add; may be NULL */
1557: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1558: {
1559:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1563:   if (primalv) {
1564:     if (pcbddc->user_primal_vertices_local) {
1565:       IS list[2], newp;

1567:       list[0] = primalv;
1568:       list[1] = pcbddc->user_primal_vertices_local;
1569:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1570:       ISSortRemoveDups(newp);
            /* list[1] holds the previously stored IS; destroy it before the
               field is overwritten with the merged list */
1571:       ISDestroy(&list[1]);
1572:       pcbddc->user_primal_vertices_local = newp;
1573:     } else {
1574:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1575:     }
1576:   }
1577:   return(0);
1578: }

/* func_coords_private - callback that writes one coordinate of the point X
   into every output component.

   Input:
     dim - not used in the body
     t   - not used in the body
     X   - coordinates of the evaluation point
     Nf  - number of entries of out to fill
     ctx - read as a pointer to a PetscInt holding the index of the coordinate
           of X to copy
   Output:
     out - out[0..Nf-1] are all set to X[*comp]

   Always returns 0. */
1580: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1581: {
1582:   PetscInt f, *comp  = (PetscInt *)ctx;

1585:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1586:   return(0);
1587: }

/* Converts the topology information stored in PC_BDDC (dofs splitting, Dirichlet and
   Neumann boundaries, user primal vertices) into subdomain-local index sets and then
   collects additional local data:
     - disconnected local components, when pcbddc->detect_disconnected is set;
     - DMDA subdomain corners, which are passed to PCBDDCAddPrimalVerticesLocalIS;
     - dof coordinates computed from a DM, when pcbddc->corner_selection is set and
       pcbddc->mat_graph->cdim is still zero.
   pc->data is accessed as PC_BDDC and pc->pmat->data as Mat_IS.
   Option read: -pc_bddc_monolithic (skips the whole dofs splitting setup). */
1589: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1590: {
1592:   Vec            local,global;
1593:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1594:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1595:   PetscBool      monolithic = PETSC_FALSE;

1598:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1599:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1600:   PetscOptionsEnd();
1601:   /* need to convert from global to local topology information and remove references to information in global ordering */
        /* work vectors handed to every PCBDDCGlobalToLocal call below; destroyed after the boundary section */
1602:   MatCreateVecs(pc->pmat,&global,NULL);
1603:   MatCreateVecs(matis->A,&local,NULL);
1604:   if (monolithic) { /* just get block size to properly compute vertices */
1605:     if (pcbddc->vertex_size == 1) {
1606:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1607:     }
1608:     goto boundary;
1609:   }

        /* dofs splitting: user-provided global ISs, an already present local splitting, or a default one */
1611:   if (pcbddc->user_provided_isfordofs) {
1612:     if (pcbddc->n_ISForDofs) {
1613:       PetscInt i;
1614:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1615:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1616:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
              /* the global IS is no longer kept once its local counterpart exists */
1617:         ISDestroy(&pcbddc->ISForDofs[i]);
1618:       }
1619:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1620:       pcbddc->n_ISForDofs = 0;
1621:       PetscFree(pcbddc->ISForDofs);
1622:     }
1623:   } else {
1624:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1625:       DM dm;

            /* prefer the DM attached to the PC, then the one attached to the matrix */
1627:       PCGetDM(pc, &dm);
1628:       if (!dm) {
1629:         MatGetDM(pc->pmat, &dm);
1630:       }
1631:       if (dm) {
1632:         IS      *fields;
1633:         PetscInt nf,i;
              /* one local IS per field returned by the DM field decomposition */
1634:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1635:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1636:         for (i=0;i<nf;i++) {
1637:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1638:           ISDestroy(&fields[i]);
1639:         }
1640:         PetscFree(fields);
1641:         pcbddc->n_ISForDofsLocal = nf;
1642:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1643:         PetscContainer   c;

1645:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1646:         if (c) {
1647:           MatISLocalFields lf;
1648:           PetscContainerGetPointer(c,(void**)&lf);
1649:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1650:         } else { /* fallback, create the default fields if bs > 1 */
                /* i first receives the block size, then is reused as the loop index */
1651:           PetscInt i, n = matis->A->rmap->n;
1652:           MatGetBlockSize(pc->pmat,&i);
1653:           if (i > 1) {
1654:             pcbddc->n_ISForDofsLocal = i;
1655:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1656:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
                    /* field i: n/bs entries starting at i with stride bs */
1657:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1658:             }
1659:           }
1660:         }
1661:       }
1662:     } else {
            /* a local splitting is already present: each IS is replaced by its MPI_LAND-checked version */
1663:       PetscInt i;
1664:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1665:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1666:       }
1667:     }
1668:   }

1670: boundary:
        /* boundaries: convert the global IS if no local one exists, otherwise check the local one
           (MPI_LAND for Dirichlet, MPI_LOR for Neumann) */
1671:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1672:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1673:   } else if (pcbddc->DirichletBoundariesLocal) {
1674:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1675:   }
1676:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1677:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1678:   } else if (pcbddc->NeumannBoundariesLocal) {
1679:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1680:   }
1681:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1682:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1683:   }
1684:   VecDestroy(&global);
1685:   VecDestroy(&local);
1686:   /* detect local disconnected subdomains if requested (use matis->A) */
1687:   if (pcbddc->detect_disconnected) {
1688:     IS        primalv = NULL;
1689:     PetscInt  i;
1690:     PetscBool filter = pcbddc->detect_disconnected_filter;

          /* drop the previously stored components before recomputing them */
1692:     for (i=0;i<pcbddc->n_local_subs;i++) {
1693:       ISDestroy(&pcbddc->local_subs[i]);
1694:     }
1695:     PetscFree(pcbddc->local_subs);
1696:     PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1697:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1698:     ISDestroy(&primalv);
1699:   }
1700:   /* early stage corner detection */
1701:   {
1702:     DM dm;

1704:     MatGetDM(pc->pmat,&dm);
1705:     if (dm) {
1706:       PetscBool isda;

1708:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1709:       if (isda) {
1710:         ISLocalToGlobalMapping l2l;
1711:         IS                     corners;
1712:         Mat                    lA;

1714:         DMDAGetSubdomainCornersIS(dm,&corners);
1715:         MatISGetLocalMat(pc->pmat,&lA);
1716:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1717:         MatISRestoreLocalMat(pc->pmat,&lA);
1718:         if (l2l && corners) {
1719:           const PetscInt *idx;
1720:           PetscInt       dof,bs,*idxout,n;

1722:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1723:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1724:           ISGetLocalSize(corners,&n);
1725:           ISGetIndices(corners,&idx);
1726:           if (bs == dof) {
                  /* mapping block size matches the DMDA dof count: map the corner indices blockwise */
1727:             PetscMalloc1(n,&idxout);
1728:             ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1729:           } else { /* the original DMDA local-to-local map have been modified */
1730:             PetscInt i,d;

                  /* expand every corner index into its dof point indices, then map them in place */
1732:             PetscMalloc1(dof*n,&idxout);
1733:             for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1734:             ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);

1736:             bs = 1;
1737:             n *= dof;
1738:           }
1739:           ISRestoreIndices(corners,&idx);
1740:           DMDARestoreSubdomainCornersIS(dm,&corners);
                /* corners is reused for the new block IS, which takes ownership of idxout */
1741:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1742:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1743:           ISDestroy(&corners);
1744:           pcbddc->corner_selected = PETSC_TRUE;
1745:         } else if (corners) { /* not from DMDA */
1746:           DMDARestoreSubdomainCornersIS(dm,&corners);
1747:         }
1748:       }
1749:     }
1750:   }
        /* coordinates are computed only when corner selection is requested and none are stored in the graph */
1751:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1752:     DM dm;

1754:     PCGetDM(pc,&dm);
1755:     if (!dm) {
1756:       MatGetDM(pc->pmat,&dm);
1757:     }
1758:     if (dm) {
1759:       Vec            vcoords;
1760:       PetscSection   section;
1761:       PetscReal      *coords;
1762:       PetscInt       d,cdim,nl,nf,**ctxs;
1763:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);

1765:       DMGetCoordinateDim(dm,&cdim);
1766:       DMGetSection(dm,&section);
1767:       PetscSectionGetNumFields(section,&nf);
1768:       DMCreateGlobalVector(dm,&vcoords);
1769:       VecGetLocalSize(vcoords,&nl);
1770:       PetscMalloc1(nl*cdim,&coords);
            /* ctxs[] holds nf pointers into one contiguous buffer of nf PetscInt (allocated as ctxs[0]);
               NOTE(review): ctxs[0] is dereferenced unconditionally, so nf > 0 is assumed - confirm */
1771:       PetscMalloc2(nf,&funcs,nf,&ctxs);
1772:       PetscMalloc1(nf,&ctxs[0]);
1773:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1774:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1775:       for (d=0;d<cdim;d++) {
1776:         PetscInt          i;
1777:         const PetscScalar *v;

              /* every field context is set to d before projecting; the real parts of the projected
                 values are stored in column d of coords (row-major, cdim entries per dof) */
1779:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1780:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1781:         VecGetArrayRead(vcoords,&v);
1782:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1783:         VecRestoreArrayRead(vcoords,&v);
1784:       }
1785:       VecDestroy(&vcoords);
1786:       PCSetCoordinates(pc,cdim,nl,coords);
1787:       PetscFree(coords);
1788:       PetscFree(ctxs[0]);
1789:       PetscFree2(funcs,ctxs);
1790:     }
1791:   }
1792:   return(0);
1793: }

/* Replaces *is with a new IS holding the local indices in [0,n) whose flag is still
   true after the flags have been reduced with mop over matis->sf and broadcast back.
     pc  - preconditioner; pc->pmat->data is accessed as Mat_IS
     mop - reduction operation, only MPI_LAND and MPI_LOR are accepted
     is  - on input the IS to check (destroyed here); on output the new IS, created on
           the communicator of the input IS with indices in increasing order
   Entries of the input IS outside [0,n) are ignored; n is matis->A->rmap->n. */
1795: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1796: {
1797:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1798:   PetscErrorCode  ierr;
1799:   IS              nis;
1800:   const PetscInt  *idxs;
1801:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1802:   PetscBool       *ld;

1805:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1806:   MatISSetUpSF(pc->pmat);
        /* the root buffer starts from the neutral element of mop: true for AND, false (zero) for OR */
1807:   if (mop == MPI_LAND) {
1808:     /* init rootdata with true */
1809:     ld   = (PetscBool*) matis->sf_rootdata;
1810:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1811:   } else {
1812:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1813:   }
        /* flag in the leaf buffer the local indices listed in the input IS */
1814:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1815:   ISGetLocalSize(*is,&nd);
1816:   ISGetIndices(*is,&idxs);
1817:   ld   = (PetscBool*) matis->sf_leafdata;
1818:   for (i=0;i<nd;i++)
1819:     if (-1 < idxs[i] && idxs[i] < n)
1820:       ld[idxs[i]] = PETSC_TRUE;
1821:   ISRestoreIndices(*is,&idxs);
        /* combine the flags on the roots, then send the combined values back to the leaves */
1822:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1823:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1824:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1825:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
        /* NOTE(review): with MPI_LAND the output is sized by nd, presumably because a leaf can stay
           true only if it was flagged locally; with MPI_LOR up to n leaves may be true */
1826:   if (mop == MPI_LAND) {
1827:     PetscMalloc1(nd,&nidxs);
1828:   } else {
1829:     PetscMalloc1(n,&nidxs);
1830:   }
        /* ld still points to the leaf buffer, which now contains the broadcast result */
1831:   for (i=0,nnd=0;i<n;i++)
1832:     if (ld[i])
1833:       nidxs[nnd++] = i;
        /* the new IS takes ownership of nidxs */
1834:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1835:   ISDestroy(is);
1836:   *is  = nis;
1837:   return(0);
1838: }

/* Computes z from r by scattering r to pcis->vec1_D, solving with pcbddc->ksp_D and
   scattering the solution back into z, which is zeroed first.
   Nothing is done when pcbddc->benign_have_null is false.
   When pcbddc->ChangeOfBasisMatrix is set, its transpose is applied to r before the
   solve and the matrix itself is applied to the result. */
1840: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1841: {
1842:   PC_IS             *pcis = (PC_IS*)(pc->data);
1843:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1844:   PetscErrorCode    ierr;

1847:   if (!pcbddc->benign_have_null) {
1848:     return(0);
1849:   }
1850:   if (pcbddc->ChangeOfBasisMatrix) {
1851:     Vec swap;

          /* from here on the local r is the transformed vector (the work vector), while
             pcbddc->work_change temporarily stores the caller's r */
1853:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1854:     swap = pcbddc->work_change;
1855:     pcbddc->work_change = r;
1856:     r = swap;
1857:   }
1858:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1859:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1860:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1861:   VecSet(z,0.);
1862:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1863:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1864:   if (pcbddc->ChangeOfBasisMatrix) {
          /* undo the swap: the local r is the original work vector */
1865:     pcbddc->work_change = r;
1866:     VecCopy(z,pcbddc->work_change);
1867:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1868:   }
1869:   return(0);
1870: }

/* Multiplication routine shared by the MATSHELL operations: computes y = ctx->A * x
   (or ctx->A^T * x when transpose is true), modifying x before the product when the
   "right" flag is active and y after the product when the "left" flag is active.
   For the transposed product the roles of ctx->apply_left and ctx->apply_right are
   exchanged. Each modification works subset by subset on ctx->benign_zerodiag_subs;
   the last index of a subset (idxs[nz-1]) is treated differently from the others.
   x is not modified: a copy is edited in ctx->work and temporarily placed in x. */
1872: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1873: {
1874:   PCBDDCBenignMatMult_ctx ctx;
1875:   PetscErrorCode          ierr;
1876:   PetscBool               apply_right,apply_left,reset_x;

1879:   MatShellGetContext(A,&ctx);
1880:   if (transpose) {
1881:     apply_right = ctx->apply_left;
1882:     apply_left = ctx->apply_right;
1883:   } else {
1884:     apply_right = ctx->apply_right;
1885:     apply_left = ctx->apply_left;
1886:   }
1887:   reset_x = PETSC_FALSE;
1888:   if (apply_right) {
1889:     const PetscScalar *ax;
1890:     PetscInt          nl,i;

          /* work on a copy of the local entries of x */
1892:     VecGetLocalSize(x,&nl);
1893:     VecGetArrayRead(x,&ax);
1894:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1895:     VecRestoreArrayRead(x,&ax);
1896:     for (i=0;i<ctx->benign_n;i++) {
1897:       PetscScalar    sum,val;
1898:       const PetscInt *idxs;
1899:       PetscInt       nz,j;
1900:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1901:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1902:       sum = 0.;
            /* sum collects the unmodified values of the first nz-1 entries; with apply_p0 the
               original last value is also added to each of them */
1903:       if (ctx->apply_p0) {
1904:         val = ctx->work[idxs[nz-1]];
1905:         for (j=0;j<nz-1;j++) {
1906:           sum += ctx->work[idxs[j]];
1907:           ctx->work[idxs[j]] += val;
1908:         }
1909:       } else {
1910:         for (j=0;j<nz-1;j++) {
1911:           sum += ctx->work[idxs[j]];
1912:         }
1913:       }
1914:       ctx->work[idxs[nz-1]] -= sum;
1915:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1916:     }
          /* x uses the modified copy until VecResetArray is called after the product */
1917:     VecPlaceArray(x,ctx->work);
1918:     reset_x = PETSC_TRUE;
1919:   }
1920:   if (transpose) {
1921:     MatMultTranspose(ctx->A,x,y);
1922:   } else {
1923:     MatMult(ctx->A,x,y);
1924:   }
1925:   if (reset_x) {
1926:     VecResetArray(x);
1927:   }
1928:   if (apply_left) {
1929:     PetscScalar *ay;
1930:     PetscInt    i;

1932:     VecGetArray(y,&ay);
1933:     for (i=0;i<ctx->benign_n;i++) {
1934:       PetscScalar    sum,val;
1935:       const PetscInt *idxs;
1936:       PetscInt       nz,j;
1937:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1938:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* the last value is subtracted from the first nz-1 entries; the last entry then
               either receives the sum of their unmodified values (apply_p0) or is zeroed */
1939:       val = -ay[idxs[nz-1]];
1940:       if (ctx->apply_p0) {
1941:         sum = 0.;
1942:         for (j=0;j<nz-1;j++) {
1943:           sum += ay[idxs[j]];
1944:           ay[idxs[j]] += val;
1945:         }
1946:         ay[idxs[nz-1]] += sum;
1947:       } else {
1948:         for (j=0;j<nz-1;j++) {
1949:           ay[idxs[j]] += val;
1950:         }
1951:         ay[idxs[nz-1]] = 0.;
1952:       }
1953:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1954:     }
1955:     VecRestoreArray(y,&ay);
1956:   }
1957:   return(0);
1958: }

/* Transposed product of the shell matrix: forwards to
   PCBDDCBenignMatMult_Private_Private with transpose set to PETSC_TRUE. */
1960: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1961: {

1965:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
1966:   return(0);
1967: }

/* Product of the shell matrix: forwards to
   PCBDDCBenignMatMult_Private_Private with transpose set to PETSC_FALSE. */
1969: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1970: {

1974:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
1975:   return(0);
1976: }

/* Installs (restore == PETSC_FALSE) or removes (restore == PETSC_TRUE) shell versions
   of pcis->A_IB and pcis->A_BI.
   Install: pcis->A_IB becomes a MATSHELL whose context keeps the original A_IB in
   ctx->A, and pcis->A_BI becomes the transpose of that shell (MatCreateTranspose);
   the original A_BI is saved in pcbddc->benign_original_mat. Nothing is done when
   there is no benign_change, benign_n is zero or benign_change_explicit is set; an
   error is raised if benign_original_mat is already set.
   Restore: destroys the shells, puts the original matrices back and frees the context;
   nothing is done if benign_original_mat is NULL. */
1978: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1979: {
1980:   PC_IS                   *pcis = (PC_IS*)pc->data;
1981:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
1982:   PCBDDCBenignMatMult_ctx ctx;
1983:   PetscErrorCode          ierr;

1986:   if (!restore) {
1987:     Mat                A_IB,A_BI;
1988:     PetscScalar        *work;
1989:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

1991:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
1992:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
          /* work buffer stored in the context (ctx->work) and freed on restore */
1993:     PetscMalloc1(pcis->n,&work);
1994:     MatCreate(PETSC_COMM_SELF,&A_IB);
1995:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
1996:     MatSetType(A_IB,MATSHELL);
1997:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
1998:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1999:     PetscNew(&ctx);
2000:     MatShellSetContext(A_IB,ctx);
2001:     ctx->apply_left = PETSC_TRUE;
2002:     ctx->apply_right = PETSC_FALSE;
2003:     ctx->apply_p0 = PETSC_FALSE;
2004:     ctx->benign_n = pcbddc->benign_n;
2005:     if (reuse) {
            /* borrow the index sets from the reuse object: ctx->free is false so they are not destroyed on restore */
2006:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2007:       ctx->free = PETSC_FALSE;
2008:     } else { /* TODO: could be optimized for successive solves */
2009:       ISLocalToGlobalMapping N_to_D;
2010:       PetscInt               i;

            /* renumber each subset through the mapping built from pcis->is_I_local, dropping
               the indices that are not in that mapping (IS_GTOLM_DROP) */
2012:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2013:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2014:       for (i=0;i<pcbddc->benign_n;i++) {
2015:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2016:       }
2017:       ISLocalToGlobalMappingDestroy(&N_to_D);
2018:       ctx->free = PETSC_TRUE;
2019:     }
          /* the context keeps the original matrix; pcis->A_IB is redirected to the shell */
2020:     ctx->A = pcis->A_IB;
2021:     ctx->work = work;
2022:     MatSetUp(A_IB);
2023:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2024:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2025:     pcis->A_IB = A_IB;

2027:     /* A_BI as A_IB^T */
2028:     MatCreateTranspose(A_IB,&A_BI);
2029:     pcbddc->benign_original_mat = pcis->A_BI;
2030:     pcis->A_BI = A_BI;
2031:   } else {
2032:     if (!pcbddc->benign_original_mat) {
2033:       return(0);
2034:     }
          /* fetch the context before destroying the shell, since it holds the original A_IB */
2035:     MatShellGetContext(pcis->A_IB,&ctx);
2036:     MatDestroy(&pcis->A_IB);
2037:     pcis->A_IB = ctx->A;
2038:     ctx->A = NULL;
2039:     MatDestroy(&pcis->A_BI);
2040:     pcis->A_BI = pcbddc->benign_original_mat;
2041:     pcbddc->benign_original_mat = NULL;
2042:     if (ctx->free) {
2043:       PetscInt i;
2044:       for (i=0;i<ctx->benign_n;i++) {
2045:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2046:       }
2047:       PetscFree(ctx->benign_zerodiag_subs);
2048:     }
2049:     PetscFree(ctx->work);
2050:     PetscFree(ctx);
2051:   }
2052:   return(0);
2053: }

2055: /* used just in bddc debug mode */
/* Builds An with MatPtAP from the local matrix matis->A and pcbddc->benign_change
   (2.0 is the fill argument), then zeroes the rows and columns listed in
   pcbddc->benign_p0_lidx, placing 1.0 on their diagonal.
     is1, is2 - if is1 is not NULL, *B is the submatrix of An with rows is1 and
                columns is2 and An is destroyed; otherwise *B is An itself
     B        - the resulting matrix */
2056: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2057: {
2058:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2059:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2060:   Mat            An;

2064:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2065:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2066:   if (is1) {
2067:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2068:     MatDestroy(&An);
2069:   } else {
2070:     *B = An;
2071:   }
2072:   return(0);
2073: }

2075: /* TODO: add reuse flag */
/* Creates a new SeqAIJ matrix containing the stored entries of A whose magnitude is
   larger than PETSC_SMALL; when A is square (n == m) the stored diagonal entries are
   kept regardless of their magnitude.
     A - input matrix, accessed through MatGetRowIJ and MatSeqAIJGetArray
     B - on output the compressed matrix. *B is read on entry: if it equals A, A is
         destroyed before *B is overwritten, so *B must be initialized by the caller */
2076: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2077: {
2078:   Mat            Bt;
2079:   PetscScalar    *a,*bdata;
2080:   const PetscInt *ii,*ij;
2081:   PetscInt       m,n,i,nnz,*bii,*bij;
2082:   PetscBool      flg_row;

        /* n receives the first size returned by MatGetSize and m the second; n is then also passed as the MatGetRowIJ row count output */
2086:   MatGetSize(A,&n,&m);
2087:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2088:   MatSeqAIJGetArray(A,&a);
        /* upper bound on the kept entries: the large ones plus room for one (possibly small) diagonal per row */
2089:   nnz = n;
2090:   for (i=0;i<ii[n];i++) {
2091:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2092:   }
2093:   PetscMalloc1(n+1,&bii);
2094:   PetscMalloc1(nnz,&bij);
2095:   PetscMalloc1(nnz,&bdata);
        /* second pass: copy the kept entries row by row, building the new row offsets in bii */
2096:   nnz = 0;
2097:   bii[0] = 0;
2098:   for (i=0;i<n;i++) {
2099:     PetscInt j;
2100:     for (j=ii[i];j<ii[i+1];j++) {
2101:       PetscScalar entry = a[j];
2102:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2103:         bij[nnz] = ij[j];
2104:         bdata[nnz] = entry;
2105:         nnz++;
2106:       }
2107:     }
2108:     bii[i+1] = nnz;
2109:   }
2110:   MatSeqAIJRestoreArray(A,&a);
2111:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2112:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2113:   {
          /* presumably these flags make Bt release bdata, bii and bij when it is destroyed - confirm against Mat_SeqAIJ */
2114:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2115:     b->free_a = PETSC_TRUE;
2116:     b->free_ij = PETSC_TRUE;
2117:   }
2118:   if (*B == A) {
2119:     MatDestroy(&A);
2120:   }
2121:   *B = Bt;
2122:   return(0);
2123: }

/* Computes the connected components of the local subdomain.
   The graph is either the cell adjacency of a DMPLEX (a DMPlex is attached to the PC or
   to pc->pmat and filter is false) or the nonzero pattern of the local matrix returned
   by MatISGetLocalMat; in the latter case, when filter is true, entries smaller than
   PETSC_SMALL in magnitude are removed from the pattern first.
     pc      - the preconditioner
     filter  - if true, never use the DMPlex path and filter small matrix entries
     ncc     - (optional) number of connected components found
     cc      - (optional) array of ncc index sets on PETSC_COMM_SELF, one per component;
               the array and the ISs are newly created
     primalv - (optional) in the DMPlex path, IS with the dofs reached by more than one
               component; left NULL in the matrix path
   All outputs are reset (0/NULL) on entry; they stay so if the local matrix is empty.
   Changes with respect to the previous version: the terminal row offset of the
   filtered CSR graph is now set (it was left at zero, which made the adjacency range
   of the last vertex empty), and the indices of the cell numbering IS are restored. */
2125: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2126: {
2127:   Mat                    B = NULL;
2128:   DM                     dm;
2129:   IS                     is_dummy,*cc_n;
2130:   ISLocalToGlobalMapping l2gmap_dummy;
2131:   PCBDDCGraph            graph;
2132:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2133:   PetscInt               i,n;
2134:   PetscInt               *xadj,*adjncy;
2135:   PetscBool              isplex = PETSC_FALSE;
2136:   PetscErrorCode         ierr;

2139:   if (ncc) *ncc = 0;
2140:   if (cc) *cc = NULL;
2141:   if (primalv) *primalv = NULL;
2142:   PCBDDCGraphCreate(&graph);
2143:   PCGetDM(pc,&dm);
2144:   if (!dm) {
2145:     MatGetDM(pc->pmat,&dm);
2146:   }
2147:   if (dm) {
2148:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2149:   }
        /* filtering needs matrix values, so it always takes the matrix path */
2150:   if (filter) isplex = PETSC_FALSE;

2152:   if (isplex) { /* this code has been modified from plexpartition.c */
2153:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2154:     PetscInt      *adj = NULL;
2155:     IS             cellNumbering;
2156:     const PetscInt *cellNum;
2157:     PetscBool      useCone, useClosure;
2158:     PetscSection   section;
2159:     PetscSegBuffer adjBuffer;
2160:     PetscSF        sfPoint;

2164:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2165:     DMGetPointSF(dm, &sfPoint);
2166:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2167:     /* Build adjacency graph via a section/segbuffer */
2168:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2169:     PetscSectionSetChart(section, pStart, pEnd);
2170:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2171:     /* Always use FVM adjacency to create partitioner graph */
          /* the current adjacency settings are saved here and put back after the loop */
2172:     DMPlexGetAdjacencyUseCone(dm, &useCone);
2173:     DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2174:     DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2175:     DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2176:     DMPlexGetCellNumbering(dm, &cellNumbering);
2177:     ISGetIndices(cellNumbering, &cellNum);
          /* n counts the cells that are not skipped, i.e. the vertices of the graph */
2178:     for (n = 0, p = pStart; p < pEnd; p++) {
2179:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2180:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2181:       adjSize = PETSC_DETERMINE;
2182:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2183:       for (a = 0; a < adjSize; ++a) {
2184:         const PetscInt point = adj[a];
              /* keep only adjacent points that are cells (inside [pStart,pEnd)) */
2185:         if (pStart <= point && point < pEnd) {
2186:           PetscInt *PETSC_RESTRICT pBuf;
2187:           PetscSectionAddDof(section, p, 1);
2188:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2189:           *pBuf = point;
2190:         }
2191:       }
2192:       n++;
2193:     }
2194:     DMPlexSetAdjacencyUseCone(dm, useCone);
2195:     DMPlexSetAdjacencyUseClosure(dm, useClosure);
2196:     /* Derive CSR graph from section/segbuffer */
2197:     PetscSectionSetUp(section);
2198:     PetscSectionGetStorageSize(section, &size);
2199:     PetscMalloc1(n+1, &xadj);
2200:     for (idx = 0, p = pStart; p < pEnd; p++) {
2201:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2202:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2203:     }
          /* cellNum is not used past this point: pair the ISGetIndices above with its restore */
          ISRestoreIndices(cellNumbering, &cellNum);
2204:     xadj[n] = size;
2205:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2206:     /* Clean up */
2207:     PetscSegBufferDestroy(&adjBuffer);
2208:     PetscSectionDestroy(&section);
2209:     PetscFree(adj);
2210:     graph->xadj = xadj;
2211:     graph->adjncy = adjncy;
2212:   } else {
2213:     Mat       A;
2214:     PetscBool isseqaij, flg_row;

2216:     MatISGetLocalMat(pc->pmat,&A);
2217:     if (!A->rmap->N || !A->cmap->N) {
2218:       PCBDDCGraphDestroy(&graph);
2219:       return(0);
2220:     }
          /* B is the matrix whose CSR pattern is used: A itself (referenced), or a SeqAIJ
             copy when values must be filtered and A is not SeqAIJ */
2221:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2222:     if (!isseqaij && filter) {
2223:       PetscBool isseqdense;

2225:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2226:       if (!isseqdense) {
2227:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2228:       } else { /* TODO: rectangular case and LDA */
2229:         PetscScalar *array;
2230:         PetscReal   chop=1.e-6;

2232:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2233:         MatDenseGetArray(B,&array);
2234:         MatGetSize(B,&n,NULL);
              /* zero the off-diagonal pairs (i,j),(j,i) that are small relative to the two diagonal
                 entries array[i*(n+1)] and array[j*(n+1)]; the indexing assumes a square n x n array */
2235:         for (i=0;i<n;i++) {
2236:           PetscInt j;
2237:           for (j=i+1;j<n;j++) {
2238:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2239:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2240:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2241:           }
2242:         }
2243:         MatDenseRestoreArray(B,&array);
2244:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2245:       }
2246:     } else {
2247:       PetscObjectReference((PetscObject)A);
2248:       B = A;
2249:     }
2250:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2252:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2253:     if (filter) {
2254:       PetscScalar *data;
2255:       PetscInt    j,cum;

2257:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2258:       MatSeqAIJGetArray(B,&data);
2259:       cum = 0;
2260:       for (i=0;i<n;i++) {
2261:         PetscInt t;

              /* xadj_filtered[i] (zero from the calloc) first counts the kept entries of row i,
                 then is replaced by the offset of the row */
2263:         for (j=xadj[i];j<xadj[i+1];j++) {
2264:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2265:             continue;
2266:           }
2267:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2268:         }
2269:         t = xadj_filtered[i];
2270:         xadj_filtered[i] = cum;
2271:         cum += t;
2272:       }
            /* terminal CSR offset: without it the adjacency range of the last row would be empty */
            xadj_filtered[n] = cum;
2273:       MatSeqAIJRestoreArray(B,&data);
2274:       graph->xadj = xadj_filtered;
2275:       graph->adjncy = adjncy_filtered;
2276:     } else {
2277:       graph->xadj = xadj;
2278:       graph->adjncy = adjncy;
2279:     }
2280:   }
2281:   /* compute local connected components using PCBDDCGraph */
        /* the graph is initialized with a dummy mapping built from the stride IS 0..n-1 */
2282:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2283:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2284:   ISDestroy(&is_dummy);
2285:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2286:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2287:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2288:   PCBDDCGraphComputeConnectedComponents(graph);

2290:   /* partial clean up */
2291:   PetscFree2(xadj_filtered,adjncy_filtered);
2292:   if (B) {
2293:     PetscBool flg_row;
2294:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2295:     MatDestroy(&B);
2296:   }
2297:   if (isplex) {
2298:     PetscFree(xadj);
2299:     PetscFree(adjncy);
2300:   }

2302:   /* get back data */
2303:   if (isplex) {
2304:     if (ncc) *ncc = graph->ncc;
2305:     if (cc || primalv) {
2306:       Mat          A;
2307:       PetscBT      btv,btvt;
2308:       PetscSection subSection;
2309:       PetscInt     *ids,cum,cump,*cids,*pids;

            /* components are sets of cells: translate them to dofs through the subdomain section.
               btv marks dofs already assigned to a previous component, btvt the dofs already
               visited for the current component */
2311:       DMPlexGetSubdomainSection(dm,&subSection);
2312:       MatISGetLocalMat(pc->pmat,&A);
2313:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2314:       PetscBTCreate(A->rmap->n,&btv);
2315:       PetscBTCreate(A->rmap->n,&btvt);

2317:       cids[0] = 0;
2318:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2319:         PetscInt j;

2321:         PetscBTMemzero(A->rmap->n,btvt);
2322:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2323:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2325:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
                /* closure stores two integers per point; only the first one (the point) is used */
2326:           for (k = 0; k < 2*size; k += 2) {
2327:             PetscInt s, p = closure[k], off, dof, cdof;

2329:             PetscSectionGetConstraintDof(subSection, p, &cdof);
2330:             PetscSectionGetOffset(subSection,p,&off);
2331:             PetscSectionGetDof(subSection,p,&dof);
2332:             for (s = 0; s < dof-cdof; s++) {
2333:               if (PetscBTLookupSet(btvt,off+s)) continue;
2334:               if (!PetscBTLookup(btv,off+s)) {
2335:                 ids[cum++] = off+s;
2336:               } else { /* cross-vertex */
2337:                 pids[cump++] = off+s;
2338:               }
2339:             }
2340:           }
2341:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2342:         }
2343:         cids[i+1] = cum;
2344:         /* mark dofs as already assigned */
2345:         for (j = cids[i]; j < cids[i+1]; j++) {
2346:           PetscBTSet(btv,ids[j]);
2347:         }
2348:       }
2349:       if (cc) {
2350:         PetscMalloc1(graph->ncc,&cc_n);
2351:         for (i = 0; i < graph->ncc; i++) {
2352:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2353:         }
2354:         *cc = cc_n;
2355:       }
2356:       if (primalv) {
2357:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2358:       }
2359:       PetscFree3(ids,cids,pids);
2360:       PetscBTDestroy(&btv);
2361:       PetscBTDestroy(&btvt);
2362:     }
2363:   } else {
2364:     if (ncc) *ncc = graph->ncc;
2365:     if (cc) {
            /* component i is the slice [cptr[i],cptr[i+1]) of graph->queue */
2366:       PetscMalloc1(graph->ncc,&cc_n);
2367:       for (i=0;i<graph->ncc;i++) {
2368:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2369:       }
2370:       *cc = cc_n;
2371:     }
2372:   }
2373:   /* clean up graph */
        /* the CSR arrays have already been released above: detach them before destroying the graph */
2374:   graph->xadj = 0;
2375:   graph->adjncy = 0;
2376:   PCBDDCGraphDestroy(&graph);
2377:   return(0);
2378: }

/* Debug checks for the benign setup.
   When zerodiag is not NULL:
     - builds p0 (pcis->vec1_N: 1 on the zerodiag dofs, 0 elsewhere) and v_I
       (pcis->vec2_N: random, zeroed on the zerodiag dofs, on pcis->is_B_local and on
       the Dirichlet dofs of the graph) and raises an error if |(A p0, v_I)| > 1.e-1,
       A being the local matrix of pc->pmat;
     - raises an error if any zerodiag dof also belongs to pcis->is_B_local.
   In all cases it then exercises PCBDDCBenignGetOrSetP0: benign_p0 is filled with
   -PetscGlobalRank-i, the routine is called with PETSC_FALSE, benign_p0 is overwritten
   with 1, the routine is called with PETSC_TRUE, and the original values are expected
   back in benign_p0.
   Change with respect to the previous version: the arguments printed with %g / %e in
   the error messages are now cast to double (one of them was an integer expression). */
2380: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2381: {
2382:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2383:   PC_IS*         pcis = (PC_IS*)(pc->data);
2384:   IS             dirIS = NULL;
2385:   PetscInt       i;

2389:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2390:   if (zerodiag) {
2391:     Mat            A;
2392:     Vec            vec3_N;
2393:     PetscScalar    *vals;
2394:     const PetscInt *idxs;
2395:     PetscInt       nz,*count;

2397:     /* p0 */
          /* vals (pcis->n entries) is a scratch buffer of constants reused for every VecSetValues below */
2398:     VecSet(pcis->vec1_N,0.);
2399:     PetscMalloc1(pcis->n,&vals);
2400:     ISGetLocalSize(zerodiag,&nz);
2401:     ISGetIndices(zerodiag,&idxs);
2402:     for (i=0;i<nz;i++) vals[i] = 1.;
2403:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2404:     VecAssemblyBegin(pcis->vec1_N);
2405:     VecAssemblyEnd(pcis->vec1_N);
2406:     /* v_I */
2407:     VecSetRandom(pcis->vec2_N,NULL);
2408:     for (i=0;i<nz;i++) vals[i] = 0.;
2409:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2410:     ISRestoreIndices(zerodiag,&idxs);
2411:     ISGetIndices(pcis->is_B_local,&idxs);
2412:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2413:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2414:     ISRestoreIndices(pcis->is_B_local,&idxs);
2415:     if (dirIS) {
2416:       PetscInt n;

2418:       ISGetLocalSize(dirIS,&n);
2419:       ISGetIndices(dirIS,&idxs);
2420:       for (i=0;i<n;i++) vals[i] = 0.;
2421:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2422:       ISRestoreIndices(dirIS,&idxs);
2423:     }
2424:     VecAssemblyBegin(pcis->vec2_N);
2425:     VecAssemblyEnd(pcis->vec2_N);
2426:     VecDuplicate(pcis->vec1_N,&vec3_N);
2427:     VecSet(vec3_N,0.);
2428:     MatISGetLocalMat(pc->pmat,&A);
2429:     MatMult(A,pcis->vec1_N,vec3_N);
          /* vals[0] is reused to hold the dot product; the cast keeps the %e argument a double for every PetscReal precision */
2430:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2431:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0]));
2432:     PetscFree(vals);
2433:     VecDestroy(&vec3_N);

2435:     /* there should not be any pressure dofs lying on the interface */
2436:     PetscCalloc1(pcis->n,&count);
2437:     ISGetIndices(pcis->is_B_local,&idxs);
2438:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2439:     ISRestoreIndices(pcis->is_B_local,&idxs);
2440:     ISGetIndices(zerodiag,&idxs);
2441:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2442:     ISRestoreIndices(zerodiag,&idxs);
2443:     PetscFree(count);
2444:   }
2445:   ISDestroy(&dirIS);

2447:   /* check PCBDDCBenignGetOrSetP0 */
2448:   VecSetRandom(pcis->vec1_global,NULL);
2449:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2450:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2451:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2452:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2453:   for (i=0;i<pcbddc->benign_n;i++) {
2454:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
          /* both %g arguments must be doubles: the expected value is an integer expression and is cast explicitly */
2455:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i));
2456:   }
2457:   return(0);
2458: }

2460: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2461: {
2462:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2463:   IS             pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2464:   PetscInt       nz,n;
2465:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2466:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

2470:   PetscSFDestroy(&pcbddc->benign_sf);
2471:   MatDestroy(&pcbddc->benign_B0);
2472:   for (n=0;n<pcbddc->benign_n;n++) {
2473:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2474:   }
2475:   PetscFree(pcbddc->benign_zerodiag_subs);
2476:   pcbddc->benign_n = 0;

2478:   /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2479:      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2480:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2481:      If not, a change of basis on pressures is not needed
2482:      since the local Schur complements are already SPD
2483:   */
2484:   has_null_pressures = PETSC_TRUE;
2485:   have_null = PETSC_TRUE;
2486:   if (pcbddc->n_ISForDofsLocal) {
2487:     IS       iP = NULL;
2488:     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;

2490:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2491:     PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2492:     PetscOptionsEnd();
2493:     if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2494:     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2495:     ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2496:     ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2497:     ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2498:     ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2499:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2500:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2501:     if (iP) {
2502:       IS newpressures;

2504:       ISDifference(pressures,iP,&newpressures);
2505:       ISDestroy(&pressures);
2506:       pressures = newpressures;
2507:     }
2508:     ISSorted(pressures,&sorted);
2509:     if (!sorted) {
2510:       ISSort(pressures);
2511:     }
2512:   } else {
2513:     pressures = NULL;
2514:   }
2515:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2516:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2517:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2518:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2519:   ISSorted(zerodiag,&sorted);
2520:   if (!sorted) {
2521:     ISSort(zerodiag);
2522:   }
2523:   PetscObjectReference((PetscObject)zerodiag);
2524:   zerodiag_save = zerodiag;
2525:   ISGetLocalSize(zerodiag,&nz);
2526:   if (!nz) {
2527:     if (n) have_null = PETSC_FALSE;
2528:     has_null_pressures = PETSC_FALSE;
2529:     ISDestroy(&zerodiag);
2530:   }
2531:   recompute_zerodiag = PETSC_FALSE;
2532:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2533:   zerodiag_subs    = NULL;
2534:   pcbddc->benign_n = 0;
2535:   n_interior_dofs  = 0;
2536:   interior_dofs    = NULL;
2537:   nneu             = 0;
2538:   if (pcbddc->NeumannBoundariesLocal) {
2539:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2540:   }
2541:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2542:   if (checkb) { /* need to compute interior nodes */
2543:     PetscInt n,i,j;
2544:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2545:     PetscInt *iwork;

2547:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2548:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2549:     PetscCalloc1(n,&iwork);
2550:     PetscMalloc1(n,&interior_dofs);
2551:     for (i=1;i<n_neigh;i++)
2552:       for (j=0;j<n_shared[i];j++)
2553:           iwork[shared[i][j]] += 1;
2554:     for (i=0;i<n;i++)
2555:       if (!iwork[i])
2556:         interior_dofs[n_interior_dofs++] = i;
2557:     PetscFree(iwork);
2558:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2559:   }
2560:   if (has_null_pressures) {
2561:     IS             *subs;
2562:     PetscInt       nsubs,i,j,nl;
2563:     const PetscInt *idxs;
2564:     PetscScalar    *array;
2565:     Vec            *work;
2566:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2568:     subs  = pcbddc->local_subs;
2569:     nsubs = pcbddc->n_local_subs;
2570:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2571:     if (checkb) {
2572:       VecDuplicateVecs(matis->y,2,&work);
2573:       ISGetLocalSize(zerodiag,&nl);
2574:       ISGetIndices(zerodiag,&idxs);
2575:       /* work[0] = 1_p */
2576:       VecSet(work[0],0.);
2577:       VecGetArray(work[0],&array);
2578:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2579:       VecRestoreArray(work[0],&array);
2580:       /* work[0] = 1_v */
2581:       VecSet(work[1],1.);
2582:       VecGetArray(work[1],&array);
2583:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2584:       VecRestoreArray(work[1],&array);
2585:       ISRestoreIndices(zerodiag,&idxs);
2586:     }
2587:     if (nsubs > 1) {
2588:       PetscCalloc1(nsubs,&zerodiag_subs);
2589:       for (i=0;i<nsubs;i++) {
2590:         ISLocalToGlobalMapping l2g;
2591:         IS                     t_zerodiag_subs;
2592:         PetscInt               nl;

2594:         ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2595:         ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2596:         ISGetLocalSize(t_zerodiag_subs,&nl);
2597:         if (nl) {
2598:           PetscBool valid = PETSC_TRUE;

2600:           if (checkb) {
2601:             VecSet(matis->x,0);
2602:             ISGetLocalSize(subs[i],&nl);
2603:             ISGetIndices(subs[i],&idxs);
2604:             VecGetArray(matis->x,&array);
2605:             for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2606:             VecRestoreArray(matis->x,&array);
2607:             ISRestoreIndices(subs[i],&idxs);
2608:             VecPointwiseMult(matis->x,work[0],matis->x);
2609:             MatMult(matis->A,matis->x,matis->y);
2610:             VecPointwiseMult(matis->y,work[1],matis->y);
2611:             VecGetArray(matis->y,&array);
2612:             for (j=0;j<n_interior_dofs;j++) {
2613:               if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2614:                 valid = PETSC_FALSE;
2615:                 break;
2616:               }
2617:             }
2618:             VecRestoreArray(matis->y,&array);
2619:           }
2620:           if (valid && nneu) {
2621:             const PetscInt *idxs;
2622:             PetscInt       nzb;

2624:             ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2625:             ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2626:             ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2627:             if (nzb) valid = PETSC_FALSE;
2628:           }
2629:           if (valid && pressures) {
2630:             IS t_pressure_subs;
2631:             ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2632:             ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2633:             ISDestroy(&t_pressure_subs);
2634:           }
2635:           if (valid) {
2636:             ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2637:             pcbddc->benign_n++;
2638:           } else {
2639:             recompute_zerodiag = PETSC_TRUE;
2640:           }
2641:         }
2642:         ISDestroy(&t_zerodiag_subs);
2643:         ISLocalToGlobalMappingDestroy(&l2g);
2644:       }
2645:     } else { /* there's just one subdomain (or zero if they have not been detected */
2646:       PetscBool valid = PETSC_TRUE;

2648:       if (nneu) valid = PETSC_FALSE;
2649:       if (valid && pressures) {
2650:         ISEqual(pressures,zerodiag,&valid);
2651:       }
2652:       if (valid && checkb) {
2653:         MatMult(matis->A,work[0],matis->x);
2654:         VecPointwiseMult(matis->x,work[1],matis->x);
2655:         VecGetArray(matis->x,&array);
2656:         for (j=0;j<n_interior_dofs;j++) {
2657:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2658:             valid = PETSC_FALSE;
2659:             break;
2660:           }
2661:         }
2662:         VecRestoreArray(matis->x,&array);
2663:       }
2664:       if (valid) {
2665:         pcbddc->benign_n = 1;
2666:         PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2667:         PetscObjectReference((PetscObject)zerodiag);
2668:         zerodiag_subs[0] = zerodiag;
2669:       }
2670:     }
2671:     if (checkb) {
2672:       VecDestroyVecs(2,&work);
2673:     }
2674:   }
2675:   PetscFree(interior_dofs);

2677:   if (!pcbddc->benign_n) {
2678:     PetscInt n;

2680:     ISDestroy(&zerodiag);
2681:     recompute_zerodiag = PETSC_FALSE;
2682:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2683:     if (n) {
2684:       has_null_pressures = PETSC_FALSE;
2685:       have_null = PETSC_FALSE;
2686:     }
2687:   }

2689:   /* final check for null pressures */
2690:   if (zerodiag && pressures) {
2691:     PetscInt nz,np;
2692:     ISGetLocalSize(zerodiag,&nz);
2693:     ISGetLocalSize(pressures,&np);
2694:     if (nz != np) have_null = PETSC_FALSE;
2695:   }

2697:   if (recompute_zerodiag) {
2698:     ISDestroy(&zerodiag);
2699:     if (pcbddc->benign_n == 1) {
2700:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2701:       zerodiag = zerodiag_subs[0];
2702:     } else {
2703:       PetscInt i,nzn,*new_idxs;

2705:       nzn = 0;
2706:       for (i=0;i<pcbddc->benign_n;i++) {
2707:         PetscInt ns;
2708:         ISGetLocalSize(zerodiag_subs[i],&ns);
2709:         nzn += ns;
2710:       }
2711:       PetscMalloc1(nzn,&new_idxs);
2712:       nzn = 0;
2713:       for (i=0;i<pcbddc->benign_n;i++) {
2714:         PetscInt ns,*idxs;
2715:         ISGetLocalSize(zerodiag_subs[i],&ns);
2716:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2717:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2718:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2719:         nzn += ns;
2720:       }
2721:       PetscSortInt(nzn,new_idxs);
2722:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2723:     }
2724:     have_null = PETSC_FALSE;
2725:   }

2727:   /* Prepare matrix to compute no-net-flux */
2728:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2729:     Mat                    A,loc_divudotp;
2730:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2731:     IS                     row,col,isused = NULL;
2732:     PetscInt               M,N,n,st,n_isused;

2734:     if (pressures) {
2735:       isused = pressures;
2736:     } else {
2737:       isused = zerodiag_save;
2738:     }
2739:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2740:     MatISGetLocalMat(pc->pmat,&A);
2741:     MatGetLocalSize(A,&n,NULL);
2742:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2743:     n_isused = 0;
2744:     if (isused) {
2745:       ISGetLocalSize(isused,&n_isused);
2746:     }
2747:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2748:     st = st-n_isused;
2749:     if (n) {
2750:       const PetscInt *gidxs;

2752:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2753:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2754:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2755:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2756:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2757:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2758:     } else {
2759:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2760:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2761:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2762:     }
2763:     MatGetSize(pc->pmat,NULL,&N);
2764:     ISGetSize(row,&M);
2765:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2766:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2767:     ISDestroy(&row);
2768:     ISDestroy(&col);
2769:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2770:     MatSetType(pcbddc->divudotp,MATIS);
2771:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2772:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2773:     ISLocalToGlobalMappingDestroy(&rl2g);
2774:     ISLocalToGlobalMappingDestroy(&cl2g);
2775:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2776:     MatDestroy(&loc_divudotp);
2777:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2778:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2779:   }
2780:   ISDestroy(&zerodiag_save);

2782:   /* change of basis and p0 dofs */
2783:   if (has_null_pressures) {
2784:     IS             zerodiagc;
2785:     const PetscInt *idxs,*idxsc;
2786:     PetscInt       i,s,*nnz;

2788:     ISGetLocalSize(zerodiag,&nz);
2789:     ISComplement(zerodiag,0,n,&zerodiagc);
2790:     ISGetIndices(zerodiagc,&idxsc);
2791:     /* local change of basis for pressures */
2792:     MatDestroy(&pcbddc->benign_change);
2793:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2794:     MatSetType(pcbddc->benign_change,MATAIJ);
2795:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2796:     PetscMalloc1(n,&nnz);
2797:     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2798:     for (i=0;i<pcbddc->benign_n;i++) {
2799:       PetscInt nzs,j;

2801:       ISGetLocalSize(zerodiag_subs[i],&nzs);
2802:       ISGetIndices(zerodiag_subs[i],&idxs);
2803:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2804:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2805:       ISRestoreIndices(zerodiag_subs[i],&idxs);
2806:     }
2807:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2808:     MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2809:     PetscFree(nnz);
2810:     /* set identity on velocities */
2811:     for (i=0;i<n-nz;i++) {
2812:       MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2813:     }
2814:     ISRestoreIndices(zerodiagc,&idxsc);
2815:     ISDestroy(&zerodiagc);
2816:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2817:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2818:     /* set change on pressures */
2819:     for (s=0;s<pcbddc->benign_n;s++) {
2820:       PetscScalar *array;
2821:       PetscInt    nzs;

2823:       ISGetLocalSize(zerodiag_subs[s],&nzs);
2824:       ISGetIndices(zerodiag_subs[s],&idxs);
2825:       for (i=0;i<nzs-1;i++) {
2826:         PetscScalar vals[2];
2827:         PetscInt    cols[2];

2829:         cols[0] = idxs[i];
2830:         cols[1] = idxs[nzs-1];
2831:         vals[0] = 1.;
2832:         vals[1] = 1.;
2833:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2834:       }
2835:       PetscMalloc1(nzs,&array);
2836:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2837:       array[nzs-1] = 1.;
2838:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2839:       /* store local idxs for p0 */
2840:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2841:       ISRestoreIndices(zerodiag_subs[s],&idxs);
2842:       PetscFree(array);
2843:     }
2844:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2845:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2846:     /* project if needed */
2847:     if (pcbddc->benign_change_explicit) {
2848:       Mat M;

2850:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2851:       MatDestroy(&pcbddc->local_mat);
2852:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2853:       MatDestroy(&M);
2854:     }
2855:     /* store global idxs for p0 */
2856:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2857:   }
2858:   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2859:   ISDestroy(&pressures);

2861:   /* determines if the coarse solver will be singular or not */
2862:   MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2863:   /* determines if the problem has subdomains with 0 pressure block */
2864:   have_null = (PetscBool)(!!pcbddc->benign_n);
2865:   MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2866:   *zerodiaglocal = zerodiag;
2867:   return(0);
2868: }

2870: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2871: {
2872:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2873:   PetscScalar    *array;

2877:   if (!pcbddc->benign_sf) {
2878:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2879:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2880:   }
2881:   if (get) {
2882:     VecGetArrayRead(v,(const PetscScalar**)&array);
2883:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2884:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2885:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
2886:   } else {
2887:     VecGetArray(v,&array);
2888:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2889:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2890:     VecRestoreArray(v,&array);
2891:   }
2892:   return(0);
2893: }

2895: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2896: {
2897:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

2901:   /* TODO: add error checking
2902:     - avoid nested pop (or push) calls.
2903:     - cannot push before pop.
2904:     - cannot call this if pcbddc->local_mat is NULL
2905:   */
2906:   if (!pcbddc->benign_n) {
2907:     return(0);
2908:   }
2909:   if (pop) {
2910:     if (pcbddc->benign_change_explicit) {
2911:       IS       is_p0;
2912:       MatReuse reuse;

2914:       /* extract B_0 */
2915:       reuse = MAT_INITIAL_MATRIX;
2916:       if (pcbddc->benign_B0) {
2917:         reuse = MAT_REUSE_MATRIX;
2918:       }
2919:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2920:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2921:       /* remove rows and cols from local problem */
2922:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2923:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2924:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2925:       ISDestroy(&is_p0);
2926:     } else {
2927:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
2928:       PetscScalar *vals;
2929:       PetscInt    i,n,*idxs_ins;

2931:       VecGetLocalSize(matis->y,&n);
2932:       PetscMalloc2(n,&idxs_ins,n,&vals);
2933:       if (!pcbddc->benign_B0) {
2934:         PetscInt *nnz;
2935:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2936:         MatSetType(pcbddc->benign_B0,MATAIJ);
2937:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2938:         PetscMalloc1(pcbddc->benign_n,&nnz);
2939:         for (i=0;i<pcbddc->benign_n;i++) {
2940:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2941:           nnz[i] = n - nnz[i];
2942:         }
2943:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2944:         MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2945:         PetscFree(nnz);
2946:       }

2948:       for (i=0;i<pcbddc->benign_n;i++) {
2949:         PetscScalar *array;
2950:         PetscInt    *idxs,j,nz,cum;

2952:         VecSet(matis->x,0.);
2953:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2954:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2955:         for (j=0;j<nz;j++) vals[j] = 1.;
2956:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2957:         VecAssemblyBegin(matis->x);
2958:         VecAssemblyEnd(matis->x);
2959:         VecSet(matis->y,0.);
2960:         MatMult(matis->A,matis->x,matis->y);
2961:         VecGetArray(matis->y,&array);
2962:         cum = 0;
2963:         for (j=0;j<n;j++) {
2964:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2965:             vals[cum] = array[j];
2966:             idxs_ins[cum] = j;
2967:             cum++;
2968:           }
2969:         }
2970:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
2971:         VecRestoreArray(matis->y,&array);
2972:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2973:       }
2974:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2975:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2976:       PetscFree2(idxs_ins,vals);
2977:     }
2978:   } else { /* push */
2979:     if (pcbddc->benign_change_explicit) {
2980:       PetscInt i;

2982:       for (i=0;i<pcbddc->benign_n;i++) {
2983:         PetscScalar *B0_vals;
2984:         PetscInt    *B0_cols,B0_ncol;

2986:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2987:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
2988:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
2989:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
2990:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2991:       }
2992:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2993:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2994:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
2995:   }
2996:   return(0);
2997: }

2999: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3000: {
3001:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3002:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3003:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3004:   PetscBLASInt    *B_iwork,*B_ifail;
3005:   PetscScalar     *work,lwork;
3006:   PetscScalar     *St,*S,*eigv;
3007:   PetscScalar     *Sarray,*Starray;
3008:   PetscReal       *eigs,thresh,lthresh,uthresh;
3009:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3010:   PetscBool       allocated_S_St;
3011: #if defined(PETSC_USE_COMPLEX)
3012:   PetscReal       *rwork;
3013: #endif
3014:   PetscErrorCode  ierr;

3017:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3018:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3019:   if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3020:   PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);

3022:   if (pcbddc->dbg_flag) {
3023:     PetscViewerFlush(pcbddc->dbg_viewer);
3024:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3025:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3026:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3027:   }

3029:   if (pcbddc->dbg_flag) {
3030:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3031:   }

3033:   /* max size of subsets */
3034:   mss = 0;
3035:   for (i=0;i<sub_schurs->n_subs;i++) {
3036:     PetscInt subset_size;

3038:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3039:     mss = PetscMax(mss,subset_size);
3040:   }

3042:   /* min/max and threshold */
3043:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3044:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3045:   nmax = PetscMax(nmin,nmax);
3046:   allocated_S_St = PETSC_FALSE;
3047:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3048:     allocated_S_St = PETSC_TRUE;
3049:   }

3051:   /* allocate lapack workspace */
3052:   cum = cum2 = 0;
3053:   maxneigs = 0;
3054:   for (i=0;i<sub_schurs->n_subs;i++) {
3055:     PetscInt n,subset_size;

3057:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3058:     n = PetscMin(subset_size,nmax);
3059:     cum += subset_size;
3060:     cum2 += subset_size*n;
3061:     maxneigs = PetscMax(maxneigs,n);
3062:   }
3063:   if (mss) {
3064:     if (sub_schurs->is_symmetric) {
3065:       PetscBLASInt B_itype = 1;
3066:       PetscBLASInt B_N = mss;
3067:       PetscReal    zero = 0.0;
3068:       PetscReal    eps = 0.0; /* dlamch? */

3070:       B_lwork = -1;
3071:       S = NULL;
3072:       St = NULL;
3073:       eigs = NULL;
3074:       eigv = NULL;
3075:       B_iwork = NULL;
3076:       B_ifail = NULL;
3077: #if defined(PETSC_USE_COMPLEX)
3078:       rwork = NULL;
3079: #endif
3080:       thresh = 1.0;
3081:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3082: #if defined(PETSC_USE_COMPLEX)
3083:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3084: #else
3085:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3086: #endif
3087:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3088:       PetscFPTrapPop();
3089:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3090:   } else {
3091:     lwork = 0;
3092:   }

3094:   nv = 0;
3095:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3096:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3097:   }
3098:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3099:   if (allocated_S_St) {
3100:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3101:   }
3102:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3103: #if defined(PETSC_USE_COMPLEX)
3104:   PetscMalloc1(7*mss,&rwork);
3105: #endif
3106:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3107:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3108:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3109:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3110:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3111:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3113:   maxneigs = 0;
3114:   cum = cumarray = 0;
3115:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3116:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3117:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3118:     const PetscInt *idxs;

3120:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3121:     for (cum=0;cum<nv;cum++) {
3122:       pcbddc->adaptive_constraints_n[cum] = 1;
3123:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3124:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3125:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3126:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3127:     }
3128:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3129:   }

3131:   if (mss) { /* multilevel */
3132:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3133:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3134:   }

3136:   lthresh = pcbddc->adaptive_threshold[0];
3137:   uthresh = pcbddc->adaptive_threshold[1];
3138:   for (i=0;i<sub_schurs->n_subs;i++) {
3139:     const PetscInt *idxs;
3140:     PetscReal      upper,lower;
3141:     PetscInt       j,subset_size,eigs_start = 0;
3142:     PetscBLASInt   B_N;
3143:     PetscBool      same_data = PETSC_FALSE;
3144:     PetscBool      scal = PETSC_FALSE;

3146:     if (pcbddc->use_deluxe_scaling) {
3147:       upper = PETSC_MAX_REAL;
3148:       lower = uthresh;
3149:     } else {
3150:       if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3151:       upper = 1./uthresh;
3152:       lower = 0.;
3153:     }
3154:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3155:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3156:     PetscBLASIntCast(subset_size,&B_N);
3157:     /* this is experimental: we assume the dofs have been properly grouped to have
3158:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3159:     if (!sub_schurs->is_posdef) {
3160:       Mat T;

3162:       for (j=0;j<subset_size;j++) {
3163:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3164:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3165:           MatScale(T,-1.0);
3166:           MatDestroy(&T);
3167:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3168:           MatScale(T,-1.0);
3169:           MatDestroy(&T);
3170:           if (sub_schurs->change_primal_sub) {
3171:             PetscInt       nz,k;
3172:             const PetscInt *idxs;

3174:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3175:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3176:             for (k=0;k<nz;k++) {
3177:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3178:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3179:             }
3180:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3181:           }
3182:           scal = PETSC_TRUE;
3183:           break;
3184:         }
3185:       }
3186:     }

3188:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3189:       if (sub_schurs->is_symmetric) {
3190:         PetscInt j,k;
3191:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3192:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3193:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3194:         }
3195:         for (j=0;j<subset_size;j++) {
3196:           for (k=j;k<subset_size;k++) {
3197:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3198:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3199:           }
3200:         }
3201:       } else {
3202:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3203:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3204:       }
3205:     } else {
3206:       S = Sarray + cumarray;
3207:       St = Starray + cumarray;
3208:     }
3209:     /* see if we can save some work */
3210:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3211:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3212:     }

3214:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3215:       B_neigs = 0;
3216:     } else {
3217:       if (sub_schurs->is_symmetric) {
3218:         PetscBLASInt B_itype = 1;
3219:         PetscBLASInt B_IL, B_IU;
3220:         PetscReal    eps = -1.0; /* dlamch? */
3221:         PetscInt     nmin_s;
3222:         PetscBool    compute_range;

3224:         B_neigs = 0;
3225:         compute_range = (PetscBool)!same_data;
3226:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3228:         if (pcbddc->dbg_flag) {
3229:           PetscInt nc = 0;

3231:           if (sub_schurs->change_primal_sub) {
3232:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3233:           }
3234:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3235:         }

3237:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3238:         if (compute_range) {

3240:           /* ask for eigenvalues larger than thresh */
3241:           if (sub_schurs->is_posdef) {
3242: #if defined(PETSC_USE_COMPLEX)
3243:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3244: #else
3245:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3246: #endif
3247:             PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3248:           } else { /* no theory so far, but it works nicely */
3249:             PetscInt  recipe = 0,recipe_m = 1;
3250:             PetscReal bb[2];

3252:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3253:             switch (recipe) {
3254:             case 0:
3255:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3256:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3257: #if defined(PETSC_USE_COMPLEX)
3258:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3259: #else
3260:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3261: #endif
3262:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3263:               break;
3264:             case 1:
3265:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3266: #if defined(PETSC_USE_COMPLEX)
3267:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3268: #else
3269:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3270: #endif
3271:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3272:               if (!scal) {
3273:                 PetscBLASInt B_neigs2 = 0;

3275:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3276:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3277:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3278: #if defined(PETSC_USE_COMPLEX)
3279:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3280: #else
3281:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3282: #endif
3283:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3284:                 B_neigs += B_neigs2;
3285:               }
3286:               break;
3287:             case 2:
3288:               if (scal) {
3289:                 bb[0] = PETSC_MIN_REAL;
3290:                 bb[1] = 0;
3291: #if defined(PETSC_USE_COMPLEX)
3292:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3293: #else
3294:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3295: #endif
3296:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3297:               } else {
3298:                 PetscBLASInt B_neigs2 = 0;
3299:                 PetscBool    import = PETSC_FALSE;

3301:                 lthresh = PetscMax(lthresh,0.0);
3302:                 if (lthresh > 0.0) {
3303:                   bb[0] = PETSC_MIN_REAL;
3304:                   bb[1] = lthresh*lthresh;

3306:                   import = PETSC_TRUE;
3307: #if defined(PETSC_USE_COMPLEX)
3308:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3309: #else
3310:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3311: #endif
3312:                   PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3313:                 }
3314:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3315:                 bb[1] = PETSC_MAX_REAL;
3316:                 if (import) {
3317:                   PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3318:                   PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3319:                 }
3320: #if defined(PETSC_USE_COMPLEX)
3321:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3322: #else
3323:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3324: #endif
3325:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3326:                 B_neigs += B_neigs2;
3327:               }
3328:               break;
3329:             case 3:
3330:               if (scal) {
3331:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3332:               } else {
3333:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3334:               }
3335:               if (!scal) {
3336:                 bb[0] = uthresh;
3337:                 bb[1] = PETSC_MAX_REAL;
3338: #if defined(PETSC_USE_COMPLEX)
3339:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3340: #else
3341:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3342: #endif
3343:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3344:               }
3345:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3346:                 PetscBLASInt B_neigs2 = 0;

3348:                 B_IL = 1;
3349:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3350:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3351:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3352: #if defined(PETSC_USE_COMPLEX)
3353:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3354: #else
3355:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3356: #endif
3357:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3358:                 B_neigs += B_neigs2;
3359:               }
3360:               break;
3361:             case 4:
3362:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3363: #if defined(PETSC_USE_COMPLEX)
3364:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3365: #else
3366:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3367: #endif
3368:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3369:               {
3370:                 PetscBLASInt B_neigs2 = 0;

3372:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3373:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3374:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3375: #if defined(PETSC_USE_COMPLEX)
3376:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3377: #else
3378:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3379: #endif
3380:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3381:                 B_neigs += B_neigs2;
3382:               }
3383:               break;
3384:             case 5: /* same as before: first compute all eigenvalues, then filter */
3385: #if defined(PETSC_USE_COMPLEX)
3386:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3387: #else
3388:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3389: #endif
3390:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3391:               {
3392:                 PetscInt e,k,ne;
3393:                 for (e=0,ne=0;e<B_neigs;e++) {
3394:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3395:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3396:                     eigs[ne] = eigs[e];
3397:                     ne++;
3398:                   }
3399:                 }
3400:                 PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3401:                 B_neigs = ne;
3402:               }
3403:               break;
3404:             default:
3405:               SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3406:               break;
3407:             }
3408:           }
3409:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3410:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3411:           B_IL = 1;
3412: #if defined(PETSC_USE_COMPLEX)
3413:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3414: #else
3415:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3416: #endif
3417:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3418:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3419:           PetscInt k;
3420:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3421:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3422:           PetscBLASIntCast(nmax,&B_neigs);
3423:           nmin = nmax;
3424:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3425:           for (k=0;k<nmax;k++) {
3426:             eigs[k] = 1./PETSC_SMALL;
3427:             eigv[k*(subset_size+1)] = 1.0;
3428:           }
3429:         }
3430:         PetscFPTrapPop();
3431:         if (B_ierr) {
3432:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3433:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3434:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3435:         }

3437:         if (B_neigs > nmax) {
3438:           if (pcbddc->dbg_flag) {
3439:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3440:           }
3441:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3442:           B_neigs = nmax;
3443:         }

3445:         nmin_s = PetscMin(nmin,B_N);
3446:         if (B_neigs < nmin_s) {
3447:           PetscBLASInt B_neigs2 = 0;

3449:           if (pcbddc->use_deluxe_scaling) {
3450:             if (scal) {
3451:               B_IU = nmin_s;
3452:               B_IL = B_neigs + 1;
3453:             } else {
3454:               B_IL = B_N - nmin_s + 1;
3455:               B_IU = B_N - B_neigs;
3456:             }
3457:           } else {
3458:             B_IL = B_neigs + 1;
3459:             B_IU = nmin_s;
3460:           }
3461:           if (pcbddc->dbg_flag) {
3462:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3463:           }
3464:           if (sub_schurs->is_symmetric) {
3465:             PetscInt j,k;
3466:             for (j=0;j<subset_size;j++) {
3467:               for (k=j;k<subset_size;k++) {
3468:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3469:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3470:               }
3471:             }
3472:           } else {
3473:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3474:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3475:           }
3476:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3477: #if defined(PETSC_USE_COMPLEX)
3478:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3479: #else
3480:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3481: #endif
3482:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3483:           PetscFPTrapPop();
3484:           B_neigs += B_neigs2;
3485:         }
3486:         if (B_ierr) {
3487:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3488:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3489:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3490:         }
3491:         if (pcbddc->dbg_flag) {
3492:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3493:           for (j=0;j<B_neigs;j++) {
3494:             if (eigs[j] == 0.0) {
3495:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3496:             } else {
3497:               if (pcbddc->use_deluxe_scaling) {
3498:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3499:               } else {
3500:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3501:               }
3502:             }
3503:           }
3504:         }
3505:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3506:     }
3507:     /* change the basis back to the original one */
3508:     if (sub_schurs->change) {
3509:       Mat change,phi,phit;

3511:       if (pcbddc->dbg_flag > 2) {
3512:         PetscInt ii;
3513:         for (ii=0;ii<B_neigs;ii++) {
3514:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3515:           for (j=0;j<B_N;j++) {
3516: #if defined(PETSC_USE_COMPLEX)
3517:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3518:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3519:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3520: #else
3521:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3522: #endif
3523:           }
3524:         }
3525:       }
3526:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3527:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3528:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3529:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3530:       MatDestroy(&phit);
3531:       MatDestroy(&phi);
3532:     }
3533:     maxneigs = PetscMax(B_neigs,maxneigs);
3534:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3535:     if (B_neigs) {
3536:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3538:       if (pcbddc->dbg_flag > 1) {
3539:         PetscInt ii;
3540:         for (ii=0;ii<B_neigs;ii++) {
3541:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3542:           for (j=0;j<B_N;j++) {
3543: #if defined(PETSC_USE_COMPLEX)
3544:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3545:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3546:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3547: #else
3548:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3549: #endif
3550:           }
3551:         }
3552:       }
3553:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3554:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3555:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3556:       cum++;
3557:     }
3558:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3559:     /* shift for next computation */
3560:     cumarray += subset_size*subset_size;
3561:   }
3562:   if (pcbddc->dbg_flag) {
3563:     PetscViewerFlush(pcbddc->dbg_viewer);
3564:   }

3566:   if (mss) {
3567:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3568:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3569:     /* destroy matrices (junk) */
3570:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3571:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3572:   }
3573:   if (allocated_S_St) {
3574:     PetscFree2(S,St);
3575:   }
3576:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3577: #if defined(PETSC_USE_COMPLEX)
3578:   PetscFree(rwork);
3579: #endif
3580:   if (pcbddc->dbg_flag) {
3581:     PetscInt maxneigs_r;
3582:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3583:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3584:   }
3585:   PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3586:   return(0);
3587: }

3589: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3590: {
3591:   PetscScalar    *coarse_submat_vals;

3595:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3596:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3597:   PCBDDCSetUpLocalScatters(pc);

3599:   /* Setup local neumann solver ksp_R */
3600:   /* PCBDDCSetUpLocalScatters should be called first! */
3601:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3603:   /*
3604:      Setup local correction and local part of coarse basis.
3605:      Gives back the dense local part of the coarse matrix in column major ordering
3606:   */
3607:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3609:   /* Compute total number of coarse nodes and setup coarse solver */
3610:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3612:   /* free */
3613:   PetscFree(coarse_submat_vals);
3614:   return(0);
3615: }

3617: PetscErrorCode PCBDDCResetCustomization(PC pc)
3618: {
3619:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3623:   ISDestroy(&pcbddc->user_primal_vertices);
3624:   ISDestroy(&pcbddc->user_primal_vertices_local);
3625:   ISDestroy(&pcbddc->NeumannBoundaries);
3626:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3627:   ISDestroy(&pcbddc->DirichletBoundaries);
3628:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3629:   PetscFree(pcbddc->onearnullvecs_state);
3630:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3631:   PCBDDCSetDofsSplitting(pc,0,NULL);
3632:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3633:   return(0);
3634: }

3636: PetscErrorCode PCBDDCResetTopography(PC pc)
3637: {
3638:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3639:   PetscInt       i;

3643:   MatDestroy(&pcbddc->nedcG);
3644:   ISDestroy(&pcbddc->nedclocal);
3645:   MatDestroy(&pcbddc->discretegradient);
3646:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3647:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3648:   MatDestroy(&pcbddc->switch_static_change);
3649:   VecDestroy(&pcbddc->work_change);
3650:   MatDestroy(&pcbddc->ConstraintMatrix);
3651:   MatDestroy(&pcbddc->divudotp);
3652:   ISDestroy(&pcbddc->divudotp_vl2l);
3653:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3654:   for (i=0;i<pcbddc->n_local_subs;i++) {
3655:     ISDestroy(&pcbddc->local_subs[i]);
3656:   }
3657:   pcbddc->n_local_subs = 0;
3658:   PetscFree(pcbddc->local_subs);
3659:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3660:   pcbddc->graphanalyzed        = PETSC_FALSE;
3661:   pcbddc->recompute_topography = PETSC_TRUE;
3662:   pcbddc->corner_selected      = PETSC_FALSE;
3663:   return(0);
3664: }

3666: PetscErrorCode PCBDDCResetSolvers(PC pc)
3667: {
3668:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3672:   VecDestroy(&pcbddc->coarse_vec);
3673:   if (pcbddc->coarse_phi_B) {
3674:     PetscScalar *array;
3675:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3676:     PetscFree(array);
3677:   }
3678:   MatDestroy(&pcbddc->coarse_phi_B);
3679:   MatDestroy(&pcbddc->coarse_phi_D);
3680:   MatDestroy(&pcbddc->coarse_psi_B);
3681:   MatDestroy(&pcbddc->coarse_psi_D);
3682:   VecDestroy(&pcbddc->vec1_P);
3683:   VecDestroy(&pcbddc->vec1_C);
3684:   MatDestroy(&pcbddc->local_auxmat2);
3685:   MatDestroy(&pcbddc->local_auxmat1);
3686:   VecDestroy(&pcbddc->vec1_R);
3687:   VecDestroy(&pcbddc->vec2_R);
3688:   ISDestroy(&pcbddc->is_R_local);
3689:   VecScatterDestroy(&pcbddc->R_to_B);
3690:   VecScatterDestroy(&pcbddc->R_to_D);
3691:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3692:   KSPReset(pcbddc->ksp_D);
3693:   KSPReset(pcbddc->ksp_R);
3694:   KSPReset(pcbddc->coarse_ksp);
3695:   MatDestroy(&pcbddc->local_mat);
3696:   PetscFree(pcbddc->primal_indices_local_idxs);
3697:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3698:   PetscFree(pcbddc->global_primal_indices);
3699:   ISDestroy(&pcbddc->coarse_subassembling);
3700:   MatDestroy(&pcbddc->benign_change);
3701:   VecDestroy(&pcbddc->benign_vec);
3702:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3703:   MatDestroy(&pcbddc->benign_B0);
3704:   PetscSFDestroy(&pcbddc->benign_sf);
3705:   if (pcbddc->benign_zerodiag_subs) {
3706:     PetscInt i;
3707:     for (i=0;i<pcbddc->benign_n;i++) {
3708:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3709:     }
3710:     PetscFree(pcbddc->benign_zerodiag_subs);
3711:   }
3712:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3713:   return(0);
3714: }

3716: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3717: {
3718:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3719:   PC_IS          *pcis = (PC_IS*)pc->data;
3720:   VecType        impVecType;
3721:   PetscInt       n_constraints,n_R,old_size;

3725:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3726:   n_R = pcis->n - pcbddc->n_vertices;
3727:   VecGetType(pcis->vec1_N,&impVecType);
3728:   /* local work vectors (try to avoid unneeded work)*/
3729:   /* R nodes */
3730:   old_size = -1;
3731:   if (pcbddc->vec1_R) {
3732:     VecGetSize(pcbddc->vec1_R,&old_size);
3733:   }
3734:   if (n_R != old_size) {
3735:     VecDestroy(&pcbddc->vec1_R);
3736:     VecDestroy(&pcbddc->vec2_R);
3737:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3738:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3739:     VecSetType(pcbddc->vec1_R,impVecType);
3740:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3741:   }
3742:   /* local primal dofs */
3743:   old_size = -1;
3744:   if (pcbddc->vec1_P) {
3745:     VecGetSize(pcbddc->vec1_P,&old_size);
3746:   }
3747:   if (pcbddc->local_primal_size != old_size) {
3748:     VecDestroy(&pcbddc->vec1_P);
3749:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3750:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3751:     VecSetType(pcbddc->vec1_P,impVecType);
3752:   }
3753:   /* local explicit constraints */
3754:   old_size = -1;
3755:   if (pcbddc->vec1_C) {
3756:     VecGetSize(pcbddc->vec1_C,&old_size);
3757:   }
3758:   if (n_constraints && n_constraints != old_size) {
3759:     VecDestroy(&pcbddc->vec1_C);
3760:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3761:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3762:     VecSetType(pcbddc->vec1_C,impVecType);
3763:   }
3764:   return(0);
3765: }

3767: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3768: {
3769:   PetscErrorCode  ierr;
3770:   /* pointers to pcis and pcbddc */
3771:   PC_IS*          pcis = (PC_IS*)pc->data;
3772:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3773:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3774:   /* submatrices of local problem */
3775:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3776:   /* submatrices of local coarse problem */
3777:   Mat             S_VV,S_CV,S_VC,S_CC;
3778:   /* working matrices */
3779:   Mat             C_CR;
3780:   /* additional working stuff */
3781:   PC              pc_R;
3782:   Mat             F,Brhs = NULL;
3783:   Vec             dummy_vec;
3784:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3785:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3786:   PetscScalar     *work;
3787:   PetscInt        *idx_V_B;
3788:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3789:   PetscInt        i,n_R,n_D,n_B;

3791:   /* some shortcuts to scalars */
3792:   PetscScalar     one=1.0,m_one=-1.0;

3795:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3796:   PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

3798:   /* Set Non-overlapping dimensions */
3799:   n_vertices = pcbddc->n_vertices;
3800:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3801:   n_B = pcis->n_B;
3802:   n_D = pcis->n - n_B;
3803:   n_R = pcis->n - n_vertices;

3805:   /* vertices in boundary numbering */
3806:   PetscMalloc1(n_vertices,&idx_V_B);
3807:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3808:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);

3810:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3811:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3812:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3813:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3814:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3815:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3816:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3817:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3818:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3819:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3821:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3822:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3823:   PCSetUp(pc_R);
3824:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3825:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3826:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3827:   lda_rhs = n_R;
3828:   need_benign_correction = PETSC_FALSE;
3829:   if (isLU || isILU || isCHOL) {
3830:     PCFactorGetMatrix(pc_R,&F);
3831:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3832:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3833:     MatFactorType      type;

3835:     F = reuse_solver->F;
3836:     MatGetFactorType(F,&type);
3837:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3838:     MatGetSize(F,&lda_rhs,NULL);
3839:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3840:   } else {
3841:     F = NULL;
3842:   }

3844:   /* determine if we can use a sparse right-hand side */
3845:   sparserhs = PETSC_FALSE;
3846:   if (F) {
3847:     MatSolverType solver;

3849:     MatFactorGetSolverType(F,&solver);
3850:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3851:   }

3853:   /* allocate workspace */
3854:   n = 0;
3855:   if (n_constraints) {
3856:     n += lda_rhs*n_constraints;
3857:   }
3858:   if (n_vertices) {
3859:     n = PetscMax(2*lda_rhs*n_vertices,n);
3860:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3861:   }
3862:   if (!pcbddc->symmetric_primal) {
3863:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3864:   }
3865:   PetscMalloc1(n,&work);

3867:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3868:   dummy_vec = NULL;
3869:   if (need_benign_correction && lda_rhs != n_R && F) {
3870:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3871:     VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3872:     VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3873:   }

3875:   /* Precompute quantities needed for preprocessing and application of BDDC */
3876:   if (n_constraints) {
3877:     Mat         M3,C_B;
3878:     IS          is_aux;
3879:     PetscScalar *array,*array2;

3881:     MatDestroy(&pcbddc->local_auxmat1);
3882:     MatDestroy(&pcbddc->local_auxmat2);

3884:     /* Extract constraints on R nodes: C_{CR}  */
3885:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3886:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3887:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

3889:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3890:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3891:     if (!sparserhs) {
3892:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3893:       for (i=0;i<n_constraints;i++) {
3894:         const PetscScalar *row_cmat_values;
3895:         const PetscInt    *row_cmat_indices;
3896:         PetscInt          size_of_constraint,j;

3898:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3899:         for (j=0;j<size_of_constraint;j++) {
3900:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3901:         }
3902:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3903:       }
3904:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3905:     } else {
3906:       Mat tC_CR;

3908:       MatScale(C_CR,-1.0);
3909:       if (lda_rhs != n_R) {
3910:         PetscScalar *aa;
3911:         PetscInt    r,*ii,*jj;
3912:         PetscBool   done;

3914:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3915:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3916:         MatSeqAIJGetArray(C_CR,&aa);
3917:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3918:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3919:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3920:       } else {
3921:         PetscObjectReference((PetscObject)C_CR);
3922:         tC_CR = C_CR;
3923:       }
3924:       MatCreateTranspose(tC_CR,&Brhs);
3925:       MatDestroy(&tC_CR);
3926:     }
3927:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3928:     if (F) {
3929:       if (need_benign_correction) {
3930:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3932:         /* rhs is already zero on interior dofs, no need to change the rhs */
3933:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3934:       }
3935:       MatMatSolve(F,Brhs,local_auxmat2_R);
3936:       if (need_benign_correction) {
3937:         PetscScalar        *marr;
3938:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

3940:         MatDenseGetArray(local_auxmat2_R,&marr);
3941:         if (lda_rhs != n_R) {
3942:           for (i=0;i<n_constraints;i++) {
3943:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3944:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3945:             VecResetArray(dummy_vec);
3946:           }
3947:         } else {
3948:           for (i=0;i<n_constraints;i++) {
3949:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3950:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3951:             VecResetArray(pcbddc->vec1_R);
3952:           }
3953:         }
3954:         MatDenseRestoreArray(local_auxmat2_R,&marr);
3955:       }
3956:     } else {
3957:       PetscScalar *marr;

3959:       MatDenseGetArray(local_auxmat2_R,&marr);
3960:       for (i=0;i<n_constraints;i++) {
3961:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3962:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3963:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3964:         VecResetArray(pcbddc->vec1_R);
3965:         VecResetArray(pcbddc->vec2_R);
3966:       }
3967:       MatDenseRestoreArray(local_auxmat2_R,&marr);
3968:     }
3969:     if (sparserhs) {
3970:       MatScale(C_CR,-1.0);
3971:     }
3972:     MatDestroy(&Brhs);
3973:     if (!pcbddc->switch_static) {
3974:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3975:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
3976:       MatDenseGetArray(local_auxmat2_R,&array2);
3977:       for (i=0;i<n_constraints;i++) {
3978:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
3979:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
3980:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3981:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3982:         VecResetArray(pcis->vec1_B);
3983:         VecResetArray(pcbddc->vec1_R);
3984:       }
3985:       MatDenseRestoreArray(local_auxmat2_R,&array2);
3986:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
3987:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3988:     } else {
3989:       if (lda_rhs != n_R) {
3990:         IS dummy;

3992:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
3993:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
3994:         ISDestroy(&dummy);
3995:       } else {
3996:         PetscObjectReference((PetscObject)local_auxmat2_R);
3997:         pcbddc->local_auxmat2 = local_auxmat2_R;
3998:       }
3999:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4000:     }
4001:     ISDestroy(&is_aux);
4002:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
4003:     MatScale(M3,m_one);
4004:     if (isCHOL) {
4005:       MatCholeskyFactor(M3,NULL,NULL);
4006:     } else {
4007:       MatLUFactor(M3,NULL,NULL,NULL);
4008:     }
4009:     MatSeqDenseInvertFactors_Private(M3);
4010:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4011:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4012:     MatDestroy(&C_B);
4013:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4014:     MatDestroy(&M3);
4015:   }

4017:   /* Get submatrices from subdomain matrix */
4018:   if (n_vertices) {
4019:     IS        is_aux;
4020:     PetscBool isseqaij;

4022:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4023:       IS tis;

4025:       ISDuplicate(pcbddc->is_R_local,&tis);
4026:       ISSort(tis);
4027:       ISComplement(tis,0,pcis->n,&is_aux);
4028:       ISDestroy(&tis);
4029:     } else {
4030:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4031:     }
4032:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4033:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4034:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4035:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4036:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4037:     }
4038:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4039:     ISDestroy(&is_aux);
4040:   }

4042:   /* Matrix of coarse basis functions (local) */
4043:   if (pcbddc->coarse_phi_B) {
4044:     PetscInt on_B,on_primal,on_D=n_D;
4045:     if (pcbddc->coarse_phi_D) {
4046:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4047:     }
4048:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4049:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4050:       PetscScalar *marray;

4052:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4053:       PetscFree(marray);
4054:       MatDestroy(&pcbddc->coarse_phi_B);
4055:       MatDestroy(&pcbddc->coarse_psi_B);
4056:       MatDestroy(&pcbddc->coarse_phi_D);
4057:       MatDestroy(&pcbddc->coarse_psi_D);
4058:     }
4059:   }

4061:   if (!pcbddc->coarse_phi_B) {
4062:     PetscScalar *marr;

4064:     /* memory size */
4065:     n = n_B*pcbddc->local_primal_size;
4066:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4067:     if (!pcbddc->symmetric_primal) n *= 2;
4068:     PetscCalloc1(n,&marr);
4069:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4070:     marr += n_B*pcbddc->local_primal_size;
4071:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4072:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4073:       marr += n_D*pcbddc->local_primal_size;
4074:     }
4075:     if (!pcbddc->symmetric_primal) {
4076:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4077:       marr += n_B*pcbddc->local_primal_size;
4078:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4079:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4080:       }
4081:     } else {
4082:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4083:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4084:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4085:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4086:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4087:       }
4088:     }
4089:   }

4091:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4092:   p0_lidx_I = NULL;
4093:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4094:     const PetscInt *idxs;

4096:     ISGetIndices(pcis->is_I_local,&idxs);
4097:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4098:     for (i=0;i<pcbddc->benign_n;i++) {
4099:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4100:     }
4101:     ISRestoreIndices(pcis->is_I_local,&idxs);
4102:   }

4104:   /* vertices */
4105:   if (n_vertices) {
4106:     PetscBool restoreavr = PETSC_FALSE;

4108:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4110:     if (n_R) {
4111:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4112:       PetscBLASInt B_N,B_one = 1;
4113:       PetscScalar  *x,*y;

4115:       MatScale(A_RV,m_one);
4116:       if (need_benign_correction) {
4117:         ISLocalToGlobalMapping RtoN;
4118:         IS                     is_p0;
4119:         PetscInt               *idxs_p0,n;

4121:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4122:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4123:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4124:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4125:         ISLocalToGlobalMappingDestroy(&RtoN);
4126:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4127:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4128:         ISDestroy(&is_p0);
4129:       }

4131:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4132:       if (!sparserhs || need_benign_correction) {
4133:         if (lda_rhs == n_R) {
4134:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4135:         } else {
4136:           PetscScalar    *av,*array;
4137:           const PetscInt *xadj,*adjncy;
4138:           PetscInt       n;
4139:           PetscBool      flg_row;

4141:           array = work+lda_rhs*n_vertices;
4142:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4143:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4144:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4145:           MatSeqAIJGetArray(A_RV,&av);
4146:           for (i=0;i<n;i++) {
4147:             PetscInt j;
4148:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4149:           }
4150:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4151:           MatDestroy(&A_RV);
4152:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4153:         }
4154:         if (need_benign_correction) {
4155:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4156:           PetscScalar        *marr;

4158:           MatDenseGetArray(A_RV,&marr);
4159:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4161:                  | 0 0  0 | (V)
4162:              L = | 0 0 -1 | (P-p0)
4163:                  | 0 0 -1 | (p0)

4165:           */
4166:           for (i=0;i<reuse_solver->benign_n;i++) {
4167:             const PetscScalar *vals;
4168:             const PetscInt    *idxs,*idxs_zero;
4169:             PetscInt          n,j,nz;

4171:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4172:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4173:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4174:             for (j=0;j<n;j++) {
4175:               PetscScalar val = vals[j];
4176:               PetscInt    k,col = idxs[j];
4177:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4178:             }
4179:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4180:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4181:           }
4182:           MatDenseRestoreArray(A_RV,&marr);
4183:         }
4184:         PetscObjectReference((PetscObject)A_RV);
4185:         Brhs = A_RV;
4186:       } else {
4187:         Mat tA_RVT,A_RVT;

4189:         if (!pcbddc->symmetric_primal) {
4190:           /* A_RV already scaled by -1 */
4191:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4192:         } else {
4193:           restoreavr = PETSC_TRUE;
4194:           MatScale(A_VR,-1.0);
4195:           PetscObjectReference((PetscObject)A_VR);
4196:           A_RVT = A_VR;
4197:         }
4198:         if (lda_rhs != n_R) {
4199:           PetscScalar *aa;
4200:           PetscInt    r,*ii,*jj;
4201:           PetscBool   done;

4203:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4204:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4205:           MatSeqAIJGetArray(A_RVT,&aa);
4206:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4207:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4208:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4209:         } else {
4210:           PetscObjectReference((PetscObject)A_RVT);
4211:           tA_RVT = A_RVT;
4212:         }
4213:         MatCreateTranspose(tA_RVT,&Brhs);
4214:         MatDestroy(&tA_RVT);
4215:         MatDestroy(&A_RVT);
4216:       }
4217:       if (F) {
4218:         /* need to correct the rhs */
4219:         if (need_benign_correction) {
4220:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4221:           PetscScalar        *marr;

4223:           MatDenseGetArray(Brhs,&marr);
4224:           if (lda_rhs != n_R) {
4225:             for (i=0;i<n_vertices;i++) {
4226:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4227:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4228:               VecResetArray(dummy_vec);
4229:             }
4230:           } else {
4231:             for (i=0;i<n_vertices;i++) {
4232:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4233:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4234:               VecResetArray(pcbddc->vec1_R);
4235:             }
4236:           }
4237:           MatDenseRestoreArray(Brhs,&marr);
4238:         }
4239:         MatMatSolve(F,Brhs,A_RRmA_RV);
4240:         if (restoreavr) {
4241:           MatScale(A_VR,-1.0);
4242:         }
4243:         /* need to correct the solution */
4244:         if (need_benign_correction) {
4245:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4246:           PetscScalar        *marr;

4248:           MatDenseGetArray(A_RRmA_RV,&marr);
4249:           if (lda_rhs != n_R) {
4250:             for (i=0;i<n_vertices;i++) {
4251:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4252:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4253:               VecResetArray(dummy_vec);
4254:             }
4255:           } else {
4256:             for (i=0;i<n_vertices;i++) {
4257:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4258:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4259:               VecResetArray(pcbddc->vec1_R);
4260:             }
4261:           }
4262:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4263:         }
4264:       } else {
4265:         MatDenseGetArray(Brhs,&y);
4266:         for (i=0;i<n_vertices;i++) {
4267:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4268:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4269:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4270:           VecResetArray(pcbddc->vec1_R);
4271:           VecResetArray(pcbddc->vec2_R);
4272:         }
4273:         MatDenseRestoreArray(Brhs,&y);
4274:       }
4275:       MatDestroy(&A_RV);
4276:       MatDestroy(&Brhs);
4277:       /* S_VV and S_CV */
4278:       if (n_constraints) {
4279:         Mat B;

4281:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4282:         for (i=0;i<n_vertices;i++) {
4283:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4284:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4285:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4286:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4287:           VecResetArray(pcis->vec1_B);
4288:           VecResetArray(pcbddc->vec1_R);
4289:         }
4290:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4291:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4292:         MatDestroy(&B);
4293:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4294:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4295:         MatScale(S_CV,m_one);
4296:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4297:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4298:         MatDestroy(&B);
4299:       }
4300:       if (lda_rhs != n_R) {
4301:         MatDestroy(&A_RRmA_RV);
4302:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4303:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4304:       }
4305:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4306:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4307:       if (need_benign_correction) {
4308:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4309:         PetscScalar      *marr,*sums;

4311:         PetscMalloc1(n_vertices,&sums);
4312:         MatDenseGetArray(S_VVt,&marr);
4313:         for (i=0;i<reuse_solver->benign_n;i++) {
4314:           const PetscScalar *vals;
4315:           const PetscInt    *idxs,*idxs_zero;
4316:           PetscInt          n,j,nz;

4318:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4319:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4320:           for (j=0;j<n_vertices;j++) {
4321:             PetscInt k;
4322:             sums[j] = 0.;
4323:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4324:           }
4325:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4326:           for (j=0;j<n;j++) {
4327:             PetscScalar val = vals[j];
4328:             PetscInt k;
4329:             for (k=0;k<n_vertices;k++) {
4330:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4331:             }
4332:           }
4333:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4334:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4335:         }
4336:         PetscFree(sums);
4337:         MatDenseRestoreArray(S_VVt,&marr);
4338:         MatDestroy(&A_RV_bcorr);
4339:       }
4340:       MatDestroy(&A_RRmA_RV);
4341:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4342:       MatDenseGetArray(A_VV,&x);
4343:       MatDenseGetArray(S_VVt,&y);
4344:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4345:       MatDenseRestoreArray(A_VV,&x);
4346:       MatDenseRestoreArray(S_VVt,&y);
4347:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4348:       MatDestroy(&S_VVt);
4349:     } else {
4350:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4351:     }
4352:     MatDestroy(&A_VV);

4354:     /* coarse basis functions */
4355:     for (i=0;i<n_vertices;i++) {
4356:       PetscScalar *y;

4358:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4359:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4360:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4361:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4362:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4363:       y[n_B*i+idx_V_B[i]] = 1.0;
4364:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4365:       VecResetArray(pcis->vec1_B);

4367:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4368:         PetscInt j;

4370:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4371:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4372:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4373:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4374:         VecResetArray(pcis->vec1_D);
4375:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4376:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4377:       }
4378:       VecResetArray(pcbddc->vec1_R);
4379:     }
4380:     /* if n_R == 0 the object is not destroyed */
4381:     MatDestroy(&A_RV);
4382:   }
4383:   VecDestroy(&dummy_vec);

4385:   if (n_constraints) {
4386:     Mat B;

4388:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4389:     MatScale(S_CC,m_one);
4390:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4391:     MatScale(S_CC,m_one);
4392:     if (n_vertices) {
4393:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4394:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4395:       } else {
4396:         Mat S_VCt;

4398:         if (lda_rhs != n_R) {
4399:           MatDestroy(&B);
4400:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4401:           MatSeqDenseSetLDA(B,lda_rhs);
4402:         }
4403:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4404:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4405:         MatDestroy(&S_VCt);
4406:       }
4407:     }
4408:     MatDestroy(&B);
4409:     /* coarse basis functions */
4410:     for (i=0;i<n_constraints;i++) {
4411:       PetscScalar *y;

4413:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4414:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4415:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4416:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4417:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4418:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4419:       VecResetArray(pcis->vec1_B);
4420:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4421:         PetscInt j;

4423:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4424:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4425:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4426:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4427:         VecResetArray(pcis->vec1_D);
4428:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4429:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4430:       }
4431:       VecResetArray(pcbddc->vec1_R);
4432:     }
4433:   }
4434:   if (n_constraints) {
4435:     MatDestroy(&local_auxmat2_R);
4436:   }
4437:   PetscFree(p0_lidx_I);

4439:   /* coarse matrix entries relative to B_0 */
4440:   if (pcbddc->benign_n) {
4441:     Mat         B0_B,B0_BPHI;
4442:     IS          is_dummy;
4443:     PetscScalar *data;
4444:     PetscInt    j;

4446:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4447:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4448:     ISDestroy(&is_dummy);
4449:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4450:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4451:     MatDenseGetArray(B0_BPHI,&data);
4452:     for (j=0;j<pcbddc->benign_n;j++) {
4453:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4454:       for (i=0;i<pcbddc->local_primal_size;i++) {
4455:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4456:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4457:       }
4458:     }
4459:     MatDenseRestoreArray(B0_BPHI,&data);
4460:     MatDestroy(&B0_B);
4461:     MatDestroy(&B0_BPHI);
4462:   }

4464:   /* compute other basis functions for non-symmetric problems */
4465:   if (!pcbddc->symmetric_primal) {
4466:     Mat         B_V=NULL,B_C=NULL;
4467:     PetscScalar *marray;

4469:     if (n_constraints) {
4470:       Mat S_CCT,C_CRT;

4472:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4473:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4474:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4475:       MatDestroy(&S_CCT);
4476:       if (n_vertices) {
4477:         Mat S_VCT;

4479:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4480:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4481:         MatDestroy(&S_VCT);
4482:       }
4483:       MatDestroy(&C_CRT);
4484:     } else {
4485:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4486:     }
4487:     if (n_vertices && n_R) {
4488:       PetscScalar    *av,*marray;
4489:       const PetscInt *xadj,*adjncy;
4490:       PetscInt       n;
4491:       PetscBool      flg_row;

4493:       /* B_V = B_V - A_VR^T */
4494:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4495:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4496:       MatSeqAIJGetArray(A_VR,&av);
4497:       MatDenseGetArray(B_V,&marray);
4498:       for (i=0;i<n;i++) {
4499:         PetscInt j;
4500:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4501:       }
4502:       MatDenseRestoreArray(B_V,&marray);
4503:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4504:       MatDestroy(&A_VR);
4505:     }

4507:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4508:     if (n_vertices) {
4509:       MatDenseGetArray(B_V,&marray);
4510:       for (i=0;i<n_vertices;i++) {
4511:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4512:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4513:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4514:         VecResetArray(pcbddc->vec1_R);
4515:         VecResetArray(pcbddc->vec2_R);
4516:       }
4517:       MatDenseRestoreArray(B_V,&marray);
4518:     }
4519:     if (B_C) {
4520:       MatDenseGetArray(B_C,&marray);
4521:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4522:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4523:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4524:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4525:         VecResetArray(pcbddc->vec1_R);
4526:         VecResetArray(pcbddc->vec2_R);
4527:       }
4528:       MatDenseRestoreArray(B_C,&marray);
4529:     }
4530:     /* coarse basis functions */
4531:     for (i=0;i<pcbddc->local_primal_size;i++) {
4532:       PetscScalar *y;

4534:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4535:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4536:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4537:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4538:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4539:       if (i<n_vertices) {
4540:         y[n_B*i+idx_V_B[i]] = 1.0;
4541:       }
4542:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4543:       VecResetArray(pcis->vec1_B);

4545:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4546:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4547:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4548:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4549:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4550:         VecResetArray(pcis->vec1_D);
4551:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4552:       }
4553:       VecResetArray(pcbddc->vec1_R);
4554:     }
4555:     MatDestroy(&B_V);
4556:     MatDestroy(&B_C);
4557:   }

4559:   /* free memory */
4560:   PetscFree(idx_V_B);
4561:   MatDestroy(&S_VV);
4562:   MatDestroy(&S_CV);
4563:   MatDestroy(&S_VC);
4564:   MatDestroy(&S_CC);
4565:   PetscFree(work);
4566:   if (n_vertices) {
4567:     MatDestroy(&A_VR);
4568:   }
4569:   if (n_constraints) {
4570:     MatDestroy(&C_CR);
4571:   }
4572:   /* Checking coarse_sub_mat and coarse basis functions */
4573:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4574:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4575:   if (pcbddc->dbg_flag) {
4576:     Mat         coarse_sub_mat;
4577:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4578:     Mat         coarse_phi_D,coarse_phi_B;
4579:     Mat         coarse_psi_D,coarse_psi_B;
4580:     Mat         A_II,A_BB,A_IB,A_BI;
4581:     Mat         C_B,CPHI;
4582:     IS          is_dummy;
4583:     Vec         mones;
4584:     MatType     checkmattype=MATSEQAIJ;
4585:     PetscReal   real_value;

4587:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4588:       Mat A;
4589:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4590:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4591:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4592:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4593:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4594:       MatDestroy(&A);
4595:     } else {
4596:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4597:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4598:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4599:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4600:     }
4601:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4602:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4603:     if (!pcbddc->symmetric_primal) {
4604:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4605:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4606:     }
4607:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4609:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4610:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4611:     PetscViewerFlush(pcbddc->dbg_viewer);
4612:     if (!pcbddc->symmetric_primal) {
4613:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4614:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4615:       MatDestroy(&AUXMAT);
4616:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4617:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4618:       MatDestroy(&AUXMAT);
4619:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4620:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4621:       MatDestroy(&AUXMAT);
4622:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4623:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4624:       MatDestroy(&AUXMAT);
4625:     } else {
4626:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4627:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4628:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4629:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4630:       MatDestroy(&AUXMAT);
4631:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4632:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4633:       MatDestroy(&AUXMAT);
4634:     }
4635:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4636:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4637:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4638:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4639:     if (pcbddc->benign_n) {
4640:       Mat         B0_B,B0_BPHI;
4641:       PetscScalar *data,*data2;
4642:       PetscInt    j;

4644:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4645:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4646:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4647:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4648:       MatDenseGetArray(TM1,&data);
4649:       MatDenseGetArray(B0_BPHI,&data2);
4650:       for (j=0;j<pcbddc->benign_n;j++) {
4651:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4652:         for (i=0;i<pcbddc->local_primal_size;i++) {
4653:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4654:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4655:         }
4656:       }
4657:       MatDenseRestoreArray(TM1,&data);
4658:       MatDenseRestoreArray(B0_BPHI,&data2);
4659:       MatDestroy(&B0_B);
4660:       ISDestroy(&is_dummy);
4661:       MatDestroy(&B0_BPHI);
4662:     }
4663: #if 0
4664:   {
4665:     PetscViewer viewer;
4666:     char filename[256];
4667:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4668:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4669:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4670:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4671:     MatView(coarse_sub_mat,viewer);
4672:     PetscObjectSetName((PetscObject)TM1,"projected");
4673:     MatView(TM1,viewer);
4674:     if (pcbddc->coarse_phi_B) {
4675:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4676:       MatView(pcbddc->coarse_phi_B,viewer);
4677:     }
4678:     if (pcbddc->coarse_phi_D) {
4679:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4680:       MatView(pcbddc->coarse_phi_D,viewer);
4681:     }
4682:     if (pcbddc->coarse_psi_B) {
4683:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4684:       MatView(pcbddc->coarse_psi_B,viewer);
4685:     }
4686:     if (pcbddc->coarse_psi_D) {
4687:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4688:       MatView(pcbddc->coarse_psi_D,viewer);
4689:     }
4690:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4691:     MatView(pcbddc->local_mat,viewer);
4692:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4693:     MatView(pcbddc->ConstraintMatrix,viewer);
4694:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4695:     ISView(pcis->is_I_local,viewer);
4696:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4697:     ISView(pcis->is_B_local,viewer);
4698:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4699:     ISView(pcbddc->is_R_local,viewer);
4700:     PetscViewerDestroy(&viewer);
4701:   }
4702: #endif
4703:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4704:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4705:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4706:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4708:     /* check constraints */
4709:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4710:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4711:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4712:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4713:     } else {
4714:       PetscScalar *data;
4715:       Mat         tmat;
4716:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4717:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4718:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4719:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4720:       MatDestroy(&tmat);
4721:     }
4722:     MatCreateVecs(CPHI,&mones,NULL);
4723:     VecSet(mones,-1.0);
4724:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4725:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4726:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4727:     if (!pcbddc->symmetric_primal) {
4728:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4729:       VecSet(mones,-1.0);
4730:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4731:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4732:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4733:     }
4734:     MatDestroy(&C_B);
4735:     MatDestroy(&CPHI);
4736:     ISDestroy(&is_dummy);
4737:     VecDestroy(&mones);
4738:     PetscViewerFlush(pcbddc->dbg_viewer);
4739:     MatDestroy(&A_II);
4740:     MatDestroy(&A_BB);
4741:     MatDestroy(&A_IB);
4742:     MatDestroy(&A_BI);
4743:     MatDestroy(&TM1);
4744:     MatDestroy(&TM2);
4745:     MatDestroy(&TM3);
4746:     MatDestroy(&TM4);
4747:     MatDestroy(&coarse_phi_D);
4748:     MatDestroy(&coarse_phi_B);
4749:     if (!pcbddc->symmetric_primal) {
4750:       MatDestroy(&coarse_psi_D);
4751:       MatDestroy(&coarse_psi_B);
4752:     }
4753:     MatDestroy(&coarse_sub_mat);
4754:   }
4755:   /* get back data */
4756:   *coarse_submat_vals_n = coarse_submat_vals;
4757:   PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4758:   return(0);
4759: }

/*
   MatCreateSubMatrixUnsorted - Extracts from A the submatrix selected by isrow/iscol,
   for index sets that are not necessarily sorted.

   Input:
     A     - matrix the submatrix is extracted from
     isrow - row indices, possibly unsorted
     iscol - column indices, possibly unsorted (may be the same IS object as isrow)

   Output:
     B     - the extracted matrix; an extra reference is taken on it before the internal
             work array is released with MatDestroyMatrices()

   How it works:
     1. For each unsorted input IS a sorted copy is created and the sorting permutation is
        kept (idxs_perm_r / idxs_perm_c). Sorted inputs are reused as they are.
     2. The extraction is always performed on the sorted index sets.
     3. If at least one input was unsorted, the extracted matrix is passed to MatPermute()
        with index sets holding the inverse of the sorting permutations (identity stride
        for an input that was already sorted). This is intended to hand back the ordering
        requested by the caller.

   NOTE(review): in this listing the return codes of the PETSc calls are not checked; the
   only explicit error raised here is PETSC_ERR_PLIB when the column permutation is missing.
*/
4761: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4762: {
4763:   Mat            *work_mat;
4764:   IS             isrow_s,iscol_s;
4765:   PetscBool      rsorted,csorted;
4766:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

        /* sortedness and local sizes of both index sets */
4770:   ISSorted(isrow,&rsorted);
4771:   ISSorted(iscol,&csorted);
4772:   ISGetLocalSize(isrow,&rsize);
4773:   ISGetLocalSize(iscol,&csize);

        /* rows: build a sorted copy of isrow and remember the sorting permutation */
4775:   if (!rsorted) {
4776:     const PetscInt *idxs;
4777:     PetscInt *idxs_sorted,i;

4779:     PetscMalloc1(rsize,&idxs_perm_r);
4780:     PetscMalloc1(rsize,&idxs_sorted);
          /* start from the identity permutation */
4781:     for (i=0;i<rsize;i++) {
4782:       idxs_perm_r[i] = i;
4783:     }
4784:     ISGetIndices(isrow,&idxs);
          /* idxs is const: only idxs_perm_r is reordered by the sort */
4785:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
          /* read the original indices through the permutation to get them in sorted order */
4786:     for (i=0;i<rsize;i++) {
4787:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4788:     }
4789:     ISRestoreIndices(isrow,&idxs);
          /* PETSC_OWN_POINTER: idxs_sorted is handed over to the IS and is not freed here */
4790:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4791:   } else {
          /* already sorted: use isrow itself; the extra reference balances the ISDestroy() at the end */
4792:     PetscObjectReference((PetscObject)isrow);
4793:     isrow_s = isrow;
4794:   }

        /* columns: same treatment as the rows */
4796:   if (!csorted) {
4797:     if (isrow == iscol) {
            /* the very same IS object was passed twice: share the sorted row IS
               (idxs_perm_c stays NULL in this case) */
4798:       PetscObjectReference((PetscObject)isrow_s);
4799:       iscol_s = isrow_s;
4800:     } else {
4801:       const PetscInt *idxs;
4802:       PetscInt       *idxs_sorted,i;

4804:       PetscMalloc1(csize,&idxs_perm_c);
4805:       PetscMalloc1(csize,&idxs_sorted);
4806:       for (i=0;i<csize;i++) {
4807:         idxs_perm_c[i] = i;
4808:       }
4809:       ISGetIndices(iscol,&idxs);
4810:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4811:       for (i=0;i<csize;i++) {
4812:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4813:       }
4814:       ISRestoreIndices(iscol,&idxs);
            /* ownership of idxs_sorted passes to iscol_s */
4815:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4816:     }
4817:   } else {
4818:     PetscObjectReference((PetscObject)iscol);
4819:     iscol_s = iscol;
4820:   }

        /* extract a single submatrix using the sorted index sets; work_mat is an array of one Mat */
4822:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

        /* at least one index set was reordered: permute the extracted matrix */
4824:   if (!rsorted || !csorted) {
4825:     Mat      new_mat;
4826:     IS       is_perm_r,is_perm_c;

4828:     if (!rsorted) {
4829:       PetscInt *idxs_r,i;
4830:       PetscMalloc1(rsize,&idxs_r);
            /* idxs_r is the inverse of the sorting permutation */
4831:       for (i=0;i<rsize;i++) {
4832:         idxs_r[idxs_perm_r[i]] = i;
4833:       }
            /* the forward permutation is no longer needed */
4834:       PetscFree(idxs_perm_r);
4835:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4836:     } else {
            /* rows were already sorted: identity 0..rsize-1 */
4837:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4838:     }
4839:     ISSetPermutation(is_perm_r);

4841:     if (!csorted) {
4842:       if (isrow_s == iscol_s) {
              /* rows and columns share one sorted IS, hence one permutation as well */
4843:         PetscObjectReference((PetscObject)is_perm_r);
4844:         is_perm_c = is_perm_r;
4845:       } else {
4846:         PetscInt *idxs_c,i;
              /* defensive check: the column sort above must have filled idxs_perm_c */
4847:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4848:         PetscMalloc1(csize,&idxs_c);
4849:         for (i=0;i<csize;i++) {
4850:           idxs_c[idxs_perm_c[i]] = i;
4851:         }
4852:         PetscFree(idxs_perm_c);
4853:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4854:       }
4855:     } else {
            /* columns were already sorted: identity 0..csize-1 */
4856:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4857:     }
4858:     ISSetPermutation(is_perm_c);

          /* replace the extracted matrix by its permuted version */
4860:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4861:     MatDestroy(&work_mat[0]);
4862:     work_mat[0] = new_mat;
4863:     ISDestroy(&is_perm_r);
4864:     ISDestroy(&is_perm_c);
4865:   }

        /* take a reference for the caller before the work array is released */
4867:   PetscObjectReference((PetscObject)work_mat[0]);
4868:   *B = work_mat[0];
4869:   MatDestroyMatrices(1,&work_mat);
        /* drop the references (or the sorted copies) acquired above */
4870:   ISDestroy(&isrow_s);
4871:   ISDestroy(&iscol_s);
4872:   return(0);
4873: }

/*
   PCBDDCComputeLocalMatrix - Recomputes pcbddc->local_mat by applying MatPtAP() to the
   local matrix matis->A (taken from pc->pmat) and the local restriction of
   ChangeOfBasisMatrix.

   Input:
     pc                  - the preconditioner; pc->pmat->data is accessed as Mat_IS and
                           pc->data as PC_BDDC
     ChangeOfBasisMatrix - change of basis matrix; its rows/columns matching the global
                           indices of the local dofs are extracted

   Side effects:
     - pcbddc->local_mat is destroyed and recreated
     - if a matrix is composed with pc under the key "__KSPFETIDP_lA", it is replaced by
       its MatPtAP() product with the same restriction
     - if matis->A has its symmetry flag set, the flag is copied to pcbddc->local_mat

   Errors:
     With pcbddc->dbg_flag set, a consistency check compares the global change of basis
     with its local restriction and raises PETSC_ERR_PLIB or PETSC_ERR_USER when the
     difference exceeds PETSC_SMALL. PETSC_ERR_USER is used only for a user-provided
     change of basis at level 0.

   NOTE(review): in this listing the return codes of the PETSc calls are not checked.
*/
4875: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4876: {
4877:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4878:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4879:   Mat            new_mat,lA;
4880:   IS             is_local,is_global;
4881:   PetscInt       local_size;
4882:   PetscBool      isseqaij;

        /* drop the previous local matrix */
4886:   MatDestroy(&pcbddc->local_mat);
        /* map the local numbering 0..local_size-1 to global indices and extract the matching
           block of the change of basis; is_global is not required to be sorted */
4887:   MatGetSize(matis->A,&local_size,NULL);
4888:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4889:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4890:   ISDestroy(&is_local);
4891:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4892:   ISDestroy(&is_global);

4894:   /* check */
4895:   if (pcbddc->dbg_flag) {
4896:     Vec       x,x_change;
4897:     PetscReal error;

          /* global reference: x_change = ChangeOfBasisMatrix * x for a random x */
4899:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4900:     VecSetRandom(x,NULL);
4901:     MatMult(ChangeOfBasisMatrix,x,x_change);
          /* local counterpart: scatter x to matis->x and apply the restricted matrix */
4902:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4903:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4904:     MatMult(new_mat,matis->x,matis->y);
4905:     if (!pcbddc->change_interior) {
            /* these arrays shadow the Vec x declared above */
4906:       const PetscScalar *x,*y,*v;
4907:       PetscReal         lerror = 0.;
4908:       PetscInt          i;

4910:       VecGetArrayRead(matis->x,&x);
4911:       VecGetArrayRead(matis->y,&y);
4912:       VecGetArrayRead(matis->counter,&v);
            /* largest |x-y| over the entries whose counter is below 1.5; presumably these are
               the dofs not shared with other subdomains (the "I" of the message) - TODO confirm */
4913:       for (i=0;i<local_size;i++)
4914:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4915:           lerror = PetscAbsScalar(x[i]-y[i]);
4916:       VecRestoreArrayRead(matis->x,&x);
4917:       VecRestoreArrayRead(matis->y,&y);
4918:       VecRestoreArrayRead(matis->counter,&v);
            /* maximum over all processes of the communicator of pc */
4919:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4920:       if (error > PETSC_SMALL) {
4921:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4922:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
4923:         } else {
4924:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
4925:         }
4926:       }
4927:     }
          /* bring the local result back into x (overwriting it) and compare with x_change */
4928:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4929:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4930:     VecAXPY(x,-1.0,x_change);
4931:     VecNorm(x,NORM_INFINITY,&error);
4932:     if (error > PETSC_SMALL) {
4933:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4934:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
4935:       } else {
4936:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
4937:       }
4938:     }
4939:     VecDestroy(&x);
4940:     VecDestroy(&x_change);
4941:   }

4943:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4944:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

4946:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4947:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4948:   if (isseqaij) {
          /* local_mat was already destroyed at the top of the routine */
4949:     MatDestroy(&pcbddc->local_mat);
4950:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4951:     if (lA) {
4952:       Mat work;
4953:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
            /* the composed object is replaced by the product; our own reference is dropped right after */
4954:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4955:       MatDestroy(&work);
4956:     }
4957:   } else {
4958:     Mat work_mat;

          /* other matrix types: go through a temporary SeqAIJ copy */
4960:     MatDestroy(&pcbddc->local_mat);
4961:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4962:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4963:     MatDestroy(&work_mat);
4964:     if (lA) {
4965:       Mat work;
4966:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4967:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
            /* release the temporary SeqAIJ copy of lA: it is only an input of the product
               above and nothing else destroys it */
            MatDestroy(&work_mat);
4968:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4969:       MatDestroy(&work);
4970:     }
4971:   }
        /* propagate the symmetry information of the original local matrix */
4972:   if (matis->A->symmetric_set) {
4973:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4974: #if !defined(PETSC_USE_COMPLEX)
4975:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4976: #endif
4977:   }
4978:   MatDestroy(&new_mat);
4979:   return(0);
4980: }

/*
   PCBDDCSetUpLocalScatters - (Re)creates the index set pcbddc->is_R_local and the scatters
   pcbddc->R_to_B and, when pcbddc->switch_static or pcbddc->dbg_flag is set, pcbddc->R_to_D.

   Input:
     pc - the preconditioner; pc->data is accessed both as PC_IS and as PC_BDDC

   Two paths are followed:
     - no reusable Schur solver (sub_schurs NULL or sub_schurs->reuse_solver NULL):
       R is built as the complement of the primal vertices (local_primal_ref_node) in the
       local numbering 0..pcis->n-1, and the scatters are built with a bitmask
     - reusable solver present: the ordering of R is taken from reuse_solver->is_R and the
       scatters are built from strides; at the end reuse_solver->is_R is replaced by a
       reference to pcbddc->is_R_local

   If the block size of pcbddc->local_mat is larger than one and R turns out to be made of
   whole blocks, is_R_local is created as a block IS with that block size.

   NOTE(review): the local variable ierr is declared but not used in this listing, and the
   return codes of the PETSc calls are not checked.
*/
4982: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4983: {
4984:   PC_IS*          pcis = (PC_IS*)(pc->data);
4985:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
4986:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4987:   PetscInt        *idx_R_local=NULL;
4988:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
4989:   PetscInt        vbs,bs;
4990:   PetscBT         bitmask=NULL;
4991:   PetscErrorCode  ierr;

4994:   /*
4995:     No need to setup local scatters if
4996:       - primal space is unchanged
4997:         AND
4998:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4999:         AND
5000:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine)
5001:   */
5002:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5003:     return(0);
5004:   }
5005:   /* destroy old objects */
5006:   ISDestroy(&pcbddc->is_R_local);
5007:   VecScatterDestroy(&pcbddc->R_to_B);
5008:   VecScatterDestroy(&pcbddc->R_to_D);
5009:   /* Set Non-overlapping dimensions */
5010:   n_B = pcis->n_B;
5011:   n_D = pcis->n - n_B;
5012:   n_vertices = pcbddc->n_vertices;

5014:   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */

5016:   /* create auxiliary bitmask and allocate workspace */
5017:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5018:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5019:     PetscBTCreate(pcis->n,&bitmask);
          /* flag the vertices ... */
5020:     for (i=0;i<n_vertices;i++) {
5021:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5022:     }

          /* ... and collect every unflagged local index, in increasing order, as R */
5024:     for (i=0, n_R=0; i<pcis->n; i++) {
5025:       if (!PetscBTLookup(bitmask,i)) {
5026:         idx_R_local[n_R++] = i;
5027:       }
5028:     }
5029:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5030:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* idx_R_local now points to the indices of reuse_solver->is_R (const cast away);
             bitmask stays NULL on this path */
5032:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5033:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5034:   }

5036:   /* Block code */
        /* vbs is the block size used for is_R_local; it stays 1 unless R is found to be blocked */
5037:   vbs = 1;
5038:   MatGetBlockSize(pcbddc->local_mat,&bs);
5039:   if (bs>1 && !(n_vertices%bs)) {
5040:     PetscBool is_blocked = PETSC_TRUE;
5041:     PetscInt  *vary;
5042:     if (!sub_schurs || !sub_schurs->reuse_solver) {
            /* vary[k] counts how many vertices fall in block k */
5043:       PetscMalloc1(pcis->n/bs,&vary);
5044:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5045:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5046:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5047:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
            /* a block must contain either no vertex or only vertices */
5048:       for (i=0; i<pcis->n/bs; i++) {
5049:         if (vary[i]!=0 && vary[i]!=bs) {
5050:           is_blocked = PETSC_FALSE;
5051:           break;
5052:         }
5053:       }
5054:       PetscFree(vary);
5055:     } else {
5056:       /* Verify directly the R set */
            /* each group of bs consecutive entries must hold consecutive indices.
               NOTE(review): the break below only leaves the inner loop, so the outer loop
               keeps scanning after a failure; is_blocked is never reset to true, so the
               outcome is unaffected */
5057:       for (i=0; i<n_R/bs; i++) {
5058:         PetscInt j,node=idx_R_local[bs*i];
5059:         for (j=1; j<bs; j++) {
5060:           if (node != idx_R_local[bs*i+j]-j) {
5061:             is_blocked = PETSC_FALSE;
5062:             break;
5063:           }
5064:         }
5065:       }
5066:     }
5067:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5068:       vbs = bs;
            /* in-place compression: keep the first index of each block, divided by the block size.
               NOTE(review): on the reuse path idx_R_local is the array obtained from
               ISGetIndices() on reuse_solver->is_R, so this writes through a const-cast
               pointer; that IS is destroyed a few lines below - confirm this is intended */
5069:       for (i=0;i<n_R/vbs;i++) {
5070:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5071:       }
5072:     }
5073:   }
        /* PETSC_COPY_VALUES: the IS keeps its own copy, so idx_R_local can be released or restored below */
5074:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5075:   if (sub_schurs && sub_schurs->reuse_solver) {
5076:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* hand the indices back and make the reuse context share the new is_R_local */
5078:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5079:     ISDestroy(&reuse_solver->is_R);
5080:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5081:     reuse_solver->is_R = pcbddc->is_R_local;
5082:   } else {
5083:     PetscFree(idx_R_local);
5084:   }

5086:   /* print some info if requested */
5087:   if (pcbddc->dbg_flag) {
5088:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5089:     PetscViewerFlush(pcbddc->dbg_viewer);
5090:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5091:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5092:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5093:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5094:     PetscViewerFlush(pcbddc->dbg_viewer);
5095:   }

5097:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5098:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5099:     IS       is_aux1,is_aux2;
          /* this idx_R_local shadows the function-level one, which was freed above */
5100:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

5102:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
          /* sized n_B-n_vertices: presumably every vertex is a boundary dof - TODO confirm */
5103:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5104:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
          /* the bitmask already flags the vertices; flag the dofs of is_I_local too, so that
             an unflagged dof is one that is neither a vertex nor listed in is_I_local */
5105:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5106:     for (i=0; i<n_D; i++) {
5107:       PetscBTSet(bitmask,is_indices[i]);
5108:     }
5109:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
          /* positions inside R of the unflagged dofs */
5110:     for (i=0, j=0; i<n_R; i++) {
5111:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5112:         aux_array1[j++] = i;
5113:       }
5114:     }
5115:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
          /* positions inside is_B_local of the unflagged dofs */
5116:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5117:     for (i=0, j=0; i<n_B; i++) {
5118:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5119:         aux_array2[j++] = i;
5120:       }
5121:     }
5122:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5123:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
          /* aux_array1/aux_array2 are owned by the index sets (PETSC_OWN_POINTER) */
5124:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5125:     ISDestroy(&is_aux1);
5126:     ISDestroy(&is_aux2);

5128:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* positions inside R of the flagged dofs; R holds no vertex, so these are dofs of is_I_local */
5129:       PetscMalloc1(n_D,&aux_array1);
5130:       for (i=0, j=0; i<n_R; i++) {
5131:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5132:           aux_array1[j++] = i;
5133:         }
5134:       }
5135:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
            /* no index set is given for the destination vec1_D */
5136:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5137:       ISDestroy(&is_aux1);
5138:     }
5139:     PetscBTDestroy(&bitmask);
5140:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5141:   } else {
5142:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5143:     IS                 tis;
5144:     PetscInt           schur_size;

          /* entries n_D..n_D+schur_size-1 of vec1_R are scattered to the entries of vec1_B
             listed in reuse_solver->is_B; presumably the reused ordering of R places the n_D
             interior dofs first - TODO confirm */
5146:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5147:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5148:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5149:     ISDestroy(&tis);
5150:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
            /* the first n_D entries of vec1_R go to vec1_D */
5151:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5152:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5153:       ISDestroy(&tis);
5154:     }
5155:   }
5156:   return(0);
5157: }


5160: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5161: {
5162:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5163:   PC_IS          *pcis = (PC_IS*)pc->data;
5164:   PC             pc_temp;
5165:   Mat            A_RR;
5166:   MatReuse       reuse;
5167:   PetscScalar    m_one = -1.0;
5168:   PetscReal      value;
5169:   PetscInt       n_D,n_R;
5170:   PetscBool      check_corr,issbaij;
5172:   /* prefixes stuff */
5173:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5174:   size_t         len;

5177:   PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5178:   /* compute prefixes */
5179:   PetscStrcpy(dir_prefix,"");
5180:   PetscStrcpy(neu_prefix,"");
5181:   if (!pcbddc->current_level) {
5182:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5183:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5184:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5185:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5186:   } else {
5187:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5188:     PetscStrlen(((PetscObject)pc)->prefix,&len);
5189:     len -= 15; /* remove "pc_bddc_coarse_" */
5190:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5191:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5192:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5193:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5194:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5195:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5196:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5197:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5198:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5199:   }

5201:   /* DIRICHLET PROBLEM */
5202:   if (dirichlet) {
5203:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5204:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5205:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5206:       if (pcbddc->dbg_flag) {
5207:         Mat    A_IIn;

5209:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5210:         MatDestroy(&pcis->A_II);
5211:         pcis->A_II = A_IIn;
5212:       }
5213:     }
5214:     if (pcbddc->local_mat->symmetric_set) {
5215:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5216:     }
5217:     /* Matrix for Dirichlet problem is pcis->A_II */
5218:     n_D = pcis->n - pcis->n_B;
5219:     if (!pcbddc->ksp_D) { /* create object if not yet build */
5220:       void (*f)(void) = 0;

5222:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5223:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5224:       /* default */
5225:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5226:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5227:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
5228:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5229:       if (issbaij) {
5230:         PCSetType(pc_temp,PCCHOLESKY);
5231:       } else {
5232:         PCSetType(pc_temp,PCLU);
5233:       }
5234:       KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5235:       /* Allow user's customization */
5236:       KSPSetFromOptions(pcbddc->ksp_D);
5237:       PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5238:       if (f && pcbddc->mat_graph->cloc) {
5239:         PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5240:         const PetscInt *idxs;
5241:         PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5243:         ISGetLocalSize(pcis->is_I_local,&nl);
5244:         ISGetIndices(pcis->is_I_local,&idxs);
5245:         PetscMalloc1(nl*cdim,&scoords);
5246:         for (i=0;i<nl;i++) {
5247:           for (d=0;d<cdim;d++) {
5248:             scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5249:           }
5250:         }
5251:         ISRestoreIndices(pcis->is_I_local,&idxs);
5252:         PCSetCoordinates(pc_temp,cdim,nl,scoords);
5253:         PetscFree(scoords);
5254:       }
5255:     }
5256:     MatSetOptionsPrefix(pcis->A_II,((PetscObject)pcbddc->ksp_D)->prefix);
5257:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
5258:     if (sub_schurs && sub_schurs->reuse_solver) {
5259:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5261:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5262:     }
5263:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5264:     if (!n_D) {
5265:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5266:       PCSetType(pc_temp,PCNONE);
5267:     }
5268:     /* set ksp_D into pcis data */
5269:     KSPDestroy(&pcis->ksp_D);
5270:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5271:     pcis->ksp_D = pcbddc->ksp_D;
5272:   }

5274:   /* NEUMANN PROBLEM */
5275:   A_RR = 0;
5276:   if (neumann) {
5277:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5278:     PetscInt        ibs,mbs;
5279:     PetscBool       issbaij, reuse_neumann_solver;
5280:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

5282:     reuse_neumann_solver = PETSC_FALSE;
5283:     if (sub_schurs && sub_schurs->reuse_solver) {
5284:       IS iP;

5286:       reuse_neumann_solver = PETSC_TRUE;
5287:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5288:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5289:     }
5290:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5291:     ISGetSize(pcbddc->is_R_local,&n_R);
5292:     if (pcbddc->ksp_R) { /* already created ksp */
5293:       PetscInt nn_R;
5294:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5295:       PetscObjectReference((PetscObject)A_RR);
5296:       MatGetSize(A_RR,&nn_R,NULL);
5297:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5298:         KSPReset(pcbddc->ksp_R);
5299:         MatDestroy(&A_RR);
5300:         reuse = MAT_INITIAL_MATRIX;
5301:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5302:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5303:           MatDestroy(&A_RR);
5304:           reuse = MAT_INITIAL_MATRIX;
5305:         } else { /* safe to reuse the matrix */
5306:           reuse = MAT_REUSE_MATRIX;
5307:         }
5308:       }
5309:       /* last check */
5310:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5311:         MatDestroy(&A_RR);
5312:         reuse = MAT_INITIAL_MATRIX;
5313:       }
5314:     } else { /* first time, so we need to create the matrix */
5315:       reuse = MAT_INITIAL_MATRIX;
5316:     }
5317:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5318:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5319:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5320:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5321:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5322:       if (matis->A == pcbddc->local_mat) {
5323:         MatDestroy(&pcbddc->local_mat);
5324:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5325:       } else {
5326:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5327:       }
5328:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5329:       if (matis->A == pcbddc->local_mat) {
5330:         MatDestroy(&pcbddc->local_mat);
5331:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5332:       } else {
5333:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5334:       }
5335:     }
5336:     /* extract A_RR */
5337:     if (reuse_neumann_solver) {
5338:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5340:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5341:         MatDestroy(&A_RR);
5342:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5343:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5344:         } else {
5345:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5346:         }
5347:       } else {
5348:         MatDestroy(&A_RR);
5349:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5350:         PetscObjectReference((PetscObject)A_RR);
5351:       }
5352:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5353:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5354:     }
5355:     if (pcbddc->local_mat->symmetric_set) {
5356:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5357:     }
5358:     if (!pcbddc->ksp_R) { /* create object if not present */
5359:       void (*f)(void) = 0;

5361:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5362:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5363:       /* default */
5364:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5365:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5366:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5367:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5368:       if (issbaij) {
5369:         PCSetType(pc_temp,PCCHOLESKY);
5370:       } else {
5371:         PCSetType(pc_temp,PCLU);
5372:       }
5373:       KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5374:       /* Allow user's customization */
5375:       KSPSetFromOptions(pcbddc->ksp_R);
5376:       PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5377:       if (f && pcbddc->mat_graph->cloc) {
5378:         PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5379:         const PetscInt *idxs;
5380:         PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5382:         ISGetLocalSize(pcbddc->is_R_local,&nl);
5383:         ISGetIndices(pcbddc->is_R_local,&idxs);
5384:         PetscMalloc1(nl*cdim,&scoords);
5385:         for (i=0;i<nl;i++) {
5386:           for (d=0;d<cdim;d++) {
5387:             scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5388:           }
5389:         }
5390:         ISRestoreIndices(pcbddc->is_R_local,&idxs);
5391:         PCSetCoordinates(pc_temp,cdim,nl,scoords);
5392:         PetscFree(scoords);
5393:       }
5394:     }
5395:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5396:     if (!n_R) {
5397:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5398:       PCSetType(pc_temp,PCNONE);
5399:     }
5400:     MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5401:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5402:     /* Reuse solver if it is present */
5403:     if (reuse_neumann_solver) {
5404:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5406:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5407:     }
5408:   }

5410:   if (pcbddc->dbg_flag) {
5411:     PetscViewerFlush(pcbddc->dbg_viewer);
5412:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5413:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5414:   }

5416:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5417:   check_corr = PETSC_FALSE;
5418:   if (pcbddc->NullSpace_corr[0]) {
5419:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5420:   }
5421:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5422:     check_corr = PETSC_TRUE;
5423:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5424:   }
5425:   if (neumann && pcbddc->NullSpace_corr[2]) {
5426:     check_corr = PETSC_TRUE;
5427:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5428:   }
5429:   /* check Dirichlet and Neumann solvers */
5430:   if (pcbddc->dbg_flag) {
5431:     if (dirichlet) { /* Dirichlet */
5432:       VecSetRandom(pcis->vec1_D,NULL);
5433:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5434:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5435:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5436:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5437:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5438:       if (check_corr) {
5439:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5440:       }
5441:       PetscViewerFlush(pcbddc->dbg_viewer);
5442:     }
5443:     if (neumann) { /* Neumann */
5444:       VecSetRandom(pcbddc->vec1_R,NULL);
5445:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5446:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5447:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5448:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5449:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5450:       if (check_corr) {
5451:         PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5452:       }
5453:       PetscViewerFlush(pcbddc->dbg_viewer);
5454:     }
5455:   }
5456:   /* free Neumann problem's matrix */
5457:   MatDestroy(&A_RR);
5458:   PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5459:   return(0);
5460: }

/*
   PCBDDCSolveSubstructureCorrection - local solve with pcbddc->ksp_R (or with the
   reusable factorization stored in sub_schurs->reuse_solver) used by
   PCBDDCApplyInterfacePreconditioner.

   Parameters:
     pc             - the preconditioner; pc->data is accessed as a PC_BDDC
     inout_B        - on entry holds the right-hand side, on exit the result
     inout_D        - read and written only when pcbddc->switch_static is set
     applytranspose - if true the transposed solve is applied

   Notes:
     When pcbddc->local_auxmat1 is non-NULL, the pair local_auxmat1/local_auxmat2
     is applied before the solve in the transposed case and after the solve
     otherwise.
     The solve goes through MatFactorSolveSchurComplement[Transpose] only when a
     reuse_solver is available and switch_static is off; in every other case
     KSPSolve[Transpose] is called in place on pcbddc->vec1_R.

   Returns 0.
*/
5462: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5463: {
5464:   PetscErrorCode  ierr;
5465:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5466:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* true only when sub_schurs exists and carries a reuse_solver object */
5467:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

        /* vec1_R receives the rhs through the reverse scatters below; it is zeroed first
           (presumably so that entries not written by the scatter are zero -- TODO confirm) */
5470:   if (!reuse_solver) {
5471:     VecSet(pcbddc->vec1_R,0.);
5472:   }
        /* ---- load the right-hand side ---- */
5473:   if (!pcbddc->switch_static) {
          /* transposed case: inout_B += local_auxmat1^T * (local_auxmat2^T * inout_B) before the solve */
5474:     if (applytranspose && pcbddc->local_auxmat1) {
5475:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5476:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5477:     }
5478:     if (!reuse_solver) {
            /* inout_B -> vec1_R */
5479:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5480:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5481:     } else {
            /* NOTE: this handle shadows the PetscBool reuse_solver declared at function scope */
5482:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

            /* inout_B -> rhs_B of the reusable solver */
5484:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5485:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5486:     }
5487:   } else {
          /* switch_static: vec1_R is filled from both inout_B and inout_D */
5488:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5489:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5490:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5491:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5492:     if (applytranspose && pcbddc->local_auxmat1) {
            /* here local_auxmat2^T acts on vec1_R; the updated inout_B is then scattered into vec1_R again */
5493:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5494:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5495:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5496:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5497:     }
5498:   }
        /* ---- solve ---- */
5499:   if (!reuse_solver || pcbddc->switch_static) {
          /* in-place solve on vec1_R with ksp_R */
5500:     if (applytranspose) {
5501:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5502:     } else {
5503:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5504:     }
5505:   } else {
          /* shadows the outer PetscBool, see note above */
5506:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

          /* solve from rhs_B into sol_B using the factored matrix F */
5508:     if (applytranspose) {
5509:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5510:     } else {
5511:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5512:     }
5513:   }
        /* ---- store the result: inout_B is zeroed and then overwritten by the scatters ---- */
5514:   VecSet(inout_B,0.);
5515:   if (!pcbddc->switch_static) {
5516:     if (!reuse_solver) {
            /* vec1_R -> inout_B */
5517:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5518:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5519:     } else {
            /* shadows the outer PetscBool, see note above */
5520:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

            /* sol_B -> inout_B */
5522:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5523:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5524:     }
          /* non-transposed case: inout_B += local_auxmat2 * (local_auxmat1 * inout_B) after the solve */
5525:     if (!applytranspose && pcbddc->local_auxmat1) {
5526:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5527:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5528:     }
5529:   } else {
          /* switch_static: first copy vec1_R into inout_B and inout_D */
5530:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5531:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5532:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5533:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
          /* non-transposed case: vec1_R += local_auxmat2 * (local_auxmat1 * inout_B) */
5534:     if (!applytranspose && pcbddc->local_auxmat1) {
5535:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5536:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5537:     }
          /* scatter again so that inout_B and inout_D hold the (possibly updated) vec1_R */
5538:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5539:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5540:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5541:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5542:   }
5543:   return(0);
5544: }

5546: /* PCBDDCApplyInterfacePreconditioner - applies the coarse and local corrections
         to the data stored in pcis->vec1_B (and pcis->vec1_D when pcbddc->switch_static
         is set); the result overwrites the same vectors.

         The parameter applytranspose determines if the interface preconditioner should
         be applied transposed or not: when true, coarse_phi is used to build the coarse
         rhs and coarse_psi to expand the coarse solution; when false the roles of
         coarse_psi and coarse_phi are exchanged.

         When pcbddc->benign_apply_coarse_only is set, the coarse rhs is built from
         benign_p0 only, the local solve is skipped, and vec1_B is overwritten with the
         expanded coarse solution.

         Returns 0; raises PETSC_ERR_SUP for the transposed application combined with
         benign_apply_coarse_only on processes that own a coarse_ksp. */
5547: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5548: {
        /* pc->data is viewed both as PC_BDDC and as PC_IS */
5550:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5551:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5552:   const PetscScalar zero = 0.0;

5555:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5556:   if (!pcbddc->benign_apply_coarse_only) {
5557:     if (applytranspose) {
5558:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5559:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5560:     } else {
5561:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5562:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5563:     }
5564:   } else {
          /* coarse-only application: the local coarse rhs starts from zero */
5565:     VecSet(pcbddc->vec1_P,zero);
5566:   }

5568:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5569:   if (pcbddc->benign_n) {
5570:     PetscScalar *array;
5571:     PetscInt    j;

          /* the benign_n entries occupy the tail of vec1_P: [local_primal_size-benign_n, local_primal_size) */
5573:     VecGetArray(pcbddc->vec1_P,&array);
5574:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5575:     VecRestoreArray(pcbddc->vec1_P,&array);
5576:   }

5578:   /* start communications from local primal nodes to rhs of coarse solver */
5579:   VecSet(pcbddc->coarse_vec,zero);
5580:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5581:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5583:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5584:   if (pcbddc->coarse_ksp) {
5585:     Mat          coarse_mat;
5586:     Vec          rhs,sol;
5587:     MatNullSpace nullsp;
          /* stays PETSC_FALSE unless benign_have_null is set and the coarse PC is of type PCBDDC */
5588:     PetscBool    isbddc = PETSC_FALSE;

5590:     if (pcbddc->benign_have_null) {
5591:       PC        coarse_pc;

5593:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5594:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5595:       /* we need to propagate to coarser levels the need for a possible benign correction */
5596:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5597:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5598:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5599:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5600:       }
5601:     }
5602:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5603:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5604:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5605:     if (applytranspose) {
5606:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5607:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
            /* remove the transpose null space (if any) from the computed solution */
5608:       MatGetTransposeNullSpace(coarse_mat,&nullsp);
5609:       if (nullsp) {
5610:         MatNullSpaceRemove(nullsp,sol);
5611:       }
5612:     } else {
5613:       MatGetNullSpace(coarse_mat,&nullsp);
5614:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5615:         PC        coarse_pc;

              /* in this branch the null space is removed from the rhs rather than from the solution */
5617:         if (nullsp) {
5618:           MatNullSpaceRemove(nullsp,rhs);
5619:         }
5620:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5621:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5622:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5623:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5624:       } else {
5625:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5626:         if (nullsp) {
5627:           MatNullSpaceRemove(nullsp,sol);
5628:         }
5629:       }
5630:     }
5631:     /* we don't need the benign correction at coarser levels anymore */
5632:     if (pcbddc->benign_have_null && isbddc) {
5633:       PC        coarse_pc;
5634:       PC_BDDC*  coarsepcbddc;

5636:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5637:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5638:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5639:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5640:     }
5641:   }

5643:   /* Local solution on R nodes */
5644:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5645:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5646:   }
5647:   /* communications from coarse sol to local primal nodes */
5648:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5649:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5651:   /* Sum contributions from the two levels */
5652:   if (!pcbddc->benign_apply_coarse_only) {
          /* vec1_B (and vec1_D with switch_static) += coarse basis * vec1_P */
5653:     if (applytranspose) {
5654:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5655:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5656:     } else {
5657:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5658:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5659:     }
5660:     /* store p0 */
5661:     if (pcbddc->benign_n) {
5662:       PetscScalar *array;
5663:       PetscInt    j;

            /* benign_p0 is overwritten with the tail of vec1_P (same index range used above) */
5665:       VecGetArray(pcbddc->vec1_P,&array);
5666:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5667:       VecRestoreArray(pcbddc->vec1_P,&array);
5668:     }
5669:   } else { /* expand the coarse solution */
          /* vec1_B is overwritten (MatMult, not MatMultAdd); vec1_D is not touched here */
5670:     if (applytranspose) {
5671:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5672:     } else {
5673:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5674:     }
5675:   }
5676:   return(0);
5677: }

/*
   PCBDDCScatterCoarseDataBegin - starts the scatter between pcbddc->vec1_P and
   pcbddc->coarse_vec through pcbddc->coarse_loc_to_glob.

   Parameters:
     pc    - the preconditioner; pc->data is accessed as a PC_BDDC
     imode - insert mode forwarded to VecScatterBegin
     smode - SCATTER_REVERSE moves coarse_vec -> vec1_P, any other value moves
             vec1_P -> coarse_vec

   Notes:
     Must be completed by PCBDDCScatterCoarseDataEnd called with the same arguments.
     In the SCATTER_REVERSE case, on processes with a coarse_ksp, the array of the
     coarse solution is placed into coarse_vec so that the scatter reads the solution;
     the matching VecResetArray on coarse_vec is in PCBDDCScatterCoarseDataEnd.

   Returns 0.
*/
5679: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5680: {
5682:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5683:   PetscScalar    *array;
5684:   Vec            from,to;

5687:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5688:     from = pcbddc->coarse_vec;
5689:     to = pcbddc->vec1_P;
5690:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5691:       Vec tvec;

            /* undo the VecPlaceArray done on the coarse rhs by PCBDDCScatterCoarseDataEnd (forward case) */
5693:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5694:       VecResetArray(tvec);
            /* make coarse_vec use the array of the coarse solution */
5695:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5696:       VecGetArray(tvec,&array);
5697:       VecPlaceArray(from,array);
5698:       VecRestoreArray(tvec,&array);
5699:     }
5700:   } else { /* from local to global -> put data in coarse right hand side */
5701:     from = pcbddc->vec1_P;
5702:     to = pcbddc->coarse_vec;
5703:   }
5704:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5705:   return(0);
5706: }

/*
   PCBDDCScatterCoarseDataEnd - completes the scatter started by
   PCBDDCScatterCoarseDataBegin between pcbddc->vec1_P and pcbddc->coarse_vec.

   Parameters:
     pc    - the preconditioner; pc->data is accessed as a PC_BDDC
     imode - insert mode forwarded to VecScatterEnd
     smode - SCATTER_REVERSE moves coarse_vec -> vec1_P, any other value moves
             vec1_P -> coarse_vec

   Notes:
     On processes with a coarse_ksp:
       - SCATTER_FORWARD: the array of coarse_vec is placed into the rhs vector of
         coarse_ksp; the matching VecResetArray is in PCBDDCScatterCoarseDataBegin
         (reverse case).
       - otherwise: the array placed into coarse_vec by PCBDDCScatterCoarseDataBegin
         is reset.

   Returns 0.
*/
5708: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5709: {
5711:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5712:   PetscScalar    *array;
5713:   Vec            from,to;

5716:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5717:     from = pcbddc->coarse_vec;
5718:     to = pcbddc->vec1_P;
5719:   } else { /* from local to global -> put data in coarse right hand side */
5720:     from = pcbddc->vec1_P;
5721:     to = pcbddc->coarse_vec;
5722:   }
5723:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5724:   if (smode == SCATTER_FORWARD) {
5725:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5726:       Vec tvec;

            /* the coarse rhs now uses the array of coarse_vec (here to == pcbddc->coarse_vec) */
5728:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5729:       VecGetArray(to,&array);
5730:       VecPlaceArray(tvec,array);
5731:       VecRestoreArray(to,&array);
5732:     }
5733:   } else {
5734:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5735:      VecResetArray(from);
5736:     }
5737:   }
5738:   return(0);
5739: }

5741: /* uncomment for testing purposes */
5742: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5743: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5744: {
5745:   PetscErrorCode    ierr;
5746:   PC_IS*            pcis = (PC_IS*)(pc->data);
5747:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5748:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5749:   /* one and zero */
5750:   PetscScalar       one=1.0,zero=0.0;
5751:   /* space to store constraints and their local indices */
5752:   PetscScalar       *constraints_data;
5753:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5754:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5755:   PetscInt          *constraints_n;
5756:   /* iterators */
5757:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5758:   /* BLAS integers */
5759:   PetscBLASInt      lwork,lierr;
5760:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5761:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5762:   /* reuse */
5763:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5764:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5765:   /* change of basis */
5766:   PetscBool         qr_needed;
5767:   PetscBT           change_basis,qr_needed_idx;
5768:   /* auxiliary stuff */
5769:   PetscInt          *nnz,*is_indices;
5770:   PetscInt          ncc;
5771:   /* some quantities */
5772:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5773:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5774:   PetscReal         tol; /* tolerance for retaining eigenmodes */

5777:   tol  = PetscSqrtReal(PETSC_SMALL);
5778:   /* Destroy Mat objects computed previously */
5779:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5780:   MatDestroy(&pcbddc->ConstraintMatrix);
5781:   MatDestroy(&pcbddc->switch_static_change);
5782:   /* save info on constraints from previous setup (if any) */
5783:   olocal_primal_size = pcbddc->local_primal_size;
5784:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5785:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5786:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5787:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5788:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5789:   PetscFree(pcbddc->primal_indices_local_idxs);

5791:   if (!pcbddc->adaptive_selection) {
5792:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5793:     MatNullSpace nearnullsp;
5794:     const Vec    *nearnullvecs;
5795:     Vec          *localnearnullsp;
5796:     PetscScalar  *array;
5797:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5798:     PetscBool    nnsp_has_cnst;
5799:     /* LAPACK working arrays for SVD or POD */
5800:     PetscBool    skip_lapack,boolforchange;
5801:     PetscScalar  *work;
5802:     PetscReal    *singular_vals;
5803: #if defined(PETSC_USE_COMPLEX)
5804:     PetscReal    *rwork;
5805: #endif
5806: #if defined(PETSC_MISSING_LAPACK_GESVD)
5807:     PetscScalar  *temp_basis,*correlation_mat;
5808: #else
5809:     PetscBLASInt dummy_int=1;
5810:     PetscScalar  dummy_scalar=1.;
5811: #endif

5813:     /* Get index sets for faces, edges and vertices from graph */
5814:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5815:     /* print some info */
5816:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5817:       PetscInt nv;

5819:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5820:       ISGetSize(ISForVertices,&nv);
5821:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5822:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5823:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5824:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5825:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5826:       PetscViewerFlush(pcbddc->dbg_viewer);
5827:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5828:     }

5830:     /* free unneeded index sets */
5831:     if (!pcbddc->use_vertices) {
5832:       ISDestroy(&ISForVertices);
5833:     }
5834:     if (!pcbddc->use_edges) {
5835:       for (i=0;i<n_ISForEdges;i++) {
5836:         ISDestroy(&ISForEdges[i]);
5837:       }
5838:       PetscFree(ISForEdges);
5839:       n_ISForEdges = 0;
5840:     }
5841:     if (!pcbddc->use_faces) {
5842:       for (i=0;i<n_ISForFaces;i++) {
5843:         ISDestroy(&ISForFaces[i]);
5844:       }
5845:       PetscFree(ISForFaces);
5846:       n_ISForFaces = 0;
5847:     }

5849:     /* check if near null space is attached to global mat */
5850:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
5851:     if (nearnullsp) {
5852:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5853:       /* remove any stored info */
5854:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
5855:       PetscFree(pcbddc->onearnullvecs_state);
5856:       /* store information for BDDC solver reuse */
5857:       PetscObjectReference((PetscObject)nearnullsp);
5858:       pcbddc->onearnullspace = nearnullsp;
5859:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5860:       for (i=0;i<nnsp_size;i++) {
5861:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5862:       }
5863:     } else { /* if near null space is not provided BDDC uses constants by default */
5864:       nnsp_size = 0;
5865:       nnsp_has_cnst = PETSC_TRUE;
5866:     }
5867:     /* get max number of constraints on a single cc */
5868:     max_constraints = nnsp_size;
5869:     if (nnsp_has_cnst) max_constraints++;

5871:     /*
5872:          Evaluate maximum storage size needed by the procedure
5873:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5874:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5875:          There can be multiple constraints per connected component
5876:                                                                                                                                                            */
5877:     n_vertices = 0;
5878:     if (ISForVertices) {
5879:       ISGetSize(ISForVertices,&n_vertices);
5880:     }
5881:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5882:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

5884:     total_counts = n_ISForFaces+n_ISForEdges;
5885:     total_counts *= max_constraints;
5886:     total_counts += n_vertices;
5887:     PetscBTCreate(total_counts,&change_basis);

5889:     total_counts = 0;
5890:     max_size_of_constraint = 0;
5891:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5892:       IS used_is;
5893:       if (i<n_ISForEdges) {
5894:         used_is = ISForEdges[i];
5895:       } else {
5896:         used_is = ISForFaces[i-n_ISForEdges];
5897:       }
5898:       ISGetSize(used_is,&j);
5899:       total_counts += j;
5900:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5901:     }
5902:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

5904:     /* get local part of global near null space vectors */
5905:     PetscMalloc1(nnsp_size,&localnearnullsp);
5906:     for (k=0;k<nnsp_size;k++) {
5907:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5908:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5909:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5910:     }

5912:     /* whether or not to skip lapack calls */
5913:     skip_lapack = PETSC_TRUE;
5914:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

5916:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5917:     if (!skip_lapack) {
5918:       PetscScalar temp_work;

5920: #if defined(PETSC_MISSING_LAPACK_GESVD)
5921:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5922:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5923:       PetscMalloc1(max_constraints,&singular_vals);
5924:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5925: #if defined(PETSC_USE_COMPLEX)
5926:       PetscMalloc1(3*max_constraints,&rwork);
5927: #endif
5928:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5929:       PetscBLASIntCast(max_constraints,&Blas_N);
5930:       PetscBLASIntCast(max_constraints,&Blas_LDA);
5931:       lwork = -1;
5932:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5933: #if !defined(PETSC_USE_COMPLEX)
5934:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5935: #else
5936:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5937: #endif
5938:       PetscFPTrapPop();
5939:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5940: #else /* on missing GESVD */
5941:       /* SVD */
5942:       PetscInt max_n,min_n;
5943:       max_n = max_size_of_constraint;
5944:       min_n = max_constraints;
5945:       if (max_size_of_constraint < max_constraints) {
5946:         min_n = max_size_of_constraint;
5947:         max_n = max_constraints;
5948:       }
5949:       PetscMalloc1(min_n,&singular_vals);
5950: #if defined(PETSC_USE_COMPLEX)
5951:       PetscMalloc1(5*min_n,&rwork);
5952: #endif
5953:       /* now we evaluate the optimal workspace using query with lwork=-1 */
5954:       lwork = -1;
5955:       PetscBLASIntCast(max_n,&Blas_M);
5956:       PetscBLASIntCast(min_n,&Blas_N);
5957:       PetscBLASIntCast(max_n,&Blas_LDA);
5958:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5959: #if !defined(PETSC_USE_COMPLEX)
5960:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5961: #else
5962:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5963: #endif
5964:       PetscFPTrapPop();
5965:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5966: #endif /* on missing GESVD */
5967:       /* Allocate optimal workspace */
5968:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5969:       PetscMalloc1(lwork,&work);
5970:     }
5971:     /* Now we can loop on constraining sets */
5972:     total_counts = 0;
5973:     constraints_idxs_ptr[0] = 0;
5974:     constraints_data_ptr[0] = 0;
5975:     /* vertices */
5976:     if (n_vertices) {
5977:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5978:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5979:       for (i=0;i<n_vertices;i++) {
5980:         constraints_n[total_counts] = 1;
5981:         constraints_data[total_counts] = 1.0;
5982:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5983:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5984:         total_counts++;
5985:       }
5986:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5987:       n_vertices = total_counts;
5988:     }

5990:     /* edges and faces */
5991:     total_counts_cc = total_counts;
5992:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
5993:       IS        used_is;
5994:       PetscBool idxs_copied = PETSC_FALSE;

5996:       if (ncc<n_ISForEdges) {
5997:         used_is = ISForEdges[ncc];
5998:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
5999:       } else {
6000:         used_is = ISForFaces[ncc-n_ISForEdges];
6001:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6002:       }
6003:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6005:       ISGetSize(used_is,&size_of_constraint);
6006:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6007:       /* change of basis should not be performed on local periodic nodes */
6008:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6009:       if (nnsp_has_cnst) {
6010:         PetscScalar quad_value;

6012:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6013:         idxs_copied = PETSC_TRUE;

6015:         if (!pcbddc->use_nnsp_true) {
6016:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6017:         } else {
6018:           quad_value = 1.0;
6019:         }
6020:         for (j=0;j<size_of_constraint;j++) {
6021:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6022:         }
6023:         temp_constraints++;
6024:         total_counts++;
6025:       }
6026:       for (k=0;k<nnsp_size;k++) {
6027:         PetscReal real_value;
6028:         PetscScalar *ptr_to_data;

6030:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6031:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6032:         for (j=0;j<size_of_constraint;j++) {
6033:           ptr_to_data[j] = array[is_indices[j]];
6034:         }
6035:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6036:         /* check if array is null on the connected component */
6037:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6038:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6039:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6040:           temp_constraints++;
6041:           total_counts++;
6042:           if (!idxs_copied) {
6043:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6044:             idxs_copied = PETSC_TRUE;
6045:           }
6046:         }
6047:       }
6048:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6049:       valid_constraints = temp_constraints;
6050:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6051:         if (temp_constraints == 1) { /* just normalize the constraint */
6052:           PetscScalar norm,*ptr_to_data;

6054:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6055:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6056:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6057:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6058:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6059:         } else { /* perform SVD */
6060:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6062: #if defined(PETSC_MISSING_LAPACK_GESVD)
6063:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6064:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6065:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6066:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
6067:                 from that computed using LAPACKgesvd
6068:              -> This is due to a different computation of eigenvectors in LAPACKheev
6069:              -> The quality of the POD-computed basis will be the same */
6070:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6071:           /* Store upper triangular part of correlation matrix */
6072:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6073:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6074:           for (j=0;j<temp_constraints;j++) {
6075:             for (k=0;k<j+1;k++) {
6076:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6077:             }
6078:           }
6079:           /* compute eigenvalues and eigenvectors of correlation matrix */
6080:           PetscBLASIntCast(temp_constraints,&Blas_N);
6081:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
6082: #if !defined(PETSC_USE_COMPLEX)
6083:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6084: #else
6085:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6086: #endif
6087:           PetscFPTrapPop();
6088:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6089:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6090:           j = 0;
6091:           while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6092:           total_counts = total_counts-j;
6093:           valid_constraints = temp_constraints-j;
6094:           /* scale and copy POD basis into used quadrature memory */
6095:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6096:           PetscBLASIntCast(temp_constraints,&Blas_N);
6097:           PetscBLASIntCast(temp_constraints,&Blas_K);
6098:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6099:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
6100:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6101:           if (j<temp_constraints) {
6102:             PetscInt ii;
6103:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6104:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6105:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6106:             PetscFPTrapPop();
6107:             for (k=0;k<temp_constraints-j;k++) {
6108:               for (ii=0;ii<size_of_constraint;ii++) {
6109:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6110:               }
6111:             }
6112:           }
6113: #else  /* on missing GESVD */
6114:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6115:           PetscBLASIntCast(temp_constraints,&Blas_N);
6116:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6117:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6118: #if !defined(PETSC_USE_COMPLEX)
6119:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6120: #else
6121:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6122: #endif
6123:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6124:           PetscFPTrapPop();
6125:           /* retain singular values greater than tol (relative to the largest one): note that LAPACKgesvd gives singular values in descending order */
6126:           k = temp_constraints;
6127:           if (k > size_of_constraint) k = size_of_constraint;
6128:           j = 0;
6129:           while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6130:           valid_constraints = k-j;
6131:           total_counts = total_counts-temp_constraints+valid_constraints;
6132: #endif /* on missing GESVD */
6133:         }
6134:       }
6135:       /* update pointers information */
6136:       if (valid_constraints) {
6137:         constraints_n[total_counts_cc] = valid_constraints;
6138:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6139:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6140:         /* set change_of_basis flag */
6141:         if (boolforchange) {
6142:           PetscBTSet(change_basis,total_counts_cc);
6143:         }
6144:         total_counts_cc++;
6145:       }
6146:     }
6147:     /* free workspace */
6148:     if (!skip_lapack) {
6149:       PetscFree(work);
6150: #if defined(PETSC_USE_COMPLEX)
6151:       PetscFree(rwork);
6152: #endif
6153:       PetscFree(singular_vals);
6154: #if defined(PETSC_MISSING_LAPACK_GESVD)
6155:       PetscFree(correlation_mat);
6156:       PetscFree(temp_basis);
6157: #endif
6158:     }
6159:     for (k=0;k<nnsp_size;k++) {
6160:       VecDestroy(&localnearnullsp[k]);
6161:     }
6162:     PetscFree(localnearnullsp);
6163:     /* free index sets of faces, edges and vertices */
6164:     for (i=0;i<n_ISForFaces;i++) {
6165:       ISDestroy(&ISForFaces[i]);
6166:     }
6167:     if (n_ISForFaces) {
6168:       PetscFree(ISForFaces);
6169:     }
6170:     for (i=0;i<n_ISForEdges;i++) {
6171:       ISDestroy(&ISForEdges[i]);
6172:     }
6173:     if (n_ISForEdges) {
6174:       PetscFree(ISForEdges);
6175:     }
6176:     ISDestroy(&ISForVertices);
6177:   } else {
6178:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6180:     total_counts = 0;
6181:     n_vertices = 0;
6182:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6183:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6184:     }
6185:     max_constraints = 0;
6186:     total_counts_cc = 0;
6187:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6188:       total_counts += pcbddc->adaptive_constraints_n[i];
6189:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6190:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6191:     }
6192:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6193:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6194:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6195:     constraints_data = pcbddc->adaptive_constraints_data;
6196:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6197:     PetscMalloc1(total_counts_cc,&constraints_n);
6198:     total_counts_cc = 0;
6199:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6200:       if (pcbddc->adaptive_constraints_n[i]) {
6201:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6202:       }
6203:     }

6205:     max_size_of_constraint = 0;
6206:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6207:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6208:     /* Change of basis */
6209:     PetscBTCreate(total_counts_cc,&change_basis);
6210:     if (pcbddc->use_change_of_basis) {
6211:       for (i=0;i<sub_schurs->n_subs;i++) {
6212:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6213:           PetscBTSet(change_basis,i+n_vertices);
6214:         }
6215:       }
6216:     }
6217:   }
6218:   pcbddc->local_primal_size = total_counts;
6219:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6221:   /* map constraints_idxs in boundary numbering */
6222:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6223:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);

6225:   /* Create constraint matrix */
6226:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6227:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6228:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6230:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6231:   /* determine if a QR strategy is needed for change of basis */
6232:   qr_needed = pcbddc->use_qr_single;
6233:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6234:   total_primal_vertices=0;
6235:   pcbddc->local_primal_size_cc = 0;
6236:   for (i=0;i<total_counts_cc;i++) {
6237:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6238:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6239:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6240:       pcbddc->local_primal_size_cc += 1;
6241:     } else if (PetscBTLookup(change_basis,i)) {
6242:       for (k=0;k<constraints_n[i];k++) {
6243:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6244:       }
6245:       pcbddc->local_primal_size_cc += constraints_n[i];
6246:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6247:         PetscBTSet(qr_needed_idx,i);
6248:         qr_needed = PETSC_TRUE;
6249:       }
6250:     } else {
6251:       pcbddc->local_primal_size_cc += 1;
6252:     }
6253:   }
6254:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6255:   pcbddc->n_vertices = total_primal_vertices;
6256:   /* permute indices in order to have a sorted set of vertices */
6257:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6258:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6259:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6260:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6262:   /* nonzero structure of constraint matrix */
6263:   /* and get reference dof for local constraints */
6264:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6265:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6267:   j = total_primal_vertices;
6268:   total_counts = total_primal_vertices;
6269:   cum = total_primal_vertices;
6270:   for (i=n_vertices;i<total_counts_cc;i++) {
6271:     if (!PetscBTLookup(change_basis,i)) {
6272:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6273:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6274:       cum++;
6275:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6276:       for (k=0;k<constraints_n[i];k++) {
6277:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6278:         nnz[j+k] = size_of_constraint;
6279:       }
6280:       j += constraints_n[i];
6281:     }
6282:   }
6283:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6284:   MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6285:   PetscFree(nnz);

6287:   /* set values in constraint matrix */
6288:   for (i=0;i<total_primal_vertices;i++) {
6289:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6290:   }
6291:   total_counts = total_primal_vertices;
6292:   for (i=n_vertices;i<total_counts_cc;i++) {
6293:     if (!PetscBTLookup(change_basis,i)) {
6294:       PetscInt *cols;

6296:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6297:       cols = constraints_idxs+constraints_idxs_ptr[i];
6298:       for (k=0;k<constraints_n[i];k++) {
6299:         PetscInt    row = total_counts+k;
6300:         PetscScalar *vals;

6302:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6303:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6304:       }
6305:       total_counts += constraints_n[i];
6306:     }
6307:   }
6308:   /* assembling */
6309:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6310:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6311:   MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");

6313:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6314:   if (pcbddc->use_change_of_basis) {
6315:     /* dual and primal dofs on a single cc */
6316:     PetscInt     dual_dofs,primal_dofs;
6317:     /* working stuff for GEQRF */
6318:     PetscScalar  *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6319:     PetscBLASInt lqr_work;
6320:     /* working stuff for UNGQR */
6321:     PetscScalar  *gqr_work = NULL,lgqr_work_t;
6322:     PetscBLASInt lgqr_work;
6323:     /* working stuff for TRTRS */
6324:     PetscScalar  *trs_rhs = NULL;
6325:     PetscBLASInt Blas_NRHS;
6326:     /* pointers for values insertion into change of basis matrix */
6327:     PetscInt     *start_rows,*start_cols;
6328:     PetscScalar  *start_vals;
6329:     /* working stuff for values insertion */
6330:     PetscBT      is_primal;
6331:     PetscInt     *aux_primal_numbering_B;
6332:     /* matrix sizes */
6333:     PetscInt     global_size,local_size;
6334:     /* temporary change of basis */
6335:     Mat          localChangeOfBasisMatrix;
6336:     /* extra space for debugging */
6337:     PetscScalar  *dbg_work = NULL;

6339:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6340:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6341:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6342:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6343:     /* nonzeros for local mat */
6344:     PetscMalloc1(pcis->n,&nnz);
6345:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6346:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6347:     } else {
6348:       const PetscInt *ii;
6349:       PetscInt       n;
6350:       PetscBool      flg_row;
6351:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6352:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6353:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6354:     }
6355:     for (i=n_vertices;i<total_counts_cc;i++) {
6356:       if (PetscBTLookup(change_basis,i)) {
6357:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6358:         if (PetscBTLookup(qr_needed_idx,i)) {
6359:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6360:         } else {
6361:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6362:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6363:         }
6364:       }
6365:     }
6366:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6367:     MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6368:     PetscFree(nnz);
6369:     /* Set interior change in the matrix */
6370:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6371:       for (i=0;i<pcis->n;i++) {
6372:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6373:       }
6374:     } else {
6375:       const PetscInt *ii,*jj;
6376:       PetscScalar    *aa;
6377:       PetscInt       n;
6378:       PetscBool      flg_row;
6379:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6380:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6381:       for (i=0;i<n;i++) {
6382:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6383:       }
6384:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6385:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6386:     }

6388:     if (pcbddc->dbg_flag) {
6389:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6390:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6391:     }


6394:     /* Now we loop on the constraints which need a change of basis */
6395:     /*
6396:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6397:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6399:        Basic blocks of change of basis matrix T computed by

6401:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6403:             | 1        0   ...        0         s_1/S |
6404:             | 0        1   ...        0         s_2/S |
6405:             |              ...                        |
6406:             | 0        ...            1     s_{n-1}/S |
6407:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6409:             with S = \sum_{i=1}^n s_i^2
6410:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6411:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6413:           - QR decomposition of constraints otherwise
6414:     */
6415:     if (qr_needed && max_size_of_constraint) {
6416:       /* space to store Q */
6417:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6418:       /* array to store scaling factors for reflectors */
6419:       PetscMalloc1(max_constraints,&qr_tau);
6420:       /* first we issue queries for optimal work */
6421:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6422:       PetscBLASIntCast(max_constraints,&Blas_N);
6423:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6424:       lqr_work = -1;
6425:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6426:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6427:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6428:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6429:       lgqr_work = -1;
6430:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6431:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6432:       PetscBLASIntCast(max_constraints,&Blas_K);
6433:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6434:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6435:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6436:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6437:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6438:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6439:       /* array to store rhs and solution of triangular solver */
6440:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6441:       /* allocating workspace for check */
6442:       if (pcbddc->dbg_flag) {
6443:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6444:       }
6445:     }
6446:     /* array to store whether a node is primal or not */
6447:     PetscBTCreate(pcis->n_B,&is_primal);
6448:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6449:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6450:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6451:     for (i=0;i<total_primal_vertices;i++) {
6452:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6453:     }
6454:     PetscFree(aux_primal_numbering_B);

6456:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6457:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6458:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6459:       if (PetscBTLookup(change_basis,total_counts)) {
6460:         /* get constraint info */
6461:         primal_dofs = constraints_n[total_counts];
6462:         dual_dofs = size_of_constraint-primal_dofs;

6464:         if (pcbddc->dbg_flag) {
6465:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6466:         }

6468:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6470:           /* copy quadrature constraints for change of basis check */
6471:           if (pcbddc->dbg_flag) {
6472:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6473:           }
6474:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6475:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6477:           /* compute QR decomposition of constraints */
6478:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6479:           PetscBLASIntCast(primal_dofs,&Blas_N);
6480:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6481:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6482:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6483:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6484:           PetscFPTrapPop();

6486:           /* explicitly compute R^-T */
6487:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6488:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6489:           PetscBLASIntCast(primal_dofs,&Blas_N);
6490:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6491:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6492:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6493:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6494:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6495:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6496:           PetscFPTrapPop();

6498:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6499:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6500:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6501:           PetscBLASIntCast(primal_dofs,&Blas_K);
6502:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6503:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6504:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6505:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6506:           PetscFPTrapPop();

6508:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6509:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6510:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6511:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6512:           PetscBLASIntCast(primal_dofs,&Blas_N);
6513:           PetscBLASIntCast(primal_dofs,&Blas_K);
6514:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6515:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6516:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6517:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6518:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6519:           PetscFPTrapPop();
6520:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6522:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6523:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6524:           /* insert cols for primal dofs */
6525:           for (j=0;j<primal_dofs;j++) {
6526:             start_vals = &qr_basis[j*size_of_constraint];
6527:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6528:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6529:           }
6530:           /* insert cols for dual dofs */
6531:           for (j=0,k=0;j<dual_dofs;k++) {
6532:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6533:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6534:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6535:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6536:               j++;
6537:             }
6538:           }

6540:           /* check change of basis */
6541:           if (pcbddc->dbg_flag) {
6542:             PetscInt   ii,jj;
6543:             PetscBool valid_qr=PETSC_TRUE;
6544:             PetscBLASIntCast(primal_dofs,&Blas_M);
6545:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6546:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6547:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6548:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6549:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6550:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6551:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6552:             PetscFPTrapPop();
6553:             for (jj=0;jj<size_of_constraint;jj++) {
6554:               for (ii=0;ii<primal_dofs;ii++) {
6555:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6556:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6557:               }
6558:             }
6559:             if (!valid_qr) {
6560:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6561:               for (jj=0;jj<size_of_constraint;jj++) {
6562:                 for (ii=0;ii<primal_dofs;ii++) {
6563:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6564:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6565:                   }
6566:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6567:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6568:                   }
6569:                 }
6570:               }
6571:             } else {
6572:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6573:             }
6574:           }
6575:         } else { /* simple transformation block */
6576:           PetscInt    row,col;
6577:           PetscScalar val,norm;

6579:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6580:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6581:           for (j=0;j<size_of_constraint;j++) {
6582:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6583:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6584:             if (!PetscBTLookup(is_primal,row_B)) {
6585:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6586:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6587:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6588:             } else {
6589:               for (k=0;k<size_of_constraint;k++) {
6590:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6591:                 if (row != col) {
6592:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6593:                 } else {
6594:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6595:                 }
6596:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6597:               }
6598:             }
6599:           }
6600:           if (pcbddc->dbg_flag) {
6601:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6602:           }
6603:         }
6604:       } else {
6605:         if (pcbddc->dbg_flag) {
6606:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6607:         }
6608:       }
6609:     }

6611:     /* free workspace */
6612:     if (qr_needed) {
6613:       if (pcbddc->dbg_flag) {
6614:         PetscFree(dbg_work);
6615:       }
6616:       PetscFree(trs_rhs);
6617:       PetscFree(qr_tau);
6618:       PetscFree(qr_work);
6619:       PetscFree(gqr_work);
6620:       PetscFree(qr_basis);
6621:     }
6622:     PetscBTDestroy(&is_primal);
6623:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6624:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6626:     /* assembling of global change of variable */
6627:     if (!pcbddc->fake_change) {
6628:       Mat      tmat;
6629:       PetscInt bs;

6631:       VecGetSize(pcis->vec1_global,&global_size);
6632:       VecGetLocalSize(pcis->vec1_global,&local_size);
6633:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6634:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6635:       MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6636:       MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6637:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6638:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6639:       MatGetBlockSize(pc->pmat,&bs);
6640:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6641:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6642:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6643:       MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6644:       MatDestroy(&tmat);
6645:       VecSet(pcis->vec1_global,0.0);
6646:       VecSet(pcis->vec1_N,1.0);
6647:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6648:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6649:       VecReciprocal(pcis->vec1_global);
6650:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6652:       /* check */
6653:       if (pcbddc->dbg_flag) {
6654:         PetscReal error;
6655:         Vec       x,x_change;

6657:         VecDuplicate(pcis->vec1_global,&x);
6658:         VecDuplicate(pcis->vec1_global,&x_change);
6659:         VecSetRandom(x,NULL);
6660:         VecCopy(x,pcis->vec1_global);
6661:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6662:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6663:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6664:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6665:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6666:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6667:         VecAXPY(x,-1.0,x_change);
6668:         VecNorm(x,NORM_INFINITY,&error);
6669:         if (error > PETSC_SMALL) {
6670:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6671:         }
6672:         VecDestroy(&x);
6673:         VecDestroy(&x_change);
6674:       }
6675:       /* adapt sub_schurs computed (if any) */
6676:       if (pcbddc->use_deluxe_scaling) {
6677:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6679:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6680:         if (sub_schurs && sub_schurs->S_Ej_all) {
6681:           Mat                    S_new,tmat;
6682:           IS                     is_all_N,is_V_Sall = NULL;

6684:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6685:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6686:           if (pcbddc->deluxe_zerorows) {
6687:             ISLocalToGlobalMapping NtoSall;
6688:             IS                     is_V;
6689:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6690:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6691:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6692:             ISLocalToGlobalMappingDestroy(&NtoSall);
6693:             ISDestroy(&is_V);
6694:           }
6695:           ISDestroy(&is_all_N);
6696:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6697:           MatDestroy(&sub_schurs->S_Ej_all);
6698:           PetscObjectReference((PetscObject)S_new);
6699:           if (pcbddc->deluxe_zerorows) {
6700:             const PetscScalar *array;
6701:             const PetscInt    *idxs_V,*idxs_all;
6702:             PetscInt          i,n_V;

6704:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6705:             ISGetLocalSize(is_V_Sall,&n_V);
6706:             ISGetIndices(is_V_Sall,&idxs_V);
6707:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6708:             VecGetArrayRead(pcis->D,&array);
6709:             for (i=0;i<n_V;i++) {
6710:               PetscScalar val;
6711:               PetscInt    idx;

6713:               idx = idxs_V[i];
6714:               val = array[idxs_all[idxs_V[i]]];
6715:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6716:             }
6717:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6718:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6719:             VecRestoreArrayRead(pcis->D,&array);
6720:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6721:             ISRestoreIndices(is_V_Sall,&idxs_V);
6722:           }
6723:           sub_schurs->S_Ej_all = S_new;
6724:           MatDestroy(&S_new);
6725:           if (sub_schurs->sum_S_Ej_all) {
6726:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6727:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6728:             PetscObjectReference((PetscObject)S_new);
6729:             if (pcbddc->deluxe_zerorows) {
6730:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6731:             }
6732:             sub_schurs->sum_S_Ej_all = S_new;
6733:             MatDestroy(&S_new);
6734:           }
6735:           ISDestroy(&is_V_Sall);
6736:           MatDestroy(&tmat);
6737:         }
6738:         /* destroy any change of basis context in sub_schurs */
6739:         if (sub_schurs && sub_schurs->change) {
6740:           PetscInt i;

6742:           for (i=0;i<sub_schurs->n_subs;i++) {
6743:             KSPDestroy(&sub_schurs->change[i]);
6744:           }
6745:           PetscFree(sub_schurs->change);
6746:         }
6747:       }
6748:       if (pcbddc->switch_static) { /* need to save the local change */
6749:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6750:       } else {
6751:         MatDestroy(&localChangeOfBasisMatrix);
6752:       }
6753:       /* determine if any process has changed the pressures locally */
6754:       pcbddc->change_interior = pcbddc->benign_have_null;
6755:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6756:       MatDestroy(&pcbddc->ConstraintMatrix);
6757:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6758:       pcbddc->use_qr_single = qr_needed;
6759:     }
6760:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6761:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6762:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6763:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6764:     } else {
6765:       Mat benign_global = NULL;
6766:       if (pcbddc->benign_have_null) {
6767:         Mat M;

6769:         pcbddc->change_interior = PETSC_TRUE;
6770:         VecCopy(matis->counter,pcis->vec1_N);
6771:         VecReciprocal(pcis->vec1_N);
6772:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
6773:         if (pcbddc->benign_change) {
6774:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6775:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6776:         } else {
6777:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
6778:           MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
6779:         }
6780:         MatISSetLocalMat(benign_global,M);
6781:         MatDestroy(&M);
6782:         MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
6783:         MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
6784:       }
6785:       if (pcbddc->user_ChangeOfBasisMatrix) {
6786:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6787:         MatDestroy(&benign_global);
6788:       } else if (pcbddc->benign_have_null) {
6789:         pcbddc->ChangeOfBasisMatrix = benign_global;
6790:       }
6791:     }
6792:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6793:       IS             is_global;
6794:       const PetscInt *gidxs;

6796:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6797:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6798:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6799:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6800:       ISDestroy(&is_global);
6801:     }
6802:   }
6803:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6804:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6805:   }

6807:   if (!pcbddc->fake_change) {
6808:     /* add pressure dofs to set of primal nodes for numbering purposes */
6809:     for (i=0;i<pcbddc->benign_n;i++) {
6810:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6811:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6812:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6813:       pcbddc->local_primal_size_cc++;
6814:       pcbddc->local_primal_size++;
6815:     }

6817:     /* check if a new primal space has been introduced (also take into account benign trick) */
6818:     pcbddc->new_primal_space_local = PETSC_TRUE;
6819:     if (olocal_primal_size == pcbddc->local_primal_size) {
6820:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6821:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6822:       if (!pcbddc->new_primal_space_local) {
6823:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6824:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6825:       }
6826:     }
6827:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6828:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6829:   }
6830:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

6832:   /* flush dbg viewer */
6833:   if (pcbddc->dbg_flag) {
6834:     PetscViewerFlush(pcbddc->dbg_viewer);
6835:   }

6837:   /* free workspace */
6838:   PetscBTDestroy(&qr_needed_idx);
6839:   PetscBTDestroy(&change_basis);
6840:   if (!pcbddc->adaptive_selection) {
6841:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6842:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6843:   } else {
6844:     PetscFree5(pcbddc->adaptive_constraints_n,
6845:                       pcbddc->adaptive_constraints_idxs_ptr,
6846:                       pcbddc->adaptive_constraints_data_ptr,
6847:                       pcbddc->adaptive_constraints_idxs,
6848:                       pcbddc->adaptive_constraints_data);
6849:     PetscFree(constraints_n);
6850:     PetscFree(constraints_idxs_B);
6851:   }
6852:   return(0);
6853: }
6854: /* #undef PETSC_MISSING_LAPACK_GESVD */

/* PCBDDCAnalyzeInterface - (re)builds, when requested, and analyzes the interface graph kept in pcbddc->mat_graph.

   Input parameter:
   . pc - the preconditioner; pc->data is read as PC_BDDC and pc->pmat->data as Mat_IS

   Steps visible in this block:
   - when pcbddc->recompute_topography is set, the graph is reset, initialized from the
     local-to-global mapping of pc->pmat, optionally fed with a CSR adjacency extracted from
     matis->A and with per-dof coordinates, and finally set up through PCBDDCGraphSetUp;
   - when pcbddc->graphanalyzed is false, PCBDDCGraphComputeConnectedComponents is called and
     the flag is raised;
   - the normal path ends with return(0); size mismatches are reported through SETERRQ2.

   NOTE(review): no error-checking wrappers are visible around the calls in this listing and
   the declared ierr is never referenced here. */
6856: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6857: {
6858:   ISLocalToGlobalMapping map;
6859:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
6860:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
6861:   PetscInt               i,N;
6862:   PetscBool              rcsr = PETSC_FALSE; /* raised when this call enters the default-CSR branch below */
6863:   PetscErrorCode         ierr;

6866:   if (pcbddc->recompute_topography) {
        /* a rebuilt graph must go through the connected components analysis again */
6867:     pcbddc->graphanalyzed = PETSC_FALSE;
6868:     /* Reset previously computed graph */
6869:     PCBDDCGraphReset(pcbddc->mat_graph);
6870:     /* Init local Graph struct */
        /* N is the first size returned by MatGetSize for pc->pmat; map is its row local-to-global mapping */
6871:     MatGetSize(pc->pmat,&N,NULL);
6872:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6873:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

        /* only done for vertices given in local form with no pcbddc->user_primal_vertices counterpart;
           the helper receives MPI_LOR and may replace the IS (it is passed by address) */
6875:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6876:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6877:     }
6878:     /* Check validity of the csr graph passed in by the user */
        /* a zero nvtxs_csr is accepted; any other value must equal nvtxs */
6879:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

6881:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6882:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6883:       PetscInt  *xadj,*adjncy;
6884:       PetscInt  nvtxs;
6885:       PetscBool flg_row=PETSC_FALSE;

          /* the adjacency is copied (PETSC_COPY_VALUES) only if MatGetRowIJ sets flg_row;
             the arrays are given back to matis->A right after */
6887:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6888:       if (flg_row) {
6889:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6890:         pcbddc->computed_rowadj = PETSC_TRUE;
6891:       }
6892:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
          /* set regardless of flg_row: nvtxs_csr is zeroed at the end of this function */
6893:       rcsr = PETSC_TRUE;
6894:     }
6895:     if (pcbddc->dbg_flag) {
6896:       PetscViewerFlush(pcbddc->dbg_viewer);
6897:     }

        /* coordinates are present (cdim != 0) but not yet flagged as local (cloc false):
           they must have pc->pmat->rmap->n entries and are broadcast through matis->sf into a
           new array with n entries (n = first local size of matis->A), cdim reals per entry */
6899:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6900:       PetscReal    *lcoords;
6901:       PetscInt     n;
6902:       MPI_Datatype dimrealtype;

6904:       if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
6905:       MatGetLocalSize(matis->A,&n,NULL);
6906:       MatISSetUpSF(pc->pmat);
6907:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
          /* one MPI element = the cdim reals of a single point, so the SF moves whole points */
6908:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
6909:       MPI_Type_commit(&dimrealtype);
6910:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6911:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6912:       MPI_Type_free(&dimrealtype);
          /* the old array is released and replaced by the freshly filled one */
6913:       PetscFree(pcbddc->mat_graph->coords);

6915:       pcbddc->mat_graph->coords = lcoords;
6916:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
6917:       pcbddc->mat_graph->cnloc  = n;
6918:     }
        /* when coordinates are present their count must match the number of graph vertices */
6919:     if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
        /* coordinates are marked active only while corner selection is requested and not yet done */
6920:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);

6922:     /* Setup of Graph */
6923:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6924:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

6926:     /* attach info on disconnected subdomains if present */
6927:     if (pcbddc->n_local_subs) {
6928:       PetscInt *local_subs;

          /* local_subs[d] = index of the IS in pcbddc->local_subs that lists d.
             NOTE(review): the array has N entries (N from MatGetSize on pc->pmat) and only the
             listed positions are written; confirm every index is < N and that unlisted
             entries are never read */
6930:       PetscMalloc1(N,&local_subs);
6931:       for (i=0;i<pcbddc->n_local_subs;i++) {
6932:         const PetscInt *idxs;
6933:         PetscInt       nl,j;

6935:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
6936:         ISGetIndices(pcbddc->local_subs[i],&idxs);
6937:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6938:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6939:       }
          /* the array is stored in mat_graph and is not freed in this function */
6940:       pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6941:       pcbddc->mat_graph->local_subs = local_subs;
6942:     }
6943:   }

6945:   if (!pcbddc->graphanalyzed) {
6946:     /* Graph's connected components analysis */
6947:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6948:     pcbddc->graphanalyzed = PETSC_TRUE;
6949:   }
      /* the default-CSR branch was taken above: clear the CSR vertex count kept in the graph */
6950:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6951:   return(0);
6952: }

/* PCBDDCOrthonormalizeVecs - orthonormalizes, in place, the n vectors stored in vecs[].

   Input parameters:
   + n    - number of vectors; when it is zero the function returns immediately
   - vecs - array of vectors, overwritten with the result

   The first vector is normalized; every following vector gets a linear combination of all the
   previously processed ones added to it (coefficients obtained from a single VecMDot call) and
   is then normalized, i.e. a Gram-Schmidt type sweep.

   NOTE(review): the norms computed by VecNormalize are discarded (NULL is passed), so a vector
   lying (numerically) in the span of the previous ones is not detected here. */
6954: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
6955: {
6956:   PetscInt       i,j;
6957:   PetscScalar    *alphas; /* work array of n coefficients, freed before returning */

6961:   if (!n) return(0);
6962:   PetscMalloc1(n,&alphas);
6963:   VecNormalize(vecs[0],NULL);
6964:   for (i=1;i<n;i++) {
        /* dot products of vecs[i] against vecs[0..i-1], all taken before vecs[i] is modified */
6965:     VecMDot(vecs[i],i,vecs,alphas);
        /* negate (and conjugate) so that the VecMAXPY below removes the components.
           NOTE(review): for complex scalars confirm the conjugation against the VecMDot convention */
6966:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
6967:     VecMAXPY(vecs[i],i,alphas,vecs);
6968:     VecNormalize(vecs[i],NULL);
6969:   }
6970:   PetscFree(alphas);
6971:   return(0);
6972: }

6974: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
6975: {
6976:   Mat            A;
6977:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
6978:   PetscMPIInt    size,rank,color;
6979:   PetscInt       *xadj,*adjncy;
6980:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
6981:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
6982:   PetscInt       void_procs,*procs_candidates = NULL;
6983:   PetscInt       xadj_count,*count;
6984:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
6985:   PetscSubcomm   psubcomm;
6986:   MPI_Comm       subcomm;

6991:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6992:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6995:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);

6997:   if (have_void) *have_void = PETSC_FALSE;
6998:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
6999:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7000:   MatISGetLocalMat(mat,&A);
7001:   MatGetLocalSize(A,&n,NULL);
7002:   im_active = !!n;
7003:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7004:   void_procs = size - active_procs;
7005:   /* get ranks of of non-active processes in mat communicator */
7006:   if (void_procs) {
7007:     PetscInt ncand;

7009:     if (have_void) *have_void = PETSC_TRUE;
7010:     PetscMalloc1(size,&procs_candidates);
7011:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7012:     for (i=0,ncand=0;i<size;i++) {
7013:       if (!procs_candidates[i]) {
7014:         procs_candidates[ncand++] = i;
7015:       }
7016:     }
7017:     /* force n_subdomains to be not greater that the number of non-active processes */
7018:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7019:   }

7021:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7022:      number of subdomains requested 1 -> send to master or first candidate in voids  */
7023:   MatGetSize(mat,&N,NULL);
7024:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7025:     PetscInt issize,isidx,dest;
7026:     if (*n_subdomains == 1) dest = 0;
7027:     else dest = rank;
7028:     if (im_active) {
7029:       issize = 1;
7030:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7031:         isidx = procs_candidates[dest];
7032:       } else {
7033:         isidx = dest;
7034:       }
7035:     } else {
7036:       issize = 0;
7037:       isidx = -1;
7038:     }
7039:     if (*n_subdomains != 1) *n_subdomains = active_procs;
7040:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7041:     PetscFree(procs_candidates);
7042:     return(0);
7043:   }
7044:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7045:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7046:   threshold = PetscMax(threshold,2);

7048:   /* Get info on mapping */
7049:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

7051:   /* build local CSR graph of subdomains' connectivity */
7052:   PetscMalloc1(2,&xadj);
7053:   xadj[0] = 0;
7054:   xadj[1] = PetscMax(n_neighs-1,0);
7055:   PetscMalloc1(xadj[1],&adjncy);
7056:   PetscMalloc1(xadj[1],&adjncy_wgt);
7057:   PetscCalloc1(n,&count);
7058:   for (i=1;i<n_neighs;i++)
7059:     for (j=0;j<n_shared[i];j++)
7060:       count[shared[i][j]] += 1;

7062:   xadj_count = 0;
7063:   for (i=1;i<n_neighs;i++) {
7064:     for (j=0;j<n_shared[i];j++) {
7065:       if (count[shared[i][j]] < threshold) {
7066:         adjncy[xadj_count] = neighs[i];
7067:         adjncy_wgt[xadj_count] = n_shared[i];
7068:         xadj_count++;
7069:         break;
7070:       }
7071:     }
7072:   }
7073:   xadj[1] = xadj_count;
7074:   PetscFree(count);
7075:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7076:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

7078:   PetscMalloc1(1,&ranks_send_to_idx);

7080:   /* Restrict work on active processes only */
7081:   PetscMPIIntCast(im_active,&color);
7082:   if (void_procs) {
7083:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7084:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7085:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7086:     subcomm = PetscSubcommChild(psubcomm);
7087:   } else {
7088:     psubcomm = NULL;
7089:     subcomm = PetscObjectComm((PetscObject)mat);
7090:   }

7092:   v_wgt = NULL;
7093:   if (!color) {
7094:     PetscFree(xadj);
7095:     PetscFree(adjncy);
7096:     PetscFree(adjncy_wgt);
7097:   } else {
7098:     Mat             subdomain_adj;
7099:     IS              new_ranks,new_ranks_contig;
7100:     MatPartitioning partitioner;
7101:     PetscInt        rstart=0,rend=0;
7102:     PetscInt        *is_indices,*oldranks;
7103:     PetscMPIInt     size;
7104:     PetscBool       aggregate;

7106:     MPI_Comm_size(subcomm,&size);
7107:     if (void_procs) {
7108:       PetscInt prank = rank;
7109:       PetscMalloc1(size,&oldranks);
7110:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7111:       for (i=0;i<xadj[1];i++) {
7112:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7113:       }
7114:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7115:     } else {
7116:       oldranks = NULL;
7117:     }
7118:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7119:     if (aggregate) { /* TODO: all this part could be made more efficient */
7120:       PetscInt    lrows,row,ncols,*cols;
7121:       PetscMPIInt nrank;
7122:       PetscScalar *vals;

7124:       MPI_Comm_rank(subcomm,&nrank);
7125:       lrows = 0;
7126:       if (nrank<redprocs) {
7127:         lrows = size/redprocs;
7128:         if (nrank<size%redprocs) lrows++;
7129:       }
7130:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7131:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7132:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7133:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7134:       row = nrank;
7135:       ncols = xadj[1]-xadj[0];
7136:       cols = adjncy;
7137:       PetscMalloc1(ncols,&vals);
7138:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7139:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7140:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7141:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7142:       PetscFree(xadj);
7143:       PetscFree(adjncy);
7144:       PetscFree(adjncy_wgt);
7145:       PetscFree(vals);
7146:       if (use_vwgt) {
7147:         Vec               v;
7148:         const PetscScalar *array;
7149:         PetscInt          nl;

7151:         MatCreateVecs(subdomain_adj,&v,NULL);
7152:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7153:         VecAssemblyBegin(v);
7154:         VecAssemblyEnd(v);
7155:         VecGetLocalSize(v,&nl);
7156:         VecGetArrayRead(v,&array);
7157:         PetscMalloc1(nl,&v_wgt);
7158:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7159:         VecRestoreArrayRead(v,&array);
7160:         VecDestroy(&v);
7161:       }
7162:     } else {
7163:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7164:       if (use_vwgt) {
7165:         PetscMalloc1(1,&v_wgt);
7166:         v_wgt[0] = n;
7167:       }
7168:     }
7169:     /* MatView(subdomain_adj,0); */

7171:     /* Partition */
7172:     MatPartitioningCreate(subcomm,&partitioner);
7173:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7174:     if (v_wgt) {
7175:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7176:     }
7177:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7178:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7179:     MatPartitioningSetFromOptions(partitioner);
7180:     MatPartitioningApply(partitioner,&new_ranks);
7181:     /* MatPartitioningView(partitioner,0); */

7183:     /* renumber new_ranks to avoid "holes" in new set of processors */
7184:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7185:     ISDestroy(&new_ranks);
7186:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7187:     if (!aggregate) {
7188:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7189: #if defined(PETSC_USE_DEBUG)
7190:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7191: #endif
7192:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7193:       } else if (oldranks) {
7194:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7195:       } else {
7196:         ranks_send_to_idx[0] = is_indices[0];
7197:       }
7198:     } else {
7199:       PetscInt    idx = 0;
7200:       PetscMPIInt tag;
7201:       MPI_Request *reqs;

7203:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7204:       PetscMalloc1(rend-rstart,&reqs);
7205:       for (i=rstart;i<rend;i++) {
7206:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7207:       }
7208:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7209:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7210:       PetscFree(reqs);
7211:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7212: #if defined(PETSC_USE_DEBUG)
7213:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7214: #endif
7215:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7216:       } else if (oldranks) {
7217:         ranks_send_to_idx[0] = oldranks[idx];
7218:       } else {
7219:         ranks_send_to_idx[0] = idx;
7220:       }
7221:     }
7222:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7223:     /* clean up */
7224:     PetscFree(oldranks);
7225:     ISDestroy(&new_ranks_contig);
7226:     MatDestroy(&subdomain_adj);
7227:     MatPartitioningDestroy(&partitioner);
7228:   }
7229:   PetscSubcommDestroy(&psubcomm);
7230:   PetscFree(procs_candidates);

7232:   /* assemble parallel IS for sends */
7233:   i = 1;
7234:   if (!color) i=0;
7235:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7236:   return(0);
7237: }

/* Private tags for the type of a local matrix. PCBDDCMatISSubassemble stores one of these
   (cast to PetscInt) as the first entry of the index buffer it sends, and switches on it
   on the receiving side; the numeric values are therefore part of the message layout. */
7239: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

/*
   PCBDDCMatISSubassemble - Sends the dense local matrix of a MATIS to the processes listed in
   is_sends and, on each receiving process, sums the received blocks into the local matrix of
   a new (or reused) MATIS.

   Input Parameters:
     mat           - matrix of type MATIS; its local matrix must be square and of type SEQDENSE
                     (an error is raised otherwise)
     is_sends      - IS whose local entries are used as destination ranks in the communicator of
                     mat; if NULL, n_subdomains must be nonzero and the pattern is computed with
                     PCBDDCMatISGetSubassemblingPattern
     n_subdomains  - target number of subdomains, only read when is_sends is NULL
     restrict_comm - if true, processes are split in two colors; processes with color 1 get
                     their *mat_n, isarray[] entries and nnsp_vec[0] destroyed before returning
     restrict_full - with restrict_comm: color 1 is given to every process that receives nothing;
                     otherwise only to processes that send but receive nothing
     reuse         - if true and *mat_n is not NULL, *mat_n is reused; it must be a MATIS with
                     the same global sizes as mat
     nis, isarray  - additional index sets; their indices are sent with the matrix and each
                     isarray[i] is destroyed and recreated from the sorted, de-duplicated
                     received indices
     nvecs, nnsp_vec - at most one vector is supported; the array of nnsp_vec[0] is sent and
                     nnsp_vec[0] is destroyed and recreated with the received values summed in

   Output Parameter:
     mat_n         - the subassembled MATIS (set to NULL on processes with color 1 when
                     restrict_comm is true)
*/
7241: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7242: {
7243:   Mat                    local_mat;
7244:   IS                     is_sends_internal;
7245:   PetscInt               rows,cols,new_local_rows;
7246:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7247:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7248:   ISLocalToGlobalMapping l2gmap;
7249:   PetscInt*              l2gmap_indices;
7250:   const PetscInt*        is_indices;
7251:   MatType                new_local_type;
7252:   /* buffers */
7253:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7254:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7255:   PetscInt               *recv_buffer_idxs_local;
7256:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7257:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7258:   /* MPI */
7259:   MPI_Comm               comm,comm_n;
7260:   PetscSubcomm           subcomm;
7261:   PetscMPIInt            n_sends,n_recvs,size;
7262:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7263:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7264:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7265:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7266:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7267:   PetscErrorCode         ierr;

        /* input must be a MATIS, and at most one attached vector is handled */
7271:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7272:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7279:   if (nvecs) {
7280:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7282:   }
7283:   /* further checks */
7284:   MatISGetLocalMat(mat,&local_mat);
7285:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7286:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7287:   MatGetSize(local_mat,&rows,&cols);
7288:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
        /* a matrix to be reused must be a MATIS with the same global sizes as mat */
7289:   if (reuse && *mat_n) {
7290:     PetscInt mrows,mcols,mnrows,mncols;
7292:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7293:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7294:     MatGetSize(mat,&mrows,&mcols);
7295:     MatGetSize(*mat_n,&mnrows,&mncols);
7296:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7297:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7298:   }
7299:   MatGetBlockSize(local_mat,&bs);

7302:   /* prepare IS for sending if not provided */
7303:   if (!is_sends) {
7304:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7305:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7306:   } else {
          /* take a reference so that is_sends_internal can be destroyed below in both cases */
7307:     PetscObjectReference((PetscObject)is_sends);
7308:     is_sends_internal = is_sends;
7309:   }

7311:   /* get comm */
7312:   PetscObjectGetComm((PetscObject)mat,&comm);

7314:   /* compute number of sends */
7315:   ISGetLocalSize(is_sends_internal,&i);
7316:   PetscMPIIntCast(i,&n_sends);

7318:   /* compute number of receives */
7319:   MPI_Comm_size(comm,&size);
7320:   PetscMalloc1(size,&iflags);
7321:   PetscMemzero(iflags,size*sizeof(*iflags));
7322:   ISGetIndices(is_sends_internal,&is_indices);
        /* the entries of the IS are used as ranks in comm: flag every destination */
7323:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7324:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7325:   PetscFree(iflags);

7327:   /* restrict comm if requested */
7328:   subcomm = 0;
7329:   destroy_mat = PETSC_FALSE;
7330:   if (restrict_comm) {
7331:     PetscMPIInt color,subcommsize;

7333:     color = 0;
7334:     if (restrict_full) {
7335:       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7336:     } else {
7337:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7338:     }
          /* sum of the colors = number of excluded processes; subcommsize = number of processes with color 0 */
7339:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7340:     subcommsize = size - subcommsize;
7341:     /* check if reuse has been requested */
7342:     if (reuse) {
7343:       if (*mat_n) {
7344:         PetscMPIInt subcommsize2;
7345:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7346:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7347:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7348:       } else {
              /* no matrix to reuse on this process: the "fake object" created below lives on PETSC_COMM_SELF */
7349:         comm_n = PETSC_COMM_SELF;
7350:       }
7351:     } else { /* MAT_INITIAL_MATRIX */
7352:       PetscMPIInt rank;

7354:       MPI_Comm_rank(comm,&rank);
7355:       PetscSubcommCreate(comm,&subcomm);
7356:       PetscSubcommSetNumber(subcomm,2);
7357:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7358:       comm_n = PetscSubcommChild(subcomm);
7359:     }
7360:     /* flag to destroy *mat_n if not significant */
7361:     if (color) destroy_mat = PETSC_TRUE;
7362:   } else {
7363:     comm_n = comm;
7364:   }

7366:   /* prepare send/receive buffers */
7367:   PetscMalloc1(size,&ilengths_idxs);
7368:   PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));
7369:   PetscMalloc1(size,&ilengths_vals);
7370:   PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));
7371:   if (nis) {
7372:     PetscCalloc1(size,&ilengths_idxs_is);
7373:   }

7375:   /* Get data from local matrices */
        /* NOTE(review): isdense has already been checked right after MatISGetLocalMat above, so this
           error branch is not expected to be taken; it marks where other local types would be handled */
7376:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7377:     /* TODO: See below some guidelines on how to prepare the local buffers */
7378:     /*
7379:        send_buffer_vals should contain the raw values of the local matrix
7380:        send_buffer_idxs should contain:
7381:        - MatType_PRIVATE type
7382:        - PetscInt        size_of_l2gmap
7383:        - PetscInt        global_row_indices[size_of_l2gmap]
7384:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7385:     */
7386:   else {
          /* the array of the dense local matrix is sent directly (no copy); it is restored once
             the value sends have completed, in the "free workspace" section */
7387:     MatDenseGetArray(local_mat,&send_buffer_vals);
7388:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
          /* index buffer layout: [type tag, number of indices, indices of the row l2g map ...] */
7389:     PetscMalloc1(i+2,&send_buffer_idxs);
7390:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7391:     send_buffer_idxs[1] = i;
7392:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7393:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7394:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7395:     PetscMPIIntCast(i,&len);
          /* every destination gets the same len x len block of values and the same len+2 index buffer */
7396:     for (i=0;i<n_sends;i++) {
7397:       ilengths_vals[is_indices[i]] = len*len;
7398:       ilengths_idxs[is_indices[i]] = len+2;
7399:     }
7400:   }
        /* onodes[] and olengths_*[] are used below as the sources and the counts of the receives */
7401:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7402:   /* additional is (if any) */
7403:   if (nis) {
7404:     PetscMPIInt psum;
7405:     PetscInt j;
          /* first pass: total length of the IS buffer */
7406:     for (j=0,psum=0;j<nis;j++) {
7407:       PetscInt plen;
7408:       ISGetLocalSize(isarray[j],&plen);
7409:       PetscMPIIntCast(plen,&len);
7410:       psum += len+1; /* indices + length */
7411:     }
7412:     PetscMalloc1(psum,&send_buffer_idxs_is);
          /* second pass: pack, for each IS in order, its length followed by its indices */
7413:     for (j=0,psum=0;j<nis;j++) {
7414:       PetscInt plen;
7415:       const PetscInt *is_array_idxs;
7416:       ISGetLocalSize(isarray[j],&plen);
7417:       send_buffer_idxs_is[psum] = plen;
7418:       ISGetIndices(isarray[j],&is_array_idxs);
7419:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7420:       ISRestoreIndices(isarray[j],&is_array_idxs);
7421:       psum += plen+1; /* indices + length */
7422:     }
          /* the same packed IS buffer is sent to every destination */
7423:     for (i=0;i<n_sends;i++) {
7424:       ilengths_idxs_is[is_indices[i]] = psum;
7425:     }
7426:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7427:   }
7428:   MatISRestoreLocalMat(mat,&local_mat);

        /* size the receive buffers as the sum of the incoming message lengths */
7430:   buf_size_idxs = 0;
7431:   buf_size_vals = 0;
7432:   buf_size_idxs_is = 0;
7433:   buf_size_vecs = 0;
7434:   for (i=0;i<n_recvs;i++) {
7435:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7436:     buf_size_vals += (PetscInt)olengths_vals[i];
7437:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
          /* NOTE(review): the vector messages received below have olengths_idxs[i]-2 entries, so this
             counts two extra entries per message */
7438:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7439:   }
7440:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7441:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7442:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7443:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7445:   /* get new tags for clean communications */
7446:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7447:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7448:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7449:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7451:   /* allocate for requests */
7452:   PetscMalloc1(n_sends,&send_req_idxs);
7453:   PetscMalloc1(n_sends,&send_req_vals);
7454:   PetscMalloc1(n_sends,&send_req_idxs_is);
7455:   PetscMalloc1(n_sends,&send_req_vecs);
7456:   PetscMalloc1(n_recvs,&recv_req_idxs);
7457:   PetscMalloc1(n_recvs,&recv_req_vals);
7458:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7459:   PetscMalloc1(n_recvs,&recv_req_vecs);

7461:   /* communications */
        /* each message is received into the next consecutive chunk of the corresponding buffer */
7462:   ptr_idxs = recv_buffer_idxs;
7463:   ptr_vals = recv_buffer_vals;
7464:   ptr_idxs_is = recv_buffer_idxs_is;
7465:   ptr_vecs = recv_buffer_vecs;
7466:   for (i=0;i<n_recvs;i++) {
7467:     source_dest = onodes[i];
7468:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7469:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7470:     ptr_idxs += olengths_idxs[i];
7471:     ptr_vals += olengths_vals[i];
7472:     if (nis) {
7473:       source_dest = onodes_is[i];
7474:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7475:       ptr_idxs_is += olengths_idxs_is[i];
7476:     }
7477:     if (nvecs) {
            /* the vector message carries one value per index, without the two header entries */
7478:       source_dest = onodes[i];
7479:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7480:       ptr_vecs += olengths_idxs[i]-2;
7481:     }
7482:   }
7483:   for (i=0;i<n_sends;i++) {
7484:     PetscMPIIntCast(is_indices[i],&source_dest);
7485:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7486:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7487:     if (nis) {
7488:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7489:     }
7490:     if (nvecs) {
            /* NOTE(review): VecGetArray is called once per send, while the matching VecRestoreArray
               below is called exactly once whenever nvecs is set (also when n_sends is 0, in which
               case send_buffer_vecs has not been set) - presumably n_sends is expected to be 1; confirm */
7491:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7492:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7493:     }
7494:   }
7495:   ISRestoreIndices(is_sends_internal,&is_indices);
7496:   ISDestroy(&is_sends_internal);

7498:   /* assemble new l2g map */
        /* the index messages are needed first: their headers give the sizes of the incoming maps */
7499:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7500:   ptr_idxs = recv_buffer_idxs;
7501:   new_local_rows = 0;
7502:   for (i=0;i<n_recvs;i++) {
7503:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7504:     ptr_idxs += olengths_idxs[i];
7505:   }
7506:   PetscMalloc1(new_local_rows,&l2gmap_indices);
7507:   ptr_idxs = recv_buffer_idxs;
7508:   new_local_rows = 0;
        /* concatenate all the received indices (skipping the two header entries of each message) */
7509:   for (i=0;i<n_recvs;i++) {
7510:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7511:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7512:     ptr_idxs += olengths_idxs[i];
7513:   }
        /* new_local_rows is passed by address here and is used from now on as the size of the new map */
7514:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7515:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7516:   PetscFree(l2gmap_indices);

7518:   /* infer new local matrix type from received local matrices type */
7519:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7520:   /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7521:   if (n_recvs) {
          /* the type tag of the local send buffer is the reference; any received tag that differs
             makes the result fall back to AIJ */
7522:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7523:     ptr_idxs = recv_buffer_idxs;
7524:     for (i=0;i<n_recvs;i++) {
7525:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7526:         new_local_type_private = MATAIJ_PRIVATE;
7527:         break;
7528:       }
7529:       ptr_idxs += olengths_idxs[i];
7530:     }
7531:     switch (new_local_type_private) {
            /* dense input blocks are subassembled into a SEQAIJ local matrix */
7532:       case MATDENSE_PRIVATE:
7533:         new_local_type = MATSEQAIJ;
7534:         bs = 1;
7535:         break;
7536:       case MATAIJ_PRIVATE:
7537:         new_local_type = MATSEQAIJ;
7538:         bs = 1;
7539:         break;
7540:       case MATBAIJ_PRIVATE:
7541:         new_local_type = MATSEQBAIJ;
7542:         break;
7543:       case MATSBAIJ_PRIVATE:
7544:         new_local_type = MATSEQSBAIJ;
7545:         break;
7546:       default:
7547:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7548:         break;
7549:     }
7550:   } else { /* by default, new_local_type is seqaij */
7551:     new_local_type = MATSEQAIJ;
7552:     bs = 1;
7553:   }

7555:   /* create MATIS object if needed */
7556:   if (!reuse) {
7557:     MatGetSize(mat,&rows,&cols);
7558:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7559:   } else {
7560:     /* it also destroys the local matrices */
7561:     if (*mat_n) {
7562:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7563:     } else { /* this is a fake object */
            /* NOTE(review): in this branch rows and cols still hold the sizes of the local matrix of
               mat read at the beginning of the function, not the global sizes of mat */
7564:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7565:     }
7566:   }
7567:   MatISGetLocalMat(*mat_n,&local_mat);
7568:   MatSetType(local_mat,new_local_type);

7570:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7572:   /* Global to local map of received indices */
        /* the mapping is applied to the whole buffer, header entries included; the headers are
           copied back from the original buffer right below */
7573:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7574:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7575:   ISLocalToGlobalMappingDestroy(&l2gmap);

7577:   /* restore attributes -> type of incoming data and its size */
7578:   buf_size_idxs = 0;
7579:   for (i=0;i<n_recvs;i++) {
7580:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7581:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7582:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7583:   }
7584:   PetscFree(recv_buffer_idxs);

7586:   /* set preallocation */
7587:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7588:   if (!newisdense) {
7589:     PetscInt *new_local_nnz=0;

7591:     ptr_idxs = recv_buffer_idxs_local;
7592:     if (n_recvs) {
7593:       PetscCalloc1(new_local_rows,&new_local_nnz);
7594:     }
7595:     for (i=0;i<n_recvs;i++) {
7596:       PetscInt j;
7597:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
              /* every local row touched by a received block is credited with the size of that block */
7598:         for (j=0;j<*(ptr_idxs+1);j++) {
7599:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7600:         }
7601:       } else {
7602:         /* TODO */
7603:       }
7604:       ptr_idxs += olengths_idxs[i];
7605:     }
7606:     if (new_local_nnz) {
            /* the per-row counts are clipped to the number of local rows; the counts are then rescaled
               in place before each of the following preallocation calls (presumably only the call
               matching the actual type of local_mat has an effect - confirm) */
7607:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7608:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7609:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7610:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7611:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7612:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7613:     } else {
7614:       MatSetUp(local_mat);
7615:     }
7616:     PetscFree(new_local_nnz);
7617:   } else {
7618:     MatSetUp(local_mat);
7619:   }

7621:   /* set values */
7622:   ptr_vals = recv_buffer_vals;
7623:   ptr_idxs = recv_buffer_idxs_local;
7624:   for (i=0;i<n_recvs;i++) {
7625:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
            /* row orientation is switched off only around the insertion of the received dense block
               (presumably because the raw dense array is stored by columns - confirm); blocks from
               different senders are summed with ADD_VALUES */
7626:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7627:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7628:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7629:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7630:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7631:     } else {
7632:       /* TODO */
7633:     }
7634:     ptr_idxs += olengths_idxs[i];
7635:     ptr_vals += olengths_vals[i];
7636:   }
7637:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7638:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7639:   MatISRestoreLocalMat(*mat_n,&local_mat);
7640:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7641:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7642:   PetscFree(recv_buffer_vals);

7644: #if 0
7645:   if (!restrict_comm) { /* check */
7646:     Vec       lvec,rvec;
7647:     PetscReal infty_error;

7649:     MatCreateVecs(mat,&rvec,&lvec);
7650:     VecSetRandom(rvec,NULL);
7651:     MatMult(mat,rvec,lvec);
7652:     VecScale(lvec,-1.0);
7653:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7654:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7655:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7656:     VecDestroy(&rvec);
7657:     VecDestroy(&lvec);
7658:   }
7659: #endif

7661:   /* assemble new additional is (if any) */
7662:   if (nis) {
7663:     PetscInt **temp_idxs,*count_is,j,psum;

7665:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7666:     PetscCalloc1(nis,&count_is);
7667:     ptr_idxs = recv_buffer_idxs_is;
7668:     psum = 0;
          /* first pass: each received message holds, for each of the nis index sets in order,
             its length followed by its indices; count the indices received for each IS */
7669:     for (i=0;i<n_recvs;i++) {
7670:       for (j=0;j<nis;j++) {
7671:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7672:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7673:         psum += plen;
7674:         ptr_idxs += plen+1; /* shift pointer to received data */
7675:       }
7676:     }
          /* one allocation for all the index sets; temp_idxs[j] points to the start of the j-th one */
7677:     PetscMalloc1(nis,&temp_idxs);
7678:     PetscMalloc1(psum,&temp_idxs[0]);
7679:     for (i=1;i<nis;i++) {
7680:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7681:     }
          /* second pass: count_is is reset and reused as the fill position of each IS */
7682:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7683:     ptr_idxs = recv_buffer_idxs_is;
7684:     for (i=0;i<n_recvs;i++) {
7685:       for (j=0;j<nis;j++) {
7686:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7687:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7688:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7689:         ptr_idxs += plen+1; /* shift pointer to received data */
7690:       }
7691:     }
          /* replace each IS of the caller with the sorted, de-duplicated received indices on comm_n */
7692:     for (i=0;i<nis;i++) {
7693:       ISDestroy(&isarray[i]);
7694:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7695:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7696:     }
7697:     PetscFree(count_is);
7698:     PetscFree(temp_idxs[0]);
7699:     PetscFree(temp_idxs);
7700:   }
7701:   /* free workspace */
        /* each send buffer is released only after the sends using it have completed */
7702:   PetscFree(recv_buffer_idxs_is);
7703:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7704:   PetscFree(send_buffer_idxs);
7705:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7706:   if (isdense) {
7707:     MatISGetLocalMat(mat,&local_mat);
7708:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7709:     MatISRestoreLocalMat(mat,&local_mat);
7710:   } else {
7711:     /* PetscFree(send_buffer_vals); */
7712:   }
7713:   if (nis) {
7714:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7715:     PetscFree(send_buffer_idxs_is);
7716:   }

7718:   if (nvecs) {
7719:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7720:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7721:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
          /* the vector of the caller is replaced by a new one on comm_n with new_local_rows local entries */
7722:     VecDestroy(&nnsp_vec[0]);
7723:     VecCreate(comm_n,&nnsp_vec[0]);
7724:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7725:     VecSetType(nnsp_vec[0],VECSTANDARD);
7726:     /* set values */
7727:     ptr_vals = recv_buffer_vecs;
7728:     ptr_idxs = recv_buffer_idxs_local;
          /* send_buffer_vecs is reused here as the array of the new vector; received values are
             accumulated at the local positions of the corresponding received indices */
7729:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7730:     for (i=0;i<n_recvs;i++) {
7731:       PetscInt j;
7732:       for (j=0;j<*(ptr_idxs+1);j++) {
7733:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7734:       }
7735:       ptr_idxs += olengths_idxs[i];
7736:       ptr_vals += olengths_idxs[i]-2;
7737:     }
7738:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7739:     VecAssemblyBegin(nnsp_vec[0]);
7740:     VecAssemblyEnd(nnsp_vec[0]);
7741:   }

7743:   PetscFree(recv_buffer_vecs);
7744:   PetscFree(recv_buffer_idxs_local);
7745:   PetscFree(recv_req_idxs);
7746:   PetscFree(recv_req_vals);
7747:   PetscFree(recv_req_vecs);
7748:   PetscFree(recv_req_idxs_is);
7749:   PetscFree(send_req_idxs);
7750:   PetscFree(send_req_vals);
7751:   PetscFree(send_req_vecs);
7752:   PetscFree(send_req_idxs_is);
7753:   PetscFree(ilengths_vals);
7754:   PetscFree(ilengths_idxs);
7755:   PetscFree(olengths_vals);
7756:   PetscFree(olengths_idxs);
7757:   PetscFree(onodes);
7758:   if (nis) {
7759:     PetscFree(ilengths_idxs_is);
7760:     PetscFree(olengths_idxs_is);
7761:     PetscFree(onodes_is);
7762:   }
7763:   PetscSubcommDestroy(&subcomm);
7764:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7765:     MatDestroy(mat_n);
7766:     for (i=0;i<nis;i++) {
7767:       ISDestroy(&isarray[i]);
7768:     }
7769:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7770:       VecDestroy(&nnsp_vec[0]);
7771:     }
7772:     *mat_n = NULL;
7773:   }
7774:   return(0);
7775: }

7777: /* temporary hack into ksp private data structure */
7778:  #include <petsc/private/kspimpl.h>

7780: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7781: {
7782:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7783:   PC_IS                  *pcis = (PC_IS*)pc->data;
7784:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
7785:   Mat                    coarsedivudotp = NULL;
7786:   Mat                    coarseG,t_coarse_mat_is;
7787:   MatNullSpace           CoarseNullSpace = NULL;
7788:   ISLocalToGlobalMapping coarse_islg;
7789:   IS                     coarse_is,*isarray;
7790:   PetscInt               i,im_active=-1,active_procs=-1;
7791:   PetscInt               nis,nisdofs,nisneu,nisvert;
7792:   PetscInt               coarse_eqs_per_proc;
7793:   PC                     pc_temp;
7794:   PCType                 coarse_pc_type;
7795:   KSPType                coarse_ksp_type;
7796:   PetscBool              multilevel_requested,multilevel_allowed;
7797:   PetscBool              coarse_reuse;
7798:   PetscInt               ncoarse,nedcfield;
7799:   PetscBool              compute_vecs = PETSC_FALSE;
7800:   PetscScalar            *array;
7801:   MatReuse               coarse_mat_reuse;
7802:   PetscBool              restr, full_restr, have_void;
7803:   PetscMPIInt            size;
7804:   PetscErrorCode         ierr;

7807:   PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
7808:   /* Assign global numbering to coarse dofs */
7809:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7810:     PetscInt ocoarse_size;
7811:     compute_vecs = PETSC_TRUE;

7813:     pcbddc->new_primal_space = PETSC_TRUE;
7814:     ocoarse_size = pcbddc->coarse_size;
7815:     PetscFree(pcbddc->global_primal_indices);
7816:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7817:     /* see if we can avoid some work */
7818:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7819:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7820:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7821:         KSPReset(pcbddc->coarse_ksp);
7822:         coarse_reuse = PETSC_FALSE;
7823:       } else { /* we can safely reuse already computed coarse matrix */
7824:         coarse_reuse = PETSC_TRUE;
7825:       }
7826:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7827:       coarse_reuse = PETSC_FALSE;
7828:     }
7829:     /* reset any subassembling information */
7830:     if (!coarse_reuse || pcbddc->recompute_topography) {
7831:       ISDestroy(&pcbddc->coarse_subassembling);
7832:     }
7833:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7834:     coarse_reuse = PETSC_TRUE;
7835:   }
7836:   /* assemble coarse matrix */
7837:   if (coarse_reuse && pcbddc->coarse_ksp) {
7838:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7839:     PetscObjectReference((PetscObject)coarse_mat);
7840:     coarse_mat_reuse = MAT_REUSE_MATRIX;
7841:   } else {
7842:     coarse_mat = NULL;
7843:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7844:   }

7846:   /* creates temporary l2gmap and IS for coarse indexes */
7847:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7848:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

7850:   /* creates temporary MATIS object for coarse matrix */
7851:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7852:   MatDenseGetArray(coarse_submat_dense,&array);
7853:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7854:   MatDenseRestoreArray(coarse_submat_dense,&array);
7855:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7856:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7857:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7858:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7859:   MatDestroy(&coarse_submat_dense);

7861:   /* count "active" (i.e. with positive local size) and "void" processes */
7862:   im_active = !!(pcis->n);
7863:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

7865:   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7866:   /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */
7867:   /* full_restr : just use the receivers from the subassembling pattern */
7868:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
7869:   coarse_mat_is        = NULL;
7870:   multilevel_allowed   = PETSC_FALSE;
7871:   multilevel_requested = PETSC_FALSE;
7872:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7873:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7874:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7875:   if (multilevel_requested) {
7876:     ncoarse    = active_procs/pcbddc->coarsening_ratio;
7877:     restr      = PETSC_FALSE;
7878:     full_restr = PETSC_FALSE;
7879:   } else {
7880:     ncoarse    = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
7881:     restr      = PETSC_TRUE;
7882:     full_restr = PETSC_TRUE;
7883:   }
7884:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7885:   ncoarse = PetscMax(1,ncoarse);
7886:   if (!pcbddc->coarse_subassembling) {
7887:     if (pcbddc->coarsening_ratio > 1) {
7888:       if (multilevel_requested) {
7889:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7890:       } else {
7891:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7892:       }
7893:     } else {
7894:       PetscMPIInt rank;
7895:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7896:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7897:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7898:     }
7899:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7900:     PetscInt    psum;
7901:     if (pcbddc->coarse_ksp) psum = 1;
7902:     else psum = 0;
7903:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7904:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7905:   }
7906:   /* determine if we can go multilevel */
7907:   if (multilevel_requested) {
7908:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7909:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7910:   }
7911:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

7913:   /* dump subassembling pattern */
7914:   if (pcbddc->dbg_flag && multilevel_allowed) {
7915:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7916:   }
7917:   /* compute dofs splitting and neumann boundaries for coarse dofs */
7918:   nedcfield = -1;
7919:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7920:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
7921:     const PetscInt         *idxs;
7922:     ISLocalToGlobalMapping tmap;

7924:     /* create map between primal indices (in local representative ordering) and local primal numbering */
7925:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7926:     /* allocate space for temporary storage */
7927:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7928:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7929:     /* allocate for IS array */
7930:     nisdofs = pcbddc->n_ISForDofsLocal;
7931:     if (pcbddc->nedclocal) {
7932:       if (pcbddc->nedfield > -1) {
7933:         nedcfield = pcbddc->nedfield;
7934:       } else {
7935:         nedcfield = 0;
7936:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
7937:         nisdofs = 1;
7938:       }
7939:     }
7940:     nisneu = !!pcbddc->NeumannBoundariesLocal;
7941:     nisvert = 0; /* nisvert is not used */
7942:     nis = nisdofs + nisneu + nisvert;
7943:     PetscMalloc1(nis,&isarray);
7944:     /* dofs splitting */
7945:     for (i=0;i<nisdofs;i++) {
7946:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
7947:       if (nedcfield != i) {
7948:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7949:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7950:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7951:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
7952:       } else {
7953:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
7954:         ISGetIndices(pcbddc->nedclocal,&idxs);
7955:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7956:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
7957:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
7958:       }
7959:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7960:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
7961:       /* ISView(isarray[i],0); */
7962:     }
7963:     /* neumann boundaries */
7964:     if (pcbddc->NeumannBoundariesLocal) {
7965:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7966:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
7967:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7968:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7969:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7970:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7971:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
7972:       /* ISView(isarray[nisdofs],0); */
7973:     }
7974:     /* free memory */
7975:     PetscFree(tidxs);
7976:     PetscFree(tidxs2);
7977:     ISLocalToGlobalMappingDestroy(&tmap);
7978:   } else {
7979:     nis = 0;
7980:     nisdofs = 0;
7981:     nisneu = 0;
7982:     nisvert = 0;
7983:     isarray = NULL;
7984:   }
7985:   /* destroy no longer needed map */
7986:   ISLocalToGlobalMappingDestroy(&coarse_islg);

7988:   /* subassemble */
7989:   if (multilevel_allowed) {
7990:     Vec       vp[1];
7991:     PetscInt  nvecs = 0;
7992:     PetscBool reuse,reuser;

7994:     if (coarse_mat) reuse = PETSC_TRUE;
7995:     else reuse = PETSC_FALSE;
7996:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7997:     vp[0] = NULL;
7998:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7999:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8000:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8001:       VecSetType(vp[0],VECSTANDARD);
8002:       nvecs = 1;

8004:       if (pcbddc->divudotp) {
8005:         Mat      B,loc_divudotp;
8006:         Vec      v,p;
8007:         IS       dummy;
8008:         PetscInt np;

8010:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8011:         MatGetSize(loc_divudotp,&np,NULL);
8012:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8013:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8014:         MatCreateVecs(B,&v,&p);
8015:         VecSet(p,1.);
8016:         MatMultTranspose(B,p,v);
8017:         VecDestroy(&p);
8018:         MatDestroy(&B);
8019:         VecGetArray(vp[0],&array);
8020:         VecPlaceArray(pcbddc->vec1_P,array);
8021:         VecRestoreArray(vp[0],&array);
8022:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8023:         VecResetArray(pcbddc->vec1_P);
8024:         ISDestroy(&dummy);
8025:         VecDestroy(&v);
8026:       }
8027:     }
8028:     if (reuser) {
8029:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8030:     } else {
8031:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8032:     }
8033:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8034:       PetscScalar *arraym,*arrayv;
8035:       PetscInt    nl;
8036:       VecGetLocalSize(vp[0],&nl);
8037:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8038:       MatDenseGetArray(coarsedivudotp,&arraym);
8039:       VecGetArray(vp[0],&arrayv);
8040:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8041:       VecRestoreArray(vp[0],&arrayv);
8042:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8043:       VecDestroy(&vp[0]);
8044:     } else {
8045:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8046:     }
8047:   } else {
8048:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8049:   }
8050:   if (coarse_mat_is || coarse_mat) {
8051:     if (!multilevel_allowed) {
8052:       MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8053:     } else {
8054:       Mat A;

8056:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8057:       if (coarse_mat_is) {
8058:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8059:         PetscObjectReference((PetscObject)coarse_mat_is);
8060:         coarse_mat = coarse_mat_is;
8061:       }
8062:       /* be sure we don't have MatSeqDENSE as local mat */
8063:       MatISGetLocalMat(coarse_mat,&A);
8064:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8065:     }
8066:   }
8067:   MatDestroy(&t_coarse_mat_is);
8068:   MatDestroy(&coarse_mat_is);

8070:   /* create local to global scatters for coarse problem */
8071:   if (compute_vecs) {
8072:     PetscInt lrows;
8073:     VecDestroy(&pcbddc->coarse_vec);
8074:     if (coarse_mat) {
8075:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8076:     } else {
8077:       lrows = 0;
8078:     }
8079:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8080:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8081:     VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8082:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8083:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8084:   }
8085:   ISDestroy(&coarse_is);

8087:   /* set defaults for coarse KSP and PC */
8088:   if (multilevel_allowed) {
8089:     coarse_ksp_type = KSPRICHARDSON;
8090:     coarse_pc_type  = PCBDDC;
8091:   } else {
8092:     coarse_ksp_type = KSPPREONLY;
8093:     coarse_pc_type  = PCREDUNDANT;
8094:   }

8096:   /* print some info if requested */
8097:   if (pcbddc->dbg_flag) {
8098:     if (!multilevel_allowed) {
8099:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8100:       if (multilevel_requested) {
8101:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8102:       } else if (pcbddc->max_levels) {
8103:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8104:       }
8105:       PetscViewerFlush(pcbddc->dbg_viewer);
8106:     }
8107:   }

8109:   /* communicate coarse discrete gradient */
8110:   coarseG = NULL;
8111:   if (pcbddc->nedcG && multilevel_allowed) {
8112:     MPI_Comm ccomm;
8113:     if (coarse_mat) {
8114:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8115:     } else {
8116:       ccomm = MPI_COMM_NULL;
8117:     }
8118:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8119:   }

8121:   /* create the coarse KSP object only once with defaults */
8122:   if (coarse_mat) {
8123:     PetscBool   isredundant,isnn,isbddc;
8124:     PetscViewer dbg_viewer = NULL;

8126:     if (pcbddc->dbg_flag) {
8127:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8128:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8129:     }
8130:     if (!pcbddc->coarse_ksp) {
8131:       char   prefix[256],str_level[16];
8132:       size_t len;

8134:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8135:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8136:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8137:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8138:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8139:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8140:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8141:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8142:       /* TODO is this logic correct? should check for coarse_mat type */
8143:       PCSetType(pc_temp,coarse_pc_type);
8144:       /* prefix */
8145:       PetscStrcpy(prefix,"");
8146:       PetscStrcpy(str_level,"");
8147:       if (!pcbddc->current_level) {
8148:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8149:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8150:       } else {
8151:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8152:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8153:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8154:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8155:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8156:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8157:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8158:       }
8159:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8160:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8161:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8162:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8163:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8164:       /* allow user customization */
8165:       KSPSetFromOptions(pcbddc->coarse_ksp);
8166:       /* get some info after set from options */
8167:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8168:       /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8169:       PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8170:       PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8171:       if (multilevel_allowed && !isbddc && !isnn) {
8172:         isbddc = PETSC_TRUE;
8173:         PCSetType(pc_temp,PCBDDC);
8174:         PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8175:         PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8176:         PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8177:       }
8178:     }
8179:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8180:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8181:     if (nisdofs) {
8182:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8183:       for (i=0;i<nisdofs;i++) {
8184:         ISDestroy(&isarray[i]);
8185:       }
8186:     }
8187:     if (nisneu) {
8188:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8189:       ISDestroy(&isarray[nisdofs]);
8190:     }
8191:     if (nisvert) {
8192:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8193:       ISDestroy(&isarray[nis-1]);
8194:     }
8195:     if (coarseG) {
8196:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8197:     }

8199:     /* get some info after set from options */
8200:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8201:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8202:     if (isbddc && !multilevel_allowed) {
8203:       PCSetType(pc_temp,coarse_pc_type);
8204:       isbddc = PETSC_FALSE;
8205:     }
8206:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8207:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8208:     if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8209:       PCSetType(pc_temp,PCBDDC);
8210:       isbddc = PETSC_TRUE;
8211:     }
8212:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8213:     if (isredundant) {
8214:       KSP inner_ksp;
8215:       PC  inner_pc;

8217:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8218:       KSPGetPC(inner_ksp,&inner_pc);
8219:     }

8221:     /* parameters which miss an API */
8222:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8223:     if (isbddc) {
8224:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8226:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8227:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8228:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8229:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8230:       if (pcbddc_coarse->benign_saddle_point) {
8231:         Mat                    coarsedivudotp_is;
8232:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8233:         IS                     row,col;
8234:         const PetscInt         *gidxs;
8235:         PetscInt               n,st,M,N;

8237:         MatGetSize(coarsedivudotp,&n,NULL);
8238:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8239:         st   = st-n;
8240:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8241:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8242:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8243:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8244:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8245:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8246:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8247:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8248:         ISGetSize(row,&M);
8249:         MatGetSize(coarse_mat,&N,NULL);
8250:         ISDestroy(&row);
8251:         ISDestroy(&col);
8252:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8253:         MatSetType(coarsedivudotp_is,MATIS);
8254:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8255:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8256:         ISLocalToGlobalMappingDestroy(&rl2g);
8257:         ISLocalToGlobalMappingDestroy(&cl2g);
8258:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8259:         MatDestroy(&coarsedivudotp);
8260:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8261:         MatDestroy(&coarsedivudotp_is);
8262:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8263:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8264:       }
8265:     }

8267:     /* propagate symmetry info of coarse matrix */
8268:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8269:     if (pc->pmat->symmetric_set) {
8270:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8271:     }
8272:     if (pc->pmat->hermitian_set) {
8273:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8274:     }
8275:     if (pc->pmat->spd_set) {
8276:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8277:     }
8278:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8279:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8280:     }
8281:     /* set operators */
8282:     MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8283:     MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8284:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8285:     if (pcbddc->dbg_flag) {
8286:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8287:     }
8288:   }
8289:   MatDestroy(&coarseG);
8290:   PetscFree(isarray);
8291: #if 0
8292:   {
8293:     PetscViewer viewer;
8294:     char filename[256];
8295:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8296:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8297:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8298:     MatView(coarse_mat,viewer);
8299:     PetscViewerPopFormat(viewer);
8300:     PetscViewerDestroy(&viewer);
8301:   }
8302: #endif

8304:   if (pcbddc->coarse_ksp) {
8305:     Vec crhs,csol;

8307:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8308:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8309:     if (!csol) {
8310:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8311:     }
8312:     if (!crhs) {
8313:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8314:     }
8315:   }
8316:   MatDestroy(&coarsedivudotp);

8318:   /* compute null space for coarse solver if the benign trick has been requested */
8319:   if (pcbddc->benign_null) {

8321:     VecSet(pcbddc->vec1_P,0.);
8322:     for (i=0;i<pcbddc->benign_n;i++) {
8323:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8324:     }
8325:     VecAssemblyBegin(pcbddc->vec1_P);
8326:     VecAssemblyEnd(pcbddc->vec1_P);
8327:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8328:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8329:     if (coarse_mat) {
8330:       Vec         nullv;
8331:       PetscScalar *array,*array2;
8332:       PetscInt    nl;

8334:       MatCreateVecs(coarse_mat,&nullv,NULL);
8335:       VecGetLocalSize(nullv,&nl);
8336:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8337:       VecGetArray(nullv,&array2);
8338:       PetscMemcpy(array2,array,nl*sizeof(*array));
8339:       VecRestoreArray(nullv,&array2);
8340:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8341:       VecNormalize(nullv,NULL);
8342:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8343:       VecDestroy(&nullv);
8344:     }
8345:   }
8346:   PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);

8348:   PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8349:   if (pcbddc->coarse_ksp) {
8350:     PetscBool ispreonly;

8352:     if (CoarseNullSpace) {
8353:       PetscBool isnull;
8354:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8355:       if (isnull) {
8356:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8357:       }
8358:       /* TODO: add local nullspaces (if any) */
8359:     }
8360:     /* setup coarse ksp */
8361:     KSPSetUp(pcbddc->coarse_ksp);
8362:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8363:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8364:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8365:       KSP       check_ksp;
8366:       KSPType   check_ksp_type;
8367:       PC        check_pc;
8368:       Vec       check_vec,coarse_vec;
8369:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8370:       PetscInt  its;
8371:       PetscBool compute_eigs;
8372:       PetscReal *eigs_r,*eigs_c;
8373:       PetscInt  neigs;
8374:       const char *prefix;

8376:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8377:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8378:       PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8379:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8380:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8381:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8382:       /* avoid setting up unneeded objects */
8383:       KSPGetPC(check_ksp,&check_pc);
8384:       PCSetType(check_pc,PCNONE);
8385:       if (ispreonly) {
8386:         check_ksp_type = KSPPREONLY;
8387:         compute_eigs = PETSC_FALSE;
8388:       } else {
8389:         check_ksp_type = KSPGMRES;
8390:         compute_eigs = PETSC_TRUE;
8391:       }
8392:       KSPSetType(check_ksp,check_ksp_type);
8393:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8394:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8395:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8396:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8397:       KSPSetOptionsPrefix(check_ksp,prefix);
8398:       KSPAppendOptionsPrefix(check_ksp,"check_");
8399:       KSPSetFromOptions(check_ksp);
8400:       KSPSetUp(check_ksp);
8401:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8402:       KSPSetPC(check_ksp,check_pc);
8403:       /* create random vec */
8404:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8405:       VecSetRandom(check_vec,NULL);
8406:       MatMult(coarse_mat,check_vec,coarse_vec);
8407:       /* solve coarse problem */
8408:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8409:       /* set eigenvalue estimation if preonly has not been requested */
8410:       if (compute_eigs) {
8411:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8412:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8413:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8414:         if (neigs) {
8415:           lambda_max = eigs_r[neigs-1];
8416:           lambda_min = eigs_r[0];
8417:           if (pcbddc->use_coarse_estimates) {
8418:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8419:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8420:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8421:             }
8422:           }
8423:         }
8424:       }

8426:       /* check coarse problem residual error */
8427:       if (pcbddc->dbg_flag) {
8428:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8429:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8430:         VecAXPY(check_vec,-1.0,coarse_vec);
8431:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8432:         MatMult(coarse_mat,check_vec,coarse_vec);
8433:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8434:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8435:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8436:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8437:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8438:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8439:         if (CoarseNullSpace) {
8440:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8441:         }
8442:         if (compute_eigs) {
8443:           PetscReal          lambda_max_s,lambda_min_s;
8444:           KSPConvergedReason reason;
8445:           KSPGetType(check_ksp,&check_ksp_type);
8446:           KSPGetIterationNumber(check_ksp,&its);
8447:           KSPGetConvergedReason(check_ksp,&reason);
8448:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8449:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8450:           for (i=0;i<neigs;i++) {
8451:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8452:           }
8453:         }
8454:         PetscViewerFlush(dbg_viewer);
8455:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8456:       }
8457:       VecDestroy(&check_vec);
8458:       VecDestroy(&coarse_vec);
8459:       KSPDestroy(&check_ksp);
8460:       if (compute_eigs) {
8461:         PetscFree(eigs_r);
8462:         PetscFree(eigs_c);
8463:       }
8464:     }
8465:   }
8466:   MatNullSpaceDestroy(&CoarseNullSpace);
8467:   /* print additional info */
8468:   if (pcbddc->dbg_flag) {
8469:     /* wait until all processes reach this point */
8470:     PetscBarrier((PetscObject)pc);
8471:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8472:     PetscViewerFlush(pcbddc->dbg_viewer);
8473:   }

8475:   /* free memory */
8476:   MatDestroy(&coarse_mat);
8477:   PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8478:   return(0);
8479: }

8481: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8482: {
8483:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8484:   PC_IS*         pcis = (PC_IS*)pc->data;
8485:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8486:   IS             subset,subset_mult,subset_n;
8487:   PetscInt       local_size,coarse_size=0;
8488:   PetscInt       *local_primal_indices=NULL;
8489:   const PetscInt *t_local_primal_indices;

8493:   /* Compute global number of coarse dofs */
8494:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8495:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8496:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8497:   ISDestroy(&subset_n);
8498:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8499:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8500:   ISDestroy(&subset);
8501:   ISDestroy(&subset_mult);
8502:   ISGetLocalSize(subset_n,&local_size);
8503:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8504:   PetscMalloc1(local_size,&local_primal_indices);
8505:   ISGetIndices(subset_n,&t_local_primal_indices);
8506:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8507:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8508:   ISDestroy(&subset_n);

8510:   /* check numbering */
8511:   if (pcbddc->dbg_flag) {
8512:     PetscScalar coarsesum,*array,*array2;
8513:     PetscInt    i;
8514:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8516:     PetscViewerFlush(pcbddc->dbg_viewer);
8517:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8518:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8519:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8520:     /* counter */
8521:     VecSet(pcis->vec1_global,0.0);
8522:     VecSet(pcis->vec1_N,1.0);
8523:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8524:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8525:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8526:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8527:     VecSet(pcis->vec1_N,0.0);
8528:     for (i=0;i<pcbddc->local_primal_size;i++) {
8529:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8530:     }
8531:     VecAssemblyBegin(pcis->vec1_N);
8532:     VecAssemblyEnd(pcis->vec1_N);
8533:     VecSet(pcis->vec1_global,0.0);
8534:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8535:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8536:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8537:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8538:     VecGetArray(pcis->vec1_N,&array);
8539:     VecGetArray(pcis->vec2_N,&array2);
8540:     for (i=0;i<pcis->n;i++) {
8541:       if (array[i] != 0.0 && array[i] != array2[i]) {
8542:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8543:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8544:         set_error = PETSC_TRUE;
8545:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8546:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8547:       }
8548:     }
8549:     VecRestoreArray(pcis->vec2_N,&array2);
8550:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8551:     PetscViewerFlush(pcbddc->dbg_viewer);
8552:     for (i=0;i<pcis->n;i++) {
8553:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8554:     }
8555:     VecRestoreArray(pcis->vec1_N,&array);
8556:     VecSet(pcis->vec1_global,0.0);
8557:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8558:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8559:     VecSum(pcis->vec1_global,&coarsesum);
8560:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8561:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8562:       PetscInt *gidxs;

8564:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8565:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8566:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8567:       PetscViewerFlush(pcbddc->dbg_viewer);
8568:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8569:       for (i=0;i<pcbddc->local_primal_size;i++) {
8570:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8571:       }
8572:       PetscViewerFlush(pcbddc->dbg_viewer);
8573:       PetscFree(gidxs);
8574:     }
8575:     PetscViewerFlush(pcbddc->dbg_viewer);
8576:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8577:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8578:   }

8580:   /* get back data */
8581:   *coarse_size_n = coarse_size;
8582:   *local_primal_indices_n = local_primal_indices;
8583:   return(0);
8584: }

/*
   PCBDDCGlobalToLocal - Creates an IS listing the positions of lwork that are reached,
   through g2l_ctx, by the entries of gwork selected by globalis.

   Input Parameters:
+  g2l_ctx  - scatter applied in SCATTER_FORWARD mode from gwork to lwork
.  gwork    - work vector addressed by the indices of globalis; overwritten
.  lwork    - work vector receiving the scattered marks; overwritten
-  globalis - indices to be marked in gwork

   Output Parameter:
.  localis  - newly created IS with the marked positions of lwork, in increasing order

   Notes:
   The entries listed in globalis are set to 1.0 in a zeroed gwork, the result is scattered
   into a zeroed lwork, and every position of lwork holding a value larger than 0.5 is collected.
   VEC_IGNORE_NEGATIVE_INDICES is switched on for gwork when globalis exposes an index array,
   and it is not switched off again here.
   The index array is handed over to the new IS (PETSC_OWN_POINTER).
   NOTE(review): no return code is checked in the lines visible here - presumably the
   error-checking wrappers were dropped from this listing; confirm against the real source.
*/
8586: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8587: {
8588:   IS             localis_t;
8589:   PetscInt       i,lsize,*idxs,n;
8590:   PetscScalar    *vals;

8594:   /* get indices in local ordering exploiting local to global map */
8595:   ISGetLocalSize(globalis,&lsize);
        /* vals: temporary array of lsize ones, used as the values inserted in gwork */
8596:   PetscMalloc1(lsize,&vals);
8597:   for (i=0;i<lsize;i++) vals[i] = 1.0;
        /* idxs first aliases the (read-only) indices of globalis; the const qualifier is cast away */
8598:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8599:   VecSet(gwork,0.0);
8600:   VecSet(lwork,0.0);
8601:   if (idxs) { /* multilevel guard */
8602:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8603:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8604:   }
        /* assembly is started and ended unconditionally; the temporaries are released in between */
8605:   VecAssemblyBegin(gwork);
8606:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8607:   PetscFree(vals);
8608:   VecAssemblyEnd(gwork);
8609:   /* now compute set in local ordering */
8610:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8611:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
        /* vals is reused: from here on it is the read-only array of lwork */
8612:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
        /* NOTE(review): n is the size returned by VecGetSize and is used as the length of the
           array of lwork - this assumes lwork is a sequential vector; TODO confirm */
8613:   VecGetSize(lwork,&n);
        /* first pass: count the marked entries (lsize is reused as the counter) */
8614:   for (i=0,lsize=0;i<n;i++) {
8615:     if (PetscRealPart(vals[i]) > 0.5) {
8616:       lsize++;
8617:     }
8618:   }
        /* idxs is reused: now a freshly allocated array that will be owned by the output IS */
8619:   PetscMalloc1(lsize,&idxs);
        /* second pass: store the positions of the marked entries */
8620:   for (i=0,lsize=0;i<n;i++) {
8621:     if (PetscRealPart(vals[i]) > 0.5) {
8622:       idxs[lsize++] = i;
8623:     }
8624:   }
8625:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
        /* the IS is created on the communicator of gwork and takes ownership of idxs */
8626:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8627:   *localis = localis_t;
8628:   return(0);
8629: }

/*
   PCBDDCSetUpSubSchurs - Sets up pcbddc->sub_schurs by calling PCBDDCSubSchursSetUp.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   The routine performs three steps, bracketed by the PC_BDDC_Schurs log event of the current level:
   1) selects the adjacency (used_xadj,used_adjncy) passed to the sub_schurs setup;
   2) creates a Schur complement matrix S_j from pcis->A_II, A_IB, A_BI, A_BB and calls
      PCBDDCSubSchursSetUp, following one of two paths depending on sub_schurs->schur_explicit;
   3) destroys S_j and frees the adjacency if it was copied here.
   Side effects visible in this block: pcbddc->sub_schurs_layers may be reset to -1,
   pcbddc->local_mat may be converted to MATSEQAIJ, sub_schurs->change_with_qr may be set.
   NOTE(review): ierr is declared but no return code is checked in the lines visible here -
   presumably the error-checking wrappers were dropped from this listing; confirm against the real source.
*/
8631: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8632: {
8633:   PC_IS               *pcis=(PC_IS*)pc->data;
8634:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8635:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8636:   Mat                 S_j;
8637:   PetscInt            *used_xadj,*used_adjncy;
8638:   PetscBool           free_used_adj;
8639:   PetscErrorCode      ierr;

8642:   PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8643:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
        /* free_used_adj records whether used_xadj/used_adjncy were allocated here and must be freed at the end */
8644:   free_used_adj = PETSC_FALSE;
8645:   if (pcbddc->sub_schurs_layers == -1) {
          /* layers == -1: no adjacency is passed */
8646:     used_xadj = NULL;
8647:     used_adjncy = NULL;
8648:   } else {
          /* the first two branches both borrow the arrays stored in pcbddc->mat_graph (not copied, not freed here) */
8649:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8650:       used_xadj = pcbddc->mat_graph->xadj;
8651:       used_adjncy = pcbddc->mat_graph->adjncy;
8652:     } else if (pcbddc->computed_rowadj) {
8653:       used_xadj = pcbddc->mat_graph->xadj;
8654:       used_adjncy = pcbddc->mat_graph->adjncy;
8655:     } else {
            /* otherwise take the row structure of pcbddc->local_mat and keep a private copy of it */
8656:       PetscBool      flg_row=PETSC_FALSE;
8657:       const PetscInt *xadj,*adjncy;
8658:       PetscInt       nvtxs;

8660:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8661:       if (flg_row) {
              /* nvtxs+1 row offsets and xadj[nvtxs] column indices are copied before MatRestoreRowIJ is called */
8662:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8663:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8664:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8665:         free_used_adj = PETSC_TRUE;
8666:       } else {
              /* MatGetRowIJ did not set flg_row: fall back to the "no adjacency" case and record it in pcbddc */
8667:         pcbddc->sub_schurs_layers = -1;
8668:         used_xadj = NULL;
8669:         used_adjncy = NULL;
8670:       }
8671:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8672:     }
8673:   }

8675:   /* setup sub_schurs data */
        /* pcis->A_II is passed for both of the first two arguments */
8676:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8677:   if (!sub_schurs->schur_explicit) {
8678:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8679:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
          /* non-explicit path: no local matrix, scaling, benign data or change of basis is passed */
8680:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8681:   } else {
          /* explicit path: change, scaling and change_primal stay NULL unless set below */
8682:     Mat       change = NULL;
8683:     Vec       scaling = NULL;
8684:     IS        change_primal = NULL, iP;
8685:     PetscInt  benign_n;
8686:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8687:     PetscBool isseqaij,need_change = PETSC_FALSE;
8688:     PetscBool discrete_harmonic = PETSC_FALSE;

          /* when vertices are not used, solvers are reused only if sub_schurs->is_vertices is locally empty */
8690:     if (!pcbddc->use_vertices && reuse_solvers) {
8691:       PetscInt n_vertices;

8693:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8694:       reuse_solvers = (PetscBool)!n_vertices;
8695:     }
          /* make sure pcbddc->local_mat has type MATSEQAIJ */
8696:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8697:     if (!isseqaij) {
8698:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8699:       if (matis->A == pcbddc->local_mat) {
              /* local_mat is the same object as matis->A: drop this handle and convert into a new matrix
                 (presumably so that matis->A itself keeps its type - confirm) */
8700:         MatDestroy(&pcbddc->local_mat);
8701:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8702:       } else {
              /* local_mat is a distinct object: convert it in place */
8703:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8704:       }
8705:     }
          /* benign_n passed to the setup is zero when benign_change_explicit is set */
8706:     if (!pcbddc->benign_change_explicit) {
8707:       benign_n = pcbddc->benign_n;
8708:     } else {
8709:       benign_n = 0;
8710:     }
8711:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8712:        We need a global reduction to avoid possible deadlocks.
8713:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8714:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8715:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
            /* logical OR over the communicator of pc, then negated: need_change is true only if no process has sub_schurs->change */
8716:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8717:       need_change = (PetscBool)(!need_change);
8718:     }
8719:     /* If the user defines additional constraints, we import them here.
8720:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8721:     if (need_change) {
8722:       PC_IS   *pcisf;
8723:       PC_BDDC *pcbddcf;
8724:       PC      pcf;

8726:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
            /* temporary PCBDDC object sharing the operators of pc */
8727:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8728:       PCSetOperators(pcf,pc->mat,pc->pmat);
8729:       PCSetType(pcf,PCBDDC);

8731:       /* hacks */
            /* the temporary PC borrows (without reference counting) the fields below from pc;
               pcisf and pcbddcf are two views of the same pcf->data pointer */
8732:       pcisf                        = (PC_IS*)pcf->data;
8733:       pcisf->is_B_local            = pcis->is_B_local;
8734:       pcisf->vec1_N                = pcis->vec1_N;
8735:       pcisf->BtoNmap               = pcis->BtoNmap;
8736:       pcisf->n                     = pcis->n;
8737:       pcisf->n_B                   = pcis->n_B;
8738:       pcbddcf                      = (PC_BDDC*)pcf->data;
            /* the graph struct of pcf is freed and replaced by the one of pc */
8739:       PetscFree(pcbddcf->mat_graph);
8740:       pcbddcf->mat_graph           = pcbddc->mat_graph;
8741:       pcbddcf->use_faces           = PETSC_TRUE;
8742:       pcbddcf->use_change_of_basis = PETSC_TRUE;
8743:       pcbddcf->use_change_on_faces = PETSC_TRUE;
8744:       pcbddcf->use_qr_single       = PETSC_TRUE;
8745:       pcbddcf->fake_change         = PETSC_TRUE;

8747:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8748:       PCBDDCConstraintsSetUp(pcf);
            /* use_qr_single is read back after the setup */
8749:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
            /* change_primal: copy of the first n_vertices entries of local_primal_ref_node */
8750:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
            /* the constraint matrix is moved out of pcf: the field is cleared and change is destroyed at the end of this branch */
8751:       change = pcbddcf->ConstraintMatrix;
8752:       pcbddcf->ConstraintMatrix = NULL;

8754:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8755:       PetscFree(pcbddcf->sub_schurs);
8756:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8757:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8758:       PetscFree(pcbddcf->primal_indices_local_idxs);
8759:       PetscFree(pcbddcf->onearnullvecs_state);
8760:       PetscFree(pcf->data);
            /* destroy and reset callbacks are cleared before PCDestroy, since pcf->data has already been freed by hand
               (presumably also to keep the borrowed objects untouched - confirm) */
8761:       pcf->ops->destroy = NULL;
8762:       pcf->ops->reset   = NULL;
8763:       PCDestroy(&pcf);
8764:     }
          /* pcis->D is passed as scaling only when deluxe scaling is not used */
8765:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

          /* iP is looked up among the objects composed with pc; the option below is read only when it is present */
8767:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8768:     if (iP) {
8769:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8770:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8771:       PetscOptionsEnd();
8772:     }
8773:     if (discrete_harmonic) {
            /* work on a copy of local_mat whose rows and columns listed in iP are zeroed (diagonal set to 1.0);
               iP is composed with the copy, which is passed to the setup in place of local_mat */
8774:       Mat A;
8775:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8776:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8777:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8778:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8779:       MatDestroy(&A);
8780:     } else {
8781:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8782:     }
          /* both handles are still NULL here when need_change was false */
8783:     MatDestroy(&change);
8784:     ISDestroy(&change_primal);
8785:   }
8786:   MatDestroy(&S_j);

8788:   /* free adjacency */
8789:   if (free_used_adj) {
8790:     PetscFree2(used_xadj,used_adjncy);
8791:   }
8792:   PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8793:   return(0);
8794: }

/*
   PCBDDCInitSubSchurs - Creates pcbddc->sub_schurs if it does not exist yet and initializes it
   with an interface graph.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   When pcbddc->sub_schurs_rebuild is set, a temporary graph is created, set up and destroyed
   inside this routine; otherwise pcbddc->mat_graph is used directly.
   Graph information is printed on pcbddc->dbg_viewer only when pcbddc->dbg_flag is set and
   no rebuild has been requested.
   NOTE(review): ierr is declared but no return code is checked in the lines visible here -
   presumably the error-checking wrappers were dropped from this listing; confirm against the real source.
*/
8796: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8797: {
8798:   PC_IS               *pcis=(PC_IS*)pc->data;
8799:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8800:   PCBDDCGraph         graph;
8801:   PetscErrorCode      ierr;

8804:   /* attach interface graph for determining subsets */
8805:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8806:     IS       verticesIS,verticescomm;
8807:     PetscInt vsize,*idxs;

          /* the candidate vertices of mat_graph are copied into an IS living on the communicator of pc */
8809:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8810:     ISGetSize(verticesIS,&vsize);
8811:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8812:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8813:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8814:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
          /* the temporary graph reuses l2gmap, nvtxs_global, graphmaxcount and custom_minimal_size of the existing setup;
             only DirichletBoundariesLocal and verticescomm are passed as index sets to PCBDDCGraphSetUp */
8815:     PCBDDCGraphCreate(&graph);
8816:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8817:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8818:     ISDestroy(&verticescomm);
8819:     PCBDDCGraphComputeConnectedComponents(graph);
8820:   } else {
          /* no rebuild: graph is just an alias of pcbddc->mat_graph and is not destroyed below */
8821:     graph = pcbddc->mat_graph;
8822:   }
8823:   /* print some info */
8824:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8825:     IS       vertices;
8826:     PetscInt nv,nedges,nfaces;
8827:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8828:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8829:     ISGetSize(vertices,&nv);
8830:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8831:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
          /* counts are cast to int for the %02d conversions.
             NOTE(review): the use_vertices/use_edges/use_faces flags are printed with %D - confirm that
             their type matches the width expected by that conversion */
8832:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
8833:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
8834:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
8835:     PetscViewerFlush(pcbddc->dbg_viewer);
8836:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8837:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8838:   }

8840:   /* sub_schurs init */
        /* the sub_schurs object is created only once and reused on later calls */
8841:   if (!pcbddc->sub_schurs) {
8842:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8843:   }
8844:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

8846:   /* free graph struct */
        /* only the temporary graph created above is destroyed */
8847:   if (pcbddc->sub_schurs_rebuild) {
8848:     PCBDDCGraphDestroy(&graph);
8849:   }
8850:   return(0);
8851: }

/*
   PCBDDCCheckOperator - Debugging check: applies the local Schur complement and then
   PCBDDCApplyInterfacePreconditioner to a random interface vector and prints, for each process,
   the infinity norm of the difference between the result and the starting vector.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   Notes:
   The whole body is skipped unless pcbddc->n_vertices equals pcbddc->local_primal_size.
   Work vectors of pcis and pcbddc (vec1_N, vec1_B, vec2_B, vec1_D, vec1_P, coarse_vec) are overwritten.
   zerodiag is initialized to NULL and never assigned in this routine, so every
   "if (zerodiag)" branch below is skipped and B0_B and dummy_vec stay NULL.
   Output goes through PetscPrintf on PETSC_COMM_SELF.
   NOTE(review): ierr is declared but no return code is checked in the lines visible here -
   presumably the error-checking wrappers were dropped from this listing; confirm against the real source.
*/
8853: PetscErrorCode PCBDDCCheckOperator(PC pc)
8854: {
8855:   PC_IS               *pcis=(PC_IS*)pc->data;
8856:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8857:   PetscErrorCode      ierr;

8860:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8861:     IS             zerodiag = NULL;
8862:     Mat            S_j,B0_B=NULL;
8863:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
8864:     PetscScalar    *p0_check,*array,*array2;
8865:     PetscReal      norm;
8866:     PetscInt       i;

8868:     /* B0 and B0_B */
          /* not executed as written: zerodiag is always NULL here */
8869:     if (zerodiag) {
8870:       IS       dummy;

8872:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8873:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8874:       MatCreateVecs(B0_B,NULL,&dummy_vec);
8875:       ISDestroy(&dummy);
8876:     }
8877:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
          /* ones are added into coarse_vec and read back, so vec_scale_P holds the reciprocal of the
             number of contributions summed on each local primal dof.
             NOTE(review): coarse_vec is not zeroed in this routine before this ADD_VALUES scatter - confirm its state on entry */
8878:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8879:     VecSet(pcbddc->vec1_P,1.0);
8880:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8881:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8882:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8883:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8884:     VecReciprocal(vec_scale_P);
8885:     /* S_j */
8886:     MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8887:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

8889:     /* mimic vector in \widetilde{W}_\Gamma */
8890:     VecSetRandom(pcis->vec1_N,NULL);
8891:     /* continuous in primal space */
          /* random coarse values are brought to vec1_P and written over the entries of vec1_N listed in local_primal_ref_node */
8892:     VecSetRandom(pcbddc->coarse_vec,NULL);
8893:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8894:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8895:     VecGetArray(pcbddc->vec1_P,&array);
          /* p0_check keeps a copy of the last benign_n entries of vec1_P */
8896:     PetscCalloc1(pcbddc->benign_n,&p0_check);
8897:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
8898:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8899:     VecRestoreArray(pcbddc->vec1_P,&array);
8900:     VecAssemblyBegin(pcis->vec1_N);
8901:     VecAssemblyEnd(pcis->vec1_N);
8902:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8903:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
          /* vec_check_B: reference copy of the starting interface vector, compared against at the end */
8904:     VecDuplicate(pcis->vec2_B,&vec_check_B);
8905:     VecCopy(pcis->vec2_B,vec_check_B);

8907:     /* assemble rhs for coarse problem */
8908:     /* \widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8909:     /* local with Schur */
8910:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
          /* not executed as written: zerodiag is always NULL here */
8911:     if (zerodiag) {
8912:       VecGetArray(dummy_vec,&array);
8913:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8914:       VecRestoreArray(dummy_vec,&array);
8915:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8916:     }
8917:     /* sum on primal nodes the local contributions */
          /* vec1_B -> vec1_N, then the entries at local_primal_ref_node are gathered into vec1_P */
8918:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8919:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8920:     VecGetArray(pcis->vec1_N,&array);
8921:     VecGetArray(pcbddc->vec1_P,&array2);
8922:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8923:     VecRestoreArray(pcbddc->vec1_P,&array2);
8924:     VecRestoreArray(pcis->vec1_N,&array);
          /* coarse_vec is zeroed, accumulates the local primal values, and the sums are read back into vec1_P */
8925:     VecSet(pcbddc->coarse_vec,0.);
8926:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8927:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8928:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8929:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
          /* NOTE(review): array is obtained from vec1_P before VecPointwiseMult writes to the same vector and is
             only restored afterwards - confirm that holding the array across that call is intended */
8930:     VecGetArray(pcbddc->vec1_P,&array);
8931:     /* scale primal nodes (BDDC sums contributions) */
8932:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8933:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8934:     VecRestoreArray(pcbddc->vec1_P,&array);
8935:     VecAssemblyBegin(pcis->vec1_N);
8936:     VecAssemblyEnd(pcis->vec1_N);
8937:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8938:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8939:     /* global: \widetilde{B0}_B w_\Gamma */
          /* not executed as written: zerodiag is always NULL here */
8940:     if (zerodiag) {
8941:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
8942:       VecGetArray(dummy_vec,&array);
8943:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8944:       VecRestoreArray(dummy_vec,&array);
8945:     }
8946:     /* BDDC */
          /* presumably the preconditioner reads its input from, and leaves its output in, the pcis work vectors
             (vec1_B, vec1_D) - its implementation is not visible here; confirm */
8947:     VecSet(pcis->vec1_D,0.);
8948:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

          /* vec1_B is saved in vec2_B, then the reference vector is subtracted and the infinity norm is reported */
8950:     VecCopy(pcis->vec1_B,pcis->vec2_B);
8951:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8952:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8953:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
8954:     for (i=0;i<pcbddc->benign_n;i++) {
8955:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8956:     }
          /* release everything created in this routine; dummy_vec and B0_B are still NULL at this point */
8957:     PetscFree(p0_check);
8958:     VecDestroy(&vec_scale_P);
8959:     VecDestroy(&vec_check_B);
8960:     VecDestroy(&dummy_vec);
8961:     MatDestroy(&S_j);
8962:     MatDestroy(&B0_B);
8963:   }
8964:   return(0);
8965: }

8967:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
/*
   MatMPIAIJRestrict - Extracts rows of A according to a row layout built on ccomm and, on the
   processes where ccomm is not MPI_COMM_NULL, wraps the extracted data into a new matrix on ccomm.

   Input Parameters:
+  A     - the matrix to restrict
-  ccomm - the target communicator; may be MPI_COMM_NULL on some processes

   Output Parameter:
.  B     - the new matrix; it is not written on processes where ccomm is MPI_COMM_NULL

   Notes:
   Every process of A takes part in MatCreateSubMatrix; processes with ccomm equal to
   MPI_COMM_NULL request an empty row range (rst = ren = 0).
   B is not assembled through the public interface: its Mat_MPIAIJ fields are filled by hand.
   The blocks a->A, a->B and the vector a->lvec of the extracted matrix are shared by reference,
   colmap and garray are copied, and a new scatter b->Mvctx is created.
   NOTE(review): At->data and (*B)->data are both cast to Mat_MPIAIJ - this assumes that both
   matrices really have that implementation; TODO confirm for the communicators in use.
   NOTE(review): no return code is checked in the lines visible here - presumably the
   error-checking wrappers were dropped from this listing; confirm against the real source.
*/
8968: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8969: {
8970:   Mat            At;
8971:   IS             rows;
8972:   PetscInt       rst,ren;
8974:   PetscLayout    rmap;

        /* default: empty row range, kept on processes that are not part of ccomm */
8977:   rst = ren = 0;
8978:   if (ccomm != MPI_COMM_NULL) {
          /* row layout on ccomm with the global number of rows of A and block size 1; rmap is later handed over to *B */
8979:     PetscLayoutCreate(ccomm,&rmap);
8980:     PetscLayoutSetSize(rmap,A->rmap->N);
8981:     PetscLayoutSetBlockSize(rmap,1);
8982:     PetscLayoutSetUp(rmap);
8983:     PetscLayoutGetRange(rmap,&rst,&ren);
8984:   }
        /* contiguous rows [rst,ren) are requested on the communicator of A; the column IS is passed as NULL */
8985:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
8986:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
8987:   ISDestroy(&rows);

8989:   if (ccomm != MPI_COMM_NULL) {
8990:     Mat_MPIAIJ *a,*b;
8991:     IS         from,to;
8992:     Vec        gvec;
8993:     PetscInt   lsize;

8995:     MatCreate(ccomm,B);
8996:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
8997:     MatSetType(*B,MATAIJ);
          /* the row layout of *B is dropped here and replaced below by rmap; the column layout is set up as is */
8998:     PetscLayoutDestroy(&((*B)->rmap));
8999:     PetscLayoutSetUp((*B)->cmap);
9000:     a    = (Mat_MPIAIJ*)At->data;
9001:     b    = (Mat_MPIAIJ*)(*B)->data;
9002:     MPI_Comm_size(ccomm,&b->size);
9003:     MPI_Comm_rank(ccomm,&b->rank);
          /* the two blocks of At are shared with *B: a reference is taken before At is destroyed at the end */
9004:     PetscObjectReference((PetscObject)a->A);
9005:     PetscObjectReference((PetscObject)a->B);
9006:     b->A = a->A;
9007:     b->B = a->B;

9009:     b->donotstash      = a->donotstash;
9010:     b->roworiented     = a->roworiented;
9011:     b->rowindices      = 0;
9012:     b->rowvalues       = 0;
9013:     b->getrowactive    = PETSC_FALSE;

          /* *B is flagged as assembled and preallocated directly, without going through the assembly routines */
9015:     (*B)->rmap         = rmap;
9016:     (*B)->factortype   = A->factortype;
9017:     (*B)->assembled    = PETSC_TRUE;
9018:     (*B)->insertmode   = NOT_SET_VALUES;
9019:     (*B)->preallocated = PETSC_TRUE;

          /* colmap is copied if present: a table copy with PETSC_USE_CTABLE, an array of At->cmap->N entries otherwise */
9021:     if (a->colmap) {
9022: #if defined(PETSC_USE_CTABLE)
9023:       PetscTableCreateCopy(a->colmap,&b->colmap);
9024: #else
9025:       PetscMalloc1(At->cmap->N,&b->colmap);
9026:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9027:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9028: #endif
9029:     } else b->colmap = 0;
9030:     if (a->garray) {
            /* garray is copied: len+1 entries are allocated, len entries are logged and copied */
9031:       PetscInt len;
9032:       len  = a->B->cmap->n;
9033:       PetscMalloc1(len+1,&b->garray);
9034:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9035:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9036:     } else b->garray = 0;

          /* lvec is shared by reference as well */
9038:     PetscObjectReference((PetscObject)a->lvec);
9039:     b->lvec = a->lvec;
9040:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

9042:     /* cannot use VecScatterCopy */
          /* a new scatter is built from a vector with the column layout of *B to lvec: the source indices are the
             first lsize entries of b->garray (not copied by the IS), the targets are 0..lsize-1.
             NOTE(review): presumably lsize equals a->B->cmap->n and is zero when garray is NULL - confirm */
9043:     VecGetLocalSize(b->lvec,&lsize);
9044:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9045:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9046:     MatCreateVecs(*B,&gvec,NULL);
9047:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9048:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9049:     ISDestroy(&from);
9050:     ISDestroy(&to);
9051:     VecDestroy(&gvec);
9052:   }
        /* At is released on every process; the objects shared with *B survive through the references taken above */
9053:   MatDestroy(&At);
9054:   return(0);
9055: }