Actual source code: bddcprivate.c

petsc-3.11.4 2019-09-28
Report Typos and Errors
  1:  #include <../src/mat/impls/aij/seq/aij.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  3:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>
  5:  #include <petscdmplex.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/sfimpl.h>
  8:  #include <petsc/private/dmpleximpl.h>
  9:  #include <petscdmda.h>

 /* Forward declaration of a static (file-local) helper; its definition is not
    visible in this part of the file. */
 11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);

 13: /* if range is true,  it returns B s.t. span{B} = range(A)
 14:    if range is false, it returns B s.t. range(B) _|_ range(A) */
 15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
 16: {
 17: #if !defined(PETSC_USE_COMPLEX)
 18:   PetscScalar    *uwork,*data,*U, ds = 0.;
 19:   PetscReal      *sing;
 20:   PetscBLASInt   bM,bN,lwork,lierr,di = 1;
 21:   PetscInt       ulw,i,nr,nc,n;

 25: #if defined(PETSC_MISSING_LAPACK_GESVD)
 26:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
 27: #else
 28:   MatGetSize(A,&nr,&nc);
 29:   if (!nr || !nc) return(0);

 31:   /* workspace */
 32:   if (!work) {
 33:     ulw  = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
 34:     PetscMalloc1(ulw,&uwork);
 35:   } else {
 36:     ulw   = lw;
 37:     uwork = work;
 38:   }
 39:   n = PetscMin(nr,nc);
 40:   if (!rwork) {
 41:     PetscMalloc1(n,&sing);
 42:   } else {
 43:     sing = rwork;
 44:   }

 46:   /* SVD */
 47:   PetscMalloc1(nr*nr,&U);
 48:   PetscBLASIntCast(nr,&bM);
 49:   PetscBLASIntCast(nc,&bN);
 50:   PetscBLASIntCast(ulw,&lwork);
 51:   MatDenseGetArray(A,&data);
 52:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 53:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
 54:   PetscFPTrapPop();
 55:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
 56:   MatDenseRestoreArray(A,&data);
 57:   for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
 58:   if (!rwork) {
 59:     PetscFree(sing);
 60:   }
 61:   if (!work) {
 62:     PetscFree(uwork);
 63:   }
 64:   /* create B */
 65:   if (!range) {
 66:     MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
 67:     MatDenseGetArray(*B,&data);
 68:     PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
 69:   } else {
 70:     MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
 71:     MatDenseGetArray(*B,&data);
 72:     PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
 73:   }
 74:   MatDenseRestoreArray(*B,&data);
 75:   PetscFree(U);
 76: #endif
 77: #else /* PETSC_USE_COMPLEX */
 79:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
 80: #endif
 81:   return(0);
 82: }

 84: /* TODO REMOVE */
 /* Debug-only state, compiled only when PRINT_GDET is defined.
    Both values are formatted into the "Gdet_l%d_r%d_cc%d.m" dump file name in
    PCBDDCComputeNedelecChangeEdge; inc is post-incremented at each dump. */
 85: #if defined(PRINT_GDET)
 86: static int inc = 0;
 87: static int lev = 0;
 88: #endif

 90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
 91: {
 93:   Mat            GE,GEd;
 94:   PetscInt       rsize,csize,esize;
 95:   PetscScalar    *ptr;

 98:   ISGetSize(edge,&esize);
 99:   if (!esize) return(0);
100:   ISGetSize(extrow,&rsize);
101:   ISGetSize(extcol,&csize);

103:   /* gradients */
104:   ptr  = work + 5*esize;
105:   MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106:   MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108:   MatDestroy(&GE);

110:   /* constants */
111:   ptr += rsize*csize;
112:   MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113:   MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114:   MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115:   MatDestroy(&GE);
116:   MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117:   MatDestroy(&GEd);

119:   if (corners) {
120:     Mat            GEc;
121:     PetscScalar    *vals,v;

123:     MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124:     MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125:     MatDenseGetArray(GEd,&vals);
126:     /* v    = PetscAbsScalar(vals[0]) */;
127:     v    = 1.;
128:     cvals[0] = vals[0]/v;
129:     cvals[1] = vals[1]/v;
130:     MatDenseRestoreArray(GEd,&vals);
131:     MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133:     {
134:       PetscViewer viewer;
135:       char filename[256];
136:       sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137:       PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139:       PetscObjectSetName((PetscObject)GEc,"GEc");
140:       MatView(GEc,viewer);
141:       PetscObjectSetName((PetscObject)(*GKins),"GK");
142:       MatView(*GKins,viewer);
143:       PetscObjectSetName((PetscObject)GEd,"Gproj");
144:       MatView(GEd,viewer);
145:       PetscViewerDestroy(&viewer);
146:     }
147: #endif
148:     MatDestroy(&GEd);
149:     MatDestroy(&GEc);
150:   }

152:   return(0);
153: }

155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
158:   Mat_IS                 *matis = (Mat_IS*)pc->pmat->data;
159:   Mat                    G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160:   Vec                    tvec;
161:   PetscSF                sfv;
162:   ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163:   MPI_Comm               comm;
164:   IS                     lned,primals,allprimals,nedfieldlocal;
165:   IS                     *eedges,*extrows,*extcols,*alleedges;
166:   PetscBT                btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167:   PetscScalar            *vals,*work;
168:   PetscReal              *rwork;
169:   const PetscInt         *idxs,*ii,*jj,*iit,*jjt;
170:   PetscInt               ne,nv,Lv,order,n,field;
171:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
172:   PetscInt               i,j,extmem,cum,maxsize,nee;
173:   PetscInt               *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174:   PetscInt               *sfvleaves,*sfvroots;
175:   PetscInt               *corners,*cedges;
176:   PetscInt               *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178:   PetscInt               *emarks;
179: #endif
180:   PetscBool              print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181:   PetscErrorCode         ierr;

184:   /* If the discrete gradient is defined for a subset of dofs and global is true,
185:      it assumes G is given in global ordering for all the dofs.
186:      Otherwise, the ordering is global for the Nedelec field */
187:   order      = pcbddc->nedorder;
188:   conforming = pcbddc->conforming;
189:   field      = pcbddc->nedfield;
190:   global     = pcbddc->nedglobal;
191:   setprimal  = PETSC_FALSE;
192:   print      = PETSC_FALSE;
193:   singular   = PETSC_FALSE;

195:   /* Command line customization */
196:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197:   PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198:   PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199:   PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200:   /* print debug info TODO: to be removed */
201:   PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202:   PetscOptionsEnd();

204:   /* Return if there are no edges in the decomposition and the problem is not singular */
205:   MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206:   ISLocalToGlobalMappingGetSize(al2g,&n);
207:   PetscObjectGetComm((PetscObject)pc,&comm);
208:   if (!singular) {
209:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210:     lrc[0] = PETSC_FALSE;
211:     for (i=0;i<n;i++) {
212:       if (PetscRealPart(vals[i]) > 2.) {
213:         lrc[0] = PETSC_TRUE;
214:         break;
215:       }
216:     }
217:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218:     MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219:     if (!lrc[1]) return(0);
220:   }

222:   /* Get Nedelec field */
223:   if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
224:   if (pcbddc->n_ISForDofsLocal && field >= 0) {
225:     PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
226:     nedfieldlocal = pcbddc->ISForDofsLocal[field];
227:     ISGetLocalSize(nedfieldlocal,&ne);
228:   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
229:     ne            = n;
230:     nedfieldlocal = NULL;
231:     global        = PETSC_TRUE;
232:   } else if (field == PETSC_DECIDE) {
233:     PetscInt rst,ren,*idx;

235:     PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
236:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
237:     MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
238:     for (i=rst;i<ren;i++) {
239:       PetscInt nc;

241:       MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
242:       if (nc > 1) matis->sf_rootdata[i-rst] = 1;
243:       MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
244:     }
245:     PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
246:     PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247:     PetscMalloc1(n,&idx);
248:     for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
249:     ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
250:   } else {
251:     SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
252:   }

254:   /* Sanity checks */
255:   if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
256:   if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
257:   if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);

259:   /* Just set primal dofs and return */
260:   if (setprimal) {
261:     IS       enedfieldlocal;
262:     PetscInt *eidxs;

264:     PetscMalloc1(ne,&eidxs);
265:     VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
266:     if (nedfieldlocal) {
267:       ISGetIndices(nedfieldlocal,&idxs);
268:       for (i=0,cum=0;i<ne;i++) {
269:         if (PetscRealPart(vals[idxs[i]]) > 2.) {
270:           eidxs[cum++] = idxs[i];
271:         }
272:       }
273:       ISRestoreIndices(nedfieldlocal,&idxs);
274:     } else {
275:       for (i=0,cum=0;i<ne;i++) {
276:         if (PetscRealPart(vals[i]) > 2.) {
277:           eidxs[cum++] = i;
278:         }
279:       }
280:     }
281:     VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
282:     ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
283:     PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
284:     PetscFree(eidxs);
285:     ISDestroy(&nedfieldlocal);
286:     ISDestroy(&enedfieldlocal);
287:     return(0);
288:   }

290:   /* Compute some l2g maps */
291:   if (nedfieldlocal) {
292:     IS is;

294:     /* need to map from the local Nedelec field to local numbering */
295:     ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
296:     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
297:     ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
298:     ISLocalToGlobalMappingCreateIS(is,&al2g);
299:     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
300:     if (global) {
301:       PetscObjectReference((PetscObject)al2g);
302:       el2g = al2g;
303:     } else {
304:       IS gis;

306:       ISRenumber(is,NULL,NULL,&gis);
307:       ISLocalToGlobalMappingCreateIS(gis,&el2g);
308:       ISDestroy(&gis);
309:     }
310:     ISDestroy(&is);
311:   } else {
312:     /* restore default */
313:     pcbddc->nedfield = -1;
314:     /* one ref for the destruction of al2g, one for el2g */
315:     PetscObjectReference((PetscObject)al2g);
316:     PetscObjectReference((PetscObject)al2g);
317:     el2g = al2g;
318:     fl2g = NULL;
319:   }

321:   /* Start communication to drop connections for interior edges (for cc analysis only) */
322:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
323:   PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
324:   if (nedfieldlocal) {
325:     ISGetIndices(nedfieldlocal,&idxs);
326:     for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
327:     ISRestoreIndices(nedfieldlocal,&idxs);
328:   } else {
329:     for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
330:   }
331:   PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
332:   PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);

334:   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
335:     MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
336:     MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
337:     if (global) {
338:       PetscInt rst;

340:       MatGetOwnershipRange(G,&rst,NULL);
341:       for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
342:         if (matis->sf_rootdata[i] < 2) {
343:           matis->sf_rootdata[cum++] = i + rst;
344:         }
345:       }
346:       MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
347:       MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
348:     } else {
349:       PetscInt *tbz;

351:       PetscMalloc1(ne,&tbz);
352:       PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
353:       PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354:       ISGetIndices(nedfieldlocal,&idxs);
355:       for (i=0,cum=0;i<ne;i++)
356:         if (matis->sf_leafdata[idxs[i]] == 1)
357:           tbz[cum++] = i;
358:       ISRestoreIndices(nedfieldlocal,&idxs);
359:       ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
360:       MatZeroRows(G,cum,tbz,0.,NULL,NULL);
361:       PetscFree(tbz);
362:     }
363:   } else { /* we need the entire G to infer the nullspace */
364:     PetscObjectReference((PetscObject)pcbddc->discretegradient);
365:     G    = pcbddc->discretegradient;
366:   }

368:   /* Extract subdomain relevant rows of G */
369:   ISLocalToGlobalMappingGetIndices(el2g,&idxs);
370:   ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
371:   MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
372:   ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
373:   ISDestroy(&lned);
374:   MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
375:   MatDestroy(&lGall);
376:   MatISGetLocalMat(lGis,&lG);

378:   /* SF for nodal dofs communications */
379:   MatGetLocalSize(G,NULL,&Lv);
380:   MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
381:   PetscObjectReference((PetscObject)vl2g);
382:   ISLocalToGlobalMappingGetSize(vl2g,&nv);
383:   PetscSFCreate(comm,&sfv);
384:   ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
385:   PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
386:   ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
387:   i    = singular ? 2 : 1;
388:   PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);

390:   /* Destroy temporary G created in MATIS format and modified G */
391:   PetscObjectReference((PetscObject)lG);
392:   MatDestroy(&lGis);
393:   MatDestroy(&G);

395:   if (print) {
396:     PetscObjectSetName((PetscObject)lG,"initial_lG");
397:     MatView(lG,NULL);
398:   }

400:   /* Save lG for values insertion in change of basis */
401:   MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);

403:   /* Analyze the edge-nodes connections (duplicate lG) */
404:   MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
405:   MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
406:   PetscBTCreate(nv,&btv);
407:   PetscBTCreate(ne,&bte);
408:   PetscBTCreate(ne,&btb);
409:   PetscBTCreate(ne,&btbd);
410:   PetscBTCreate(nv,&btvcand);
411:   /* need to import the boundary specification to ensure the
412:      proper detection of coarse edges' endpoints */
413:   if (pcbddc->DirichletBoundariesLocal) {
414:     IS is;

416:     if (fl2g) {
417:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
418:     } else {
419:       is = pcbddc->DirichletBoundariesLocal;
420:     }
421:     ISGetLocalSize(is,&cum);
422:     ISGetIndices(is,&idxs);
423:     for (i=0;i<cum;i++) {
424:       if (idxs[i] >= 0) {
425:         PetscBTSet(btb,idxs[i]);
426:         PetscBTSet(btbd,idxs[i]);
427:       }
428:     }
429:     ISRestoreIndices(is,&idxs);
430:     if (fl2g) {
431:       ISDestroy(&is);
432:     }
433:   }
434:   if (pcbddc->NeumannBoundariesLocal) {
435:     IS is;

437:     if (fl2g) {
438:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
439:     } else {
440:       is = pcbddc->NeumannBoundariesLocal;
441:     }
442:     ISGetLocalSize(is,&cum);
443:     ISGetIndices(is,&idxs);
444:     for (i=0;i<cum;i++) {
445:       if (idxs[i] >= 0) {
446:         PetscBTSet(btb,idxs[i]);
447:       }
448:     }
449:     ISRestoreIndices(is,&idxs);
450:     if (fl2g) {
451:       ISDestroy(&is);
452:     }
453:   }

455:   /* Count neighs per dof */
456:   ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
457:   ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);

459:   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
460:      for proper detection of coarse edges' endpoints */
461:   PetscBTCreate(ne,&btee);
462:   for (i=0;i<ne;i++) {
463:     if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
464:       PetscBTSet(btee,i);
465:     }
466:   }
467:   PetscMalloc1(ne,&marks);
468:   if (!conforming) {
469:     MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
470:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
471:   }
472:   MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
473:   MatSeqAIJGetArray(lGe,&vals);
474:   cum  = 0;
475:   for (i=0;i<ne;i++) {
476:     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
477:     if (!PetscBTLookup(btee,i)) {
478:       marks[cum++] = i;
479:       continue;
480:     }
481:     /* set badly connected edge dofs as primal */
482:     if (!conforming) {
483:       if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
484:         marks[cum++] = i;
485:         PetscBTSet(bte,i);
486:         for (j=ii[i];j<ii[i+1];j++) {
487:           PetscBTSet(btv,jj[j]);
488:         }
489:       } else {
490:         /* every edge dof should be connected through a certain number of nodal dofs
491:            to other edge dofs belonging to coarse edges
492:            - at most 2 endpoints
493:            - order-1 interior nodal dofs
494:            - no undefined nodal dofs (nconn < order)
495:         */
496:         PetscInt ends = 0,ints = 0, undef = 0;
497:         for (j=ii[i];j<ii[i+1];j++) {
498:           PetscInt v = jj[j],k;
499:           PetscInt nconn = iit[v+1]-iit[v];
500:           for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
501:           if (nconn > order) ends++;
502:           else if (nconn == order) ints++;
503:           else undef++;
504:         }
505:         if (undef || ends > 2 || ints != order -1) {
506:           marks[cum++] = i;
507:           PetscBTSet(bte,i);
508:           for (j=ii[i];j<ii[i+1];j++) {
509:             PetscBTSet(btv,jj[j]);
510:           }
511:         }
512:       }
513:     }
514:     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
515:     if (!order && ii[i+1] != ii[i]) {
516:       PetscScalar val = 1./(ii[i+1]-ii[i]-1);
517:       for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
518:     }
519:   }
520:   PetscBTDestroy(&btee);
521:   MatSeqAIJRestoreArray(lGe,&vals);
522:   MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
523:   if (!conforming) {
524:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
525:     MatDestroy(&lGt);
526:   }
527:   MatZeroRows(lGe,cum,marks,0.,NULL,NULL);

529:   /* identify splitpoints and corner candidates */
530:   MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
531:   if (print) {
532:     PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
533:     MatView(lGe,NULL);
534:     PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
535:     MatView(lGt,NULL);
536:   }
537:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
538:   MatSeqAIJGetArray(lGt,&vals);
539:   for (i=0;i<nv;i++) {
540:     PetscInt  ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
541:     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
542:     if (!order) { /* variable order */
543:       PetscReal vorder = 0.;

545:       for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
546:       test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
547:       if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
548:       ord  = 1;
549:     }
550: #if defined(PETSC_USE_DEBUG)
551:     if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
552: #endif
553:     for (j=ii[i];j<ii[i+1] && sneighs;j++) {
554:       if (PetscBTLookup(btbd,jj[j])) {
555:         bdir = PETSC_TRUE;
556:         break;
557:       }
558:       if (vc != ecount[jj[j]]) {
559:         sneighs = PETSC_FALSE;
560:       } else {
561:         PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
562:         for (k=0;k<vc;k++) {
563:           if (vn[k] != en[k]) {
564:             sneighs = PETSC_FALSE;
565:             break;
566:           }
567:         }
568:       }
569:     }
570:     if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
571:       if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
572:       PetscBTSet(btv,i);
573:     } else if (test == ord) {
574:       if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
575:         if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
576:         PetscBTSet(btv,i);
577:       } else {
578:         if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
579:         PetscBTSet(btvcand,i);
580:       }
581:     }
582:   }
583:   ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
584:   ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
585:   PetscBTDestroy(&btbd);

587:   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
588:   if (order != 1) {
589:     if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
590:     MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
591:     for (i=0;i<nv;i++) {
592:       if (PetscBTLookup(btvcand,i)) {
593:         PetscBool found = PETSC_FALSE;
594:         for (j=ii[i];j<ii[i+1] && !found;j++) {
595:           PetscInt k,e = jj[j];
596:           if (PetscBTLookup(bte,e)) continue;
597:           for (k=iit[e];k<iit[e+1];k++) {
598:             PetscInt v = jjt[k];
599:             if (v != i && PetscBTLookup(btvcand,v)) {
600:               found = PETSC_TRUE;
601:               break;
602:             }
603:           }
604:         }
605:         if (!found) {
606:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D CLEARED\n",i);
607:           PetscBTClear(btvcand,i);
608:         } else {
609:           if (print) PetscPrintf(PETSC_COMM_SELF,"  CANDIDATE %D ACCEPTED\n",i);
610:         }
611:       }
612:     }
613:     MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
614:   }
615:   MatSeqAIJRestoreArray(lGt,&vals);
616:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
617:   MatDestroy(&lGe);

619:   /* Get the local G^T explicitly */
620:   MatDestroy(&lGt);
621:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
622:   MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);

624:   /* Mark interior nodal dofs */
625:   ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
626:   PetscBTCreate(nv,&btvi);
627:   for (i=1;i<n_neigh;i++) {
628:     for (j=0;j<n_shared[i];j++) {
629:       PetscBTSet(btvi,shared[i][j]);
630:     }
631:   }
632:   ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);

634:   /* communicate corners and splitpoints */
635:   PetscMalloc1(nv,&vmarks);
636:   PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
637:   PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
638:   for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;

640:   if (print) {
641:     IS tbz;

643:     cum = 0;
644:     for (i=0;i<nv;i++)
645:       if (sfvleaves[i])
646:         vmarks[cum++] = i;

648:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
649:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
650:     ISView(tbz,NULL);
651:     ISDestroy(&tbz);
652:   }

654:   PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
655:   PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
656:   PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
657:   PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);

659:   /* Zero rows of lGt corresponding to identified corners
660:      and interior nodal dofs */
661:   cum = 0;
662:   for (i=0;i<nv;i++) {
663:     if (sfvleaves[i]) {
664:       vmarks[cum++] = i;
665:       PetscBTSet(btv,i);
666:     }
667:     if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
668:   }
669:   PetscBTDestroy(&btvi);
670:   if (print) {
671:     IS tbz;

673:     ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
674:     PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
675:     ISView(tbz,NULL);
676:     ISDestroy(&tbz);
677:   }
678:   MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
679:   PetscFree(vmarks);
680:   PetscSFDestroy(&sfv);
681:   PetscFree2(sfvleaves,sfvroots);

683:   /* Recompute G */
684:   MatDestroy(&lG);
685:   MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
686:   if (print) {
687:     PetscObjectSetName((PetscObject)lG,"used_lG");
688:     MatView(lG,NULL);
689:     PetscObjectSetName((PetscObject)lGt,"used_lGt");
690:     MatView(lGt,NULL);
691:   }

693:   /* Get primal dofs (if any) */
694:   cum = 0;
695:   for (i=0;i<ne;i++) {
696:     if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
697:   }
698:   if (fl2g) {
699:     ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
700:   }
701:   ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
702:   if (print) {
703:     PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
704:     ISView(primals,NULL);
705:   }
706:   PetscBTDestroy(&bte);
707:   /* TODO: what if the user passed in some of them ?  */
708:   PCBDDCSetPrimalVerticesLocalIS(pc,primals);
709:   ISDestroy(&primals);

711:   /* Compute edge connectivity */
712:   PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
713:   MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
714:   MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
715:   if (fl2g) {
716:     PetscBT   btf;
717:     PetscInt  *iia,*jja,*iiu,*jju;
718:     PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;

720:     /* create CSR for all local dofs */
721:     PetscMalloc1(n+1,&iia);
722:     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
723:       if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
724:       iiu = pcbddc->mat_graph->xadj;
725:       jju = pcbddc->mat_graph->adjncy;
726:     } else if (pcbddc->use_local_adj) {
727:       rest = PETSC_TRUE;
728:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
729:     } else {
730:       free   = PETSC_TRUE;
731:       PetscMalloc2(n+1,&iiu,n,&jju);
732:       iiu[0] = 0;
733:       for (i=0;i<n;i++) {
734:         iiu[i+1] = i+1;
735:         jju[i]   = -1;
736:       }
737:     }

739:     /* import sizes of CSR */
740:     iia[0] = 0;
741:     for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];

743:     /* overwrite entries corresponding to the Nedelec field */
744:     PetscBTCreate(n,&btf);
745:     ISGetIndices(nedfieldlocal,&idxs);
746:     for (i=0;i<ne;i++) {
747:       PetscBTSet(btf,idxs[i]);
748:       iia[idxs[i]+1] = ii[i+1]-ii[i];
749:     }

751:     /* iia in CSR */
752:     for (i=0;i<n;i++) iia[i+1] += iia[i];

754:     /* jja in CSR */
755:     PetscMalloc1(iia[n],&jja);
756:     for (i=0;i<n;i++)
757:       if (!PetscBTLookup(btf,i))
758:         for (j=0;j<iiu[i+1]-iiu[i];j++)
759:           jja[iia[i]+j] = jju[iiu[i]+j];

761:     /* map edge dofs connectivity */
762:     if (jj) {
763:       ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
764:       for (i=0;i<ne;i++) {
765:         PetscInt e = idxs[i];
766:         for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
767:       }
768:     }
769:     ISRestoreIndices(nedfieldlocal,&idxs);
770:     PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
771:     if (rest) {
772:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
773:     }
774:     if (free) {
775:       PetscFree2(iiu,jju);
776:     }
777:     PetscBTDestroy(&btf);
778:   } else {
779:     PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
780:   }

782:   /* Analyze interface for edge dofs */
783:   PCBDDCAnalyzeInterface(pc);
784:   pcbddc->mat_graph->twodim = PETSC_FALSE;

786:   /* Get coarse edges in the edge space */
787:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
788:   MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);

790:   if (fl2g) {
791:     ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
792:     PetscMalloc1(nee,&eedges);
793:     for (i=0;i<nee;i++) {
794:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
795:     }
796:   } else {
797:     eedges  = alleedges;
798:     primals = allprimals;
799:   }

801:   /* Mark fine edge dofs with their coarse edge id */
802:   PetscMemzero(marks,ne*sizeof(PetscInt));
803:   ISGetLocalSize(primals,&cum);
804:   ISGetIndices(primals,&idxs);
805:   for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
806:   ISRestoreIndices(primals,&idxs);
807:   if (print) {
808:     PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
809:     ISView(primals,NULL);
810:   }

812:   maxsize = 0;
813:   for (i=0;i<nee;i++) {
814:     PetscInt size,mark = i+1;

816:     ISGetLocalSize(eedges[i],&size);
817:     ISGetIndices(eedges[i],&idxs);
818:     for (j=0;j<size;j++) marks[idxs[j]] = mark;
819:     ISRestoreIndices(eedges[i],&idxs);
820:     maxsize = PetscMax(maxsize,size);
821:   }

823:   /* Find coarse edge endpoints */
824:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
825:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
826:   for (i=0;i<nee;i++) {
827:     PetscInt mark = i+1,size;

829:     ISGetLocalSize(eedges[i],&size);
830:     if (!size && nedfieldlocal) continue;
831:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
832:     ISGetIndices(eedges[i],&idxs);
833:     if (print) {
834:       PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
835:       ISView(eedges[i],NULL);
836:     }
837:     for (j=0;j<size;j++) {
838:       PetscInt k, ee = idxs[j];
839:       if (print) PetscPrintf(PETSC_COMM_SELF,"  idx %D\n",ee);
840:       for (k=ii[ee];k<ii[ee+1];k++) {
841:         if (print) PetscPrintf(PETSC_COMM_SELF,"    inspect %D\n",jj[k]);
842:         if (PetscBTLookup(btv,jj[k])) {
843:           if (print) PetscPrintf(PETSC_COMM_SELF,"      corner found (already set) %D\n",jj[k]);
844:         } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
845:           PetscInt  k2;
846:           PetscBool corner = PETSC_FALSE;
847:           for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
848:             if (print) PetscPrintf(PETSC_COMM_SELF,"        INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
849:             /* it's a corner if either is connected with an edge dof belonging to a different cc or
850:                if the edge dof lie on the natural part of the boundary */
851:             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
852:               corner = PETSC_TRUE;
853:               break;
854:             }
855:           }
856:           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
857:             if (print) PetscPrintf(PETSC_COMM_SELF,"        corner found %D\n",jj[k]);
858:             PetscBTSet(btv,jj[k]);
859:           } else {
860:             if (print) PetscPrintf(PETSC_COMM_SELF,"        no corners found\n");
861:           }
862:         }
863:       }
864:     }
865:     ISRestoreIndices(eedges[i],&idxs);
866:   }
867:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
868:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
869:   PetscBTDestroy(&btb);

871:   /* Reset marked primal dofs */
872:   ISGetLocalSize(primals,&cum);
873:   ISGetIndices(primals,&idxs);
874:   for (i=0;i<cum;i++) marks[idxs[i]] = 0;
875:   ISRestoreIndices(primals,&idxs);

877:   /* Now use the initial lG */
878:   MatDestroy(&lG);
879:   MatDestroy(&lGt);
880:   lG   = lGinit;
881:   MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);

883:   /* Compute extended cols indices */
884:   PetscBTCreate(nv,&btvc);
885:   PetscBTCreate(nee,&bter);
886:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
887:   MatSeqAIJGetMaxRowNonzeros(lG,&i);
888:   i   *= maxsize;
889:   PetscCalloc1(nee,&extcols);
890:   PetscMalloc2(i,&extrow,i,&gidxs);
891:   eerr = PETSC_FALSE;
892:   for (i=0;i<nee;i++) {
893:     PetscInt size,found = 0;

895:     cum  = 0;
896:     ISGetLocalSize(eedges[i],&size);
897:     if (!size && nedfieldlocal) continue;
898:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
899:     ISGetIndices(eedges[i],&idxs);
900:     PetscBTMemzero(nv,btvc);
901:     for (j=0;j<size;j++) {
902:       PetscInt k,ee = idxs[j];
903:       for (k=ii[ee];k<ii[ee+1];k++) {
904:         PetscInt vv = jj[k];
905:         if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
906:         else if (!PetscBTLookupSet(btvc,vv)) found++;
907:       }
908:     }
909:     ISRestoreIndices(eedges[i],&idxs);
910:     PetscSortRemoveDupsInt(&cum,extrow);
911:     ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
912:     PetscSortIntWithArray(cum,gidxs,extrow);
913:     ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
914:     /* it may happen that endpoints are not defined at this point
915:        if it is the case, mark this edge for a second pass */
916:     if (cum != size -1 || found != 2) {
917:       PetscBTSet(bter,i);
918:       if (print) {
919:         PetscObjectSetName((PetscObject)eedges[i],"error_edge");
920:         ISView(eedges[i],NULL);
921:         PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
922:         ISView(extcols[i],NULL);
923:       }
924:       eerr = PETSC_TRUE;
925:     }
926:   }
927:   /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
928:   MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
929:   if (done) {
930:     PetscInt *newprimals;

932:     PetscMalloc1(ne,&newprimals);
933:     ISGetLocalSize(primals,&cum);
934:     ISGetIndices(primals,&idxs);
935:     PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
936:     ISRestoreIndices(primals,&idxs);
937:     MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
938:     if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
939:     for (i=0;i<nee;i++) {
940:       PetscBool has_candidates = PETSC_FALSE;
941:       if (PetscBTLookup(bter,i)) {
942:         PetscInt size,mark = i+1;

944:         ISGetLocalSize(eedges[i],&size);
945:         ISGetIndices(eedges[i],&idxs);
946:         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
947:         for (j=0;j<size;j++) {
948:           PetscInt k,ee = idxs[j];
949:           if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
950:           for (k=ii[ee];k<ii[ee+1];k++) {
951:             /* set all candidates located on the edge as corners */
952:             if (PetscBTLookup(btvcand,jj[k])) {
953:               PetscInt k2,vv = jj[k];
954:               has_candidates = PETSC_TRUE;
955:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Candidate set to vertex %D\n",vv);
956:               PetscBTSet(btv,vv);
957:               /* set all edge dofs connected to candidate as primals */
958:               for (k2=iit[vv];k2<iit[vv+1];k2++) {
959:                 if (marks[jjt[k2]] == mark) {
960:                   PetscInt k3,ee2 = jjt[k2];
961:                   if (print) PetscPrintf(PETSC_COMM_SELF,"    Connected edge dof set to primal %D\n",ee2);
962:                   newprimals[cum++] = ee2;
963:                   /* finally set the new corners */
964:                   for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
965:                     if (print) PetscPrintf(PETSC_COMM_SELF,"      Connected nodal dof set to vertex %D\n",jj[k3]);
966:                     PetscBTSet(btv,jj[k3]);
967:                   }
968:                 }
969:               }
970:             } else {
971:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Not a candidate vertex %D\n",jj[k]);
972:             }
973:           }
974:         }
975:         if (!has_candidates) { /* circular edge */
976:           PetscInt k, ee = idxs[0],*tmarks;

978:           PetscCalloc1(ne,&tmarks);
979:           if (print) PetscPrintf(PETSC_COMM_SELF,"  Circular edge %D\n",i);
980:           for (k=ii[ee];k<ii[ee+1];k++) {
981:             PetscInt k2;
982:             if (print) PetscPrintf(PETSC_COMM_SELF,"    Set to corner %D\n",jj[k]);
983:             PetscBTSet(btv,jj[k]);
984:             for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
985:           }
986:           for (j=0;j<size;j++) {
987:             if (tmarks[idxs[j]] > 1) {
988:               if (print) PetscPrintf(PETSC_COMM_SELF,"  Edge dof set to primal %D\n",idxs[j]);
989:               newprimals[cum++] = idxs[j];
990:             }
991:           }
992:           PetscFree(tmarks);
993:         }
994:         ISRestoreIndices(eedges[i],&idxs);
995:       }
996:       ISDestroy(&extcols[i]);
997:     }
998:     PetscFree(extcols);
999:     MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1000:     PetscSortRemoveDupsInt(&cum,newprimals);
1001:     if (fl2g) {
1002:       ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1003:       ISDestroy(&primals);
1004:       for (i=0;i<nee;i++) {
1005:         ISDestroy(&eedges[i]);
1006:       }
1007:       PetscFree(eedges);
1008:     }
1009:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1010:     ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1011:     PetscFree(newprimals);
1012:     PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1013:     ISDestroy(&primals);
1014:     PCBDDCAnalyzeInterface(pc);
1015:     pcbddc->mat_graph->twodim = PETSC_FALSE;
1016:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1017:     if (fl2g) {
1018:       ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1019:       PetscMalloc1(nee,&eedges);
1020:       for (i=0;i<nee;i++) {
1021:         ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1022:       }
1023:     } else {
1024:       eedges  = alleedges;
1025:       primals = allprimals;
1026:     }
1027:     PetscCalloc1(nee,&extcols);

1029:     /* Mark again */
1030:     PetscMemzero(marks,ne*sizeof(PetscInt));
1031:     for (i=0;i<nee;i++) {
1032:       PetscInt size,mark = i+1;

1034:       ISGetLocalSize(eedges[i],&size);
1035:       ISGetIndices(eedges[i],&idxs);
1036:       for (j=0;j<size;j++) marks[idxs[j]] = mark;
1037:       ISRestoreIndices(eedges[i],&idxs);
1038:     }
1039:     if (print) {
1040:       PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1041:       ISView(primals,NULL);
1042:     }

1044:     /* Recompute extended cols */
1045:     eerr = PETSC_FALSE;
1046:     for (i=0;i<nee;i++) {
1047:       PetscInt size;

1049:       cum  = 0;
1050:       ISGetLocalSize(eedges[i],&size);
1051:       if (!size && nedfieldlocal) continue;
1052:       if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1053:       ISGetIndices(eedges[i],&idxs);
1054:       for (j=0;j<size;j++) {
1055:         PetscInt k,ee = idxs[j];
1056:         for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1057:       }
1058:       ISRestoreIndices(eedges[i],&idxs);
1059:       PetscSortRemoveDupsInt(&cum,extrow);
1060:       ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1061:       PetscSortIntWithArray(cum,gidxs,extrow);
1062:       ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1063:       if (cum != size -1) {
1064:         if (print) {
1065:           PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1066:           ISView(eedges[i],NULL);
1067:           PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1068:           ISView(extcols[i],NULL);
1069:         }
1070:         eerr = PETSC_TRUE;
1071:       }
1072:     }
1073:   }
1074:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1075:   PetscFree2(extrow,gidxs);
1076:   PetscBTDestroy(&bter);
1077:   if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1078:   /* an error should not occur at this point */
1079:   if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");

1081:   /* Check the number of endpoints */
1082:   MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1083:   PetscMalloc1(2*nee,&corners);
1084:   PetscMalloc1(nee,&cedges);
1085:   for (i=0;i<nee;i++) {
1086:     PetscInt size, found = 0, gc[2];

1088:     /* init with defaults */
1089:     cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1090:     ISGetLocalSize(eedges[i],&size);
1091:     if (!size && nedfieldlocal) continue;
1092:     if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1093:     ISGetIndices(eedges[i],&idxs);
1094:     PetscBTMemzero(nv,btvc);
1095:     for (j=0;j<size;j++) {
1096:       PetscInt k,ee = idxs[j];
1097:       for (k=ii[ee];k<ii[ee+1];k++) {
1098:         PetscInt vv = jj[k];
1099:         if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1100:           if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1101:           corners[i*2+found++] = vv;
1102:         }
1103:       }
1104:     }
1105:     if (found != 2) {
1106:       PetscInt e;
1107:       if (fl2g) {
1108:         ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1109:       } else {
1110:         e = idxs[0];
1111:       }
1112:       SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1113:     }

1115:     /* get primal dof index on this coarse edge */
1116:     ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1117:     if (gc[0] > gc[1]) {
1118:       PetscInt swap  = corners[2*i];
1119:       corners[2*i]   = corners[2*i+1];
1120:       corners[2*i+1] = swap;
1121:     }
1122:     cedges[i] = idxs[size-1];
1123:     ISRestoreIndices(eedges[i],&idxs);
1124:     if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1125:   }
1126:   MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1127:   PetscBTDestroy(&btvc);

1129: #if defined(PETSC_USE_DEBUG)
1130:   /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1131:      not interfere with neighbouring coarse edges */
1132:   PetscMalloc1(nee+1,&emarks);
1133:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1134:   for (i=0;i<nv;i++) {
1135:     PetscInt emax = 0,eemax = 0;

1137:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1138:     PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1139:     for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1140:     for (j=1;j<nee+1;j++) {
1141:       if (emax < emarks[j]) {
1142:         emax = emarks[j];
1143:         eemax = j;
1144:       }
1145:     }
1146:     /* not relevant for edges */
1147:     if (!eemax) continue;

1149:     for (j=ii[i];j<ii[i+1];j++) {
1150:       if (marks[jj[j]] && marks[jj[j]] != eemax) {
1151:         SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1152:       }
1153:     }
1154:   }
1155:   PetscFree(emarks);
1156:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1157: #endif

1159:   /* Compute extended rows indices for edge blocks of the change of basis */
1160:   MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1161:   MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1162:   extmem *= maxsize;
1163:   PetscMalloc1(extmem*nee,&extrow);
1164:   PetscMalloc1(nee,&extrows);
1165:   PetscCalloc1(nee,&extrowcum);
1166:   for (i=0;i<nv;i++) {
1167:     PetscInt mark = 0,size,start;

1169:     if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1170:     for (j=ii[i];j<ii[i+1];j++)
1171:       if (marks[jj[j]] && !mark)
1172:         mark = marks[jj[j]];

1174:     /* not relevant */
1175:     if (!mark) continue;

1177:     /* import extended row */
1178:     mark--;
1179:     start = mark*extmem+extrowcum[mark];
1180:     size = ii[i+1]-ii[i];
1181:     if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1182:     PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1183:     extrowcum[mark] += size;
1184:   }
1185:   MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1186:   MatDestroy(&lGt);
1187:   PetscFree(marks);

1189:   /* Compress extrows */
1190:   cum  = 0;
1191:   for (i=0;i<nee;i++) {
1192:     PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1193:     PetscSortRemoveDupsInt(&size,start);
1194:     ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1195:     cum  = PetscMax(cum,size);
1196:   }
1197:   PetscFree(extrowcum);
1198:   PetscBTDestroy(&btv);
1199:   PetscBTDestroy(&btvcand);

1201:   /* Workspace for lapack inner calls and VecSetValues */
1202:   PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);

1204:   /* Create change of basis matrix (preallocation can be improved) */
1205:   MatCreate(comm,&T);
1206:   MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1207:                        pc->pmat->rmap->N,pc->pmat->rmap->N);
1208:   MatSetType(T,MATAIJ);
1209:   MatSeqAIJSetPreallocation(T,10,NULL);
1210:   MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1211:   MatSetLocalToGlobalMapping(T,al2g,al2g);
1212:   MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1213:   MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1214:   ISLocalToGlobalMappingDestroy(&al2g);

1216:   /* Defaults to identity */
1217:   MatCreateVecs(pc->pmat,&tvec,NULL);
1218:   VecSet(tvec,1.0);
1219:   MatDiagonalSet(T,tvec,INSERT_VALUES);
1220:   VecDestroy(&tvec);

1222:   /* Create discrete gradient for the coarser level if needed */
1223:   MatDestroy(&pcbddc->nedcG);
1224:   ISDestroy(&pcbddc->nedclocal);
1225:   if (pcbddc->current_level < pcbddc->max_levels) {
1226:     ISLocalToGlobalMapping cel2g,cvl2g;
1227:     IS                     wis,gwis;
1228:     PetscInt               cnv,cne;

1230:     ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1231:     if (fl2g) {
1232:       ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1233:     } else {
1234:       PetscObjectReference((PetscObject)wis);
1235:       pcbddc->nedclocal = wis;
1236:     }
1237:     ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1238:     ISDestroy(&wis);
1239:     ISRenumber(gwis,NULL,&cne,&wis);
1240:     ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1241:     ISDestroy(&wis);
1242:     ISDestroy(&gwis);

1244:     ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1245:     ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1246:     ISDestroy(&wis);
1247:     ISRenumber(gwis,NULL,&cnv,&wis);
1248:     ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1249:     ISDestroy(&wis);
1250:     ISDestroy(&gwis);

1252:     MatCreate(comm,&pcbddc->nedcG);
1253:     MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1254:     MatSetType(pcbddc->nedcG,MATAIJ);
1255:     MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1256:     MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1257:     MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1258:     ISLocalToGlobalMappingDestroy(&cel2g);
1259:     ISLocalToGlobalMappingDestroy(&cvl2g);
1260:   }
1261:   ISLocalToGlobalMappingDestroy(&vl2g);

1263: #if defined(PRINT_GDET)
1264:   inc = 0;
1265:   lev = pcbddc->current_level;
1266: #endif

1268:   /* Insert values in the change of basis matrix */
1269:   for (i=0;i<nee;i++) {
1270:     Mat         Gins = NULL, GKins = NULL;
1271:     IS          cornersis = NULL;
1272:     PetscScalar cvals[2];

1274:     if (pcbddc->nedcG) {
1275:       ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1276:     }
1277:     PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1278:     if (Gins && GKins) {
1279:       PetscScalar    *data;
1280:       const PetscInt *rows,*cols;
1281:       PetscInt       nrh,nch,nrc,ncc;

1283:       ISGetIndices(eedges[i],&cols);
1284:       /* H1 */
1285:       ISGetIndices(extrows[i],&rows);
1286:       MatGetSize(Gins,&nrh,&nch);
1287:       MatDenseGetArray(Gins,&data);
1288:       MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1289:       MatDenseRestoreArray(Gins,&data);
1290:       ISRestoreIndices(extrows[i],&rows);
1291:       /* complement */
1292:       MatGetSize(GKins,&nrc,&ncc);
1293:       if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1294:       if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1295:       if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1296:       MatDenseGetArray(GKins,&data);
1297:       MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1298:       MatDenseRestoreArray(GKins,&data);

1300:       /* coarse discrete gradient */
1301:       if (pcbddc->nedcG) {
1302:         PetscInt cols[2];

1304:         cols[0] = 2*i;
1305:         cols[1] = 2*i+1;
1306:         MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1307:       }
1308:       ISRestoreIndices(eedges[i],&cols);
1309:     }
1310:     ISDestroy(&extrows[i]);
1311:     ISDestroy(&extcols[i]);
1312:     ISDestroy(&cornersis);
1313:     MatDestroy(&Gins);
1314:     MatDestroy(&GKins);
1315:   }
1316:   ISLocalToGlobalMappingDestroy(&el2g);

1318:   /* Start assembling */
1319:   MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1320:   if (pcbddc->nedcG) {
1321:     MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1322:   }

1324:   /* Free */
1325:   if (fl2g) {
1326:     ISDestroy(&primals);
1327:     for (i=0;i<nee;i++) {
1328:       ISDestroy(&eedges[i]);
1329:     }
1330:     PetscFree(eedges);
1331:   }

1333:   /* hack mat_graph with primal dofs on the coarse edges */
1334:   {
1335:     PCBDDCGraph graph   = pcbddc->mat_graph;
1336:     PetscInt    *oqueue = graph->queue;
1337:     PetscInt    *ocptr  = graph->cptr;
1338:     PetscInt    ncc,*idxs;

1340:     /* find first primal edge */
1341:     if (pcbddc->nedclocal) {
1342:       ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1343:     } else {
1344:       if (fl2g) {
1345:         ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1346:       }
1347:       idxs = cedges;
1348:     }
1349:     cum = 0;
1350:     while (cum < nee && cedges[cum] < 0) cum++;

1352:     /* adapt connected components */
1353:     PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1354:     graph->cptr[0] = 0;
1355:     for (i=0,ncc=0;i<graph->ncc;i++) {
1356:       PetscInt lc = ocptr[i+1]-ocptr[i];
1357:       if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1358:         graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1359:         graph->queue[graph->cptr[ncc]] = cedges[cum];
1360:         ncc++;
1361:         lc--;
1362:         cum++;
1363:         while (cum < nee && cedges[cum] < 0) cum++;
1364:       }
1365:       graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1366:       for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1367:       ncc++;
1368:     }
1369:     graph->ncc = ncc;
1370:     if (pcbddc->nedclocal) {
1371:       ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1372:     }
1373:     PetscFree2(ocptr,oqueue);
1374:   }
1375:   ISLocalToGlobalMappingDestroy(&fl2g);
1376:   PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1377:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
1378:   MatDestroy(&conn);

1380:   ISDestroy(&nedfieldlocal);
1381:   PetscFree(extrow);
1382:   PetscFree2(work,rwork);
1383:   PetscFree(corners);
1384:   PetscFree(cedges);
1385:   PetscFree(extrows);
1386:   PetscFree(extcols);
1387:   MatDestroy(&lG);

1389:   /* Complete assembling */
1390:   MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1391:   if (pcbddc->nedcG) {
1392:     MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1393: #if 0
1394:     PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1395:     MatView(pcbddc->nedcG,NULL);
1396: #endif
1397:   }

1399:   /* set change of basis */
1400:   PCBDDCSetChangeOfBasisMat(pc,T,singular);
1401:   MatDestroy(&T);

1403:   return(0);
1404: }

1406: /* the near-null space of BDDC carries information on quadrature weights,
1407:    and these can be collinear -> so cheat with MatNullSpaceCreate
1408:    and create a suitable set of basis vectors first */
/* PCBDDCNullSpaceCreate - wraps MatNullSpaceCreate: it temporarily writes a few
   placeholder entries into quad_vecs, creates the null space, then zeroes the
   same entries again.

   Input:
     comm      - communicator forwarded to MatNullSpaceCreate
     has_const - forwarded to MatNullSpaceCreate; also selects which entries are written
     nvecs     - number of vectors in quad_vecs
     quad_vecs - vectors handed to MatNullSpaceCreate; modified and then reset here
   Output:
     nnsp      - the object produced by MatNullSpaceCreate

   Errors with PETSC_ERR_SUP ("Not implemented") when has_const is set and the
   ownership range of a vector on this process has fewer than 2*nvecs entries. */
1409: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1410: {
1412:   PetscInt       i;

        /* first pass: write the placeholder entries of the i-th vector */
1415:   for (i=0;i<nvecs;i++) {
1416:     PetscInt first,last;

1418:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1419:     if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
          /* only the process whose ownership range [first,last) contains index i writes */
1420:     if (i>=first && i < last) {
1421:       PetscScalar *data;
1422:       VecGetArray(quad_vecs[i],&data);
1423:       if (!has_const) {
              /* single unit entry at global index i */
1424:         data[i-first] = 1.;
1425:       } else {
              /* two consecutive entries +1/sqrt(2) and -1/sqrt(2); they sum to zero,
                 presumably to stay orthogonal to the constant vector - TODO confirm */
1426:         data[2*i-first] = 1./PetscSqrtReal(2.);
1427:         data[2*i-first+1] = -1./PetscSqrtReal(2.);
1428:       }
1429:       VecRestoreArray(quad_vecs[i],&data);
1430:     }
          /* executed on every process, whether or not it wrote into the array */
1431:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1432:   }
1433:   MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1434:   for (i=0;i<nvecs;i++) { /* reset vectors */
1435:     PetscInt first,last;
          /* NOTE(review): the pop/push pair around the write suggests the vectors are
             read-locked once they belong to the null space - confirm */
1436:     VecLockReadPop(quad_vecs[i]);
1437:     VecGetOwnershipRange(quad_vecs[i],&first,&last);
1438:     if (i>=first && i < last) {
1439:       PetscScalar *data;
1440:       VecGetArray(quad_vecs[i],&data);
            /* zero exactly the positions written in the first pass */
1441:       if (!has_const) {
1442:         data[i-first] = 0.;
1443:       } else {
1444:         data[2*i-first] = 0.;
1445:         data[2*i-first+1] = 0.;
1446:       }
1447:       VecRestoreArray(quad_vecs[i],&data);
1448:     }
1449:     PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1450:     VecLockReadPush(quad_vecs[i]);
1451:   }
1452:   return(0);
1453: }

/* PCBDDCComputeNoNetFlux - creates a null space object (nnsp) whose vectors are
   filled with quadrature weights: the local matrix of divudotp applied to a
   vector of ones, restricted to the dofs shared with each neighbour.

   Input:
     A         - matrix used to create the global vectors, to pick the
                 local-to-global map and to provide the communicator
     divudotp  - matrix whose local part (MatISGetLocalMat) is applied to a vector of ones
     transpose - if false use MatMultTranspose and the first map returned by
                 MatGetLocalToGlobalMapping; if true use MatMult and the second map
     vl2l      - optional IS; when given, the local result is scattered into a
                 vector of the local matrix of A at these indices
     graph     - supplies l2gmap (neighbour info), count and neighbours_set
   Output:
     nnsp      - created via PCBDDCNullSpaceCreate; set to NULL when no process
                 in the communicator of A reports any neighbour */
1455: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1456: {
1457:   Mat                    loc_divudotp;
1458:   Vec                    p,v,vins,quad_vec,*quad_vecs;
1459:   ISLocalToGlobalMapping map;
1460:   PetscScalar            *vals;
1461:   const PetscScalar      *array;
1462:   PetscInt               i,maxneighs,maxsize,*gidxs;
1463:   PetscInt               n_neigh,*neigh,*n_shared,**shared;
1464:   PetscMPIInt            rank;
1465:   PetscErrorCode         ierr;

        /* maxneighs = largest n_neigh over all processes; it fixes the number of vectors */
1468:   ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1469:   MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1470:   if (!maxneighs) {
1471:     ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1472:     *nnsp = NULL;
1473:     return(0);
1474:   }
        /* gidxs and vals are sized for the largest set of shared dofs */
1475:   maxsize = 0;
1476:   for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1477:   PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1478:   /* create vectors to hold quadrature weights */
1479:   MatCreateVecs(A,&quad_vec,NULL);
1480:   if (!transpose) {
1481:     MatGetLocalToGlobalMapping(A,&map,NULL);
1482:   } else {
1483:     MatGetLocalToGlobalMapping(A,NULL,&map);
1484:   }
1485:   VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1486:   VecDestroy(&quad_vec);
1487:   PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
        /* the matching VecLockReadPush is issued after assembly at the end of this function */
1488:   for (i=0;i<maxneighs;i++) {
1489:     VecLockReadPop(quad_vecs[i]);
1490:   }

1492:   /* compute local quad vec */
1493:   MatISGetLocalMat(divudotp,&loc_divudotp);
1494:   if (!transpose) {
1495:     MatCreateVecs(loc_divudotp,&v,&p);
1496:   } else {
1497:     MatCreateVecs(loc_divudotp,&p,&v);
1498:   }
        /* v = loc_divudotp^T * ones (transpose false) or loc_divudotp * ones (transpose true) */
1499:   VecSet(p,1.);
1500:   if (!transpose) {
1501:     MatMultTranspose(loc_divudotp,p,v);
1502:   } else {
1503:     MatMult(loc_divudotp,p,v);
1504:   }
1505:   if (vl2l) {
1506:     Mat        lA;
1507:     VecScatter sc;

          /* place v into a vector of the local matrix of A at the positions listed in vl2l */
1509:     MatISGetLocalMat(A,&lA);
1510:     MatCreateVecs(lA,&vins,NULL);
1511:     VecScatterCreate(v,NULL,vins,vl2l,&sc);
1512:     VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1513:     VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1514:     VecScatterDestroy(&sc);
1515:   } else {
          /* no remapping requested: vins aliases v (only v is destroyed below) */
1516:     vins = v;
1517:   }
1518:   VecGetArrayRead(vins,&array);
1519:   VecDestroy(&p);

1521:   /* insert in global quadrature vecs */
1522:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1523:   for (i=0;i<n_neigh;i++) {
1524:     const PetscInt    *idxs;
1525:     PetscInt          idx,nn,j;

1527:     idxs = shared[i];
1528:     nn   = n_shared[i];
          /* gather the local weights on the dofs shared with the i-th entry */
1529:     for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
          /* NOTE(review): idxs[0] is read unconditionally; assumes nn > 0 for every entry - confirm */
1530:     PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
          /* NOTE(review): relies on PetscFindInt reporting a value that is not found as
             -(slot+1), so idx becomes the slot where rank would be inserted; assumes rank
             is not stored in neighbours_set - confirm */
1531:     idx  = -(idx+1);
1532:     ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1533:     VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1534:   }
1535:   ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1536:   VecRestoreArrayRead(vins,&array);
        /* vins is a separate object only when vl2l was given */
1537:   if (vl2l) {
1538:     VecDestroy(&vins);
1539:   }
1540:   VecDestroy(&v);
1541:   PetscFree2(gidxs,vals);

1543:   /* assemble near null space */
        /* all assemblies are started before any is completed */
1544:   for (i=0;i<maxneighs;i++) {
1545:     VecAssemblyBegin(quad_vecs[i]);
1546:   }
1547:   for (i=0;i<maxneighs;i++) {
1548:     VecAssemblyEnd(quad_vecs[i]);
1549:     VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1550:     VecLockReadPush(quad_vecs[i]);
1551:   }
        /* presumably the null space holds its own references to the vectors - TODO confirm */
1552:   VecDestroyVecs(maxneighs,&quad_vecs);
1553:   return(0);
1554: }

/* PCBDDCAddPrimalVerticesLocalIS - merges primalv into the primal vertices
   already stored in pcbddc->user_primal_vertices_local.

   Input:
     pc      - preconditioner whose data pointer is a PC_BDDC
     primalv - index set to add; a NULL value makes the call a no-op

   If no set is stored yet, the call is forwarded to PCBDDCSetPrimalVerticesLocalIS. */
1556: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1557: {
1558:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

1562:   if (primalv) {
1563:     if (pcbddc->user_primal_vertices_local) {
1564:       IS list[2], newp;

            /* concatenate new and stored sets, then sort and drop duplicate indices */
1566:       list[0] = primalv;
1567:       list[1] = pcbddc->user_primal_vertices_local;
1568:       ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1569:       ISSortRemoveDups(newp);
            /* list[1] is the previously stored set: release it and store the merged one */
1570:       ISDestroy(&list[1]);
1571:       pcbddc->user_primal_vertices_local = newp;
1572:     } else {
1573:       PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1574:     }
1575:   }
1576:   return(0);
1577: }

/* func_coords_private - writes one coordinate of the point X into every output slot.

   Input:
     dim - not used in the body
     t   - not used in the body
     X   - coordinates of the point
     Nf  - number of entries of out to fill
     ctx - read as a pointer to a PetscInt holding the coordinate index to copy
   Output:
     out - out[f] = X[*comp] for f = 0..Nf-1 */
1579: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1580: {
1581:   PetscInt f, *comp  = (PetscInt *)ctx;

1584:   for (f=0;f<Nf;f++) out[f] = X[*comp];
1585:   return(0);
1586: }

/*
   PCBDDCComputeLocalTopologyInfo - Prepares the local (subdomain) topology data kept in the PC_BDDC object.

   Input Parameter:
.  pc - the preconditioner; pc->data is accessed as PC_BDDC and pc->pmat->data as Mat_IS

   Stages, in the order they appear below:
   - read the -pc_bddc_monolithic option; when set, only the vertex size is (possibly) taken from the
     matrix block size and the dof-splitting stage is skipped
   - obtain the dof splitting in local ordering: from user-provided global index sets, from the field
     decomposition of an attached DM, from fields attached to the matrix under "_convert_nest_lfields",
     or from strided index sets when the matrix block size is larger than one
   - map (or consistency-check) Dirichlet boundaries, Neumann boundaries and user primal vertices
   - if requested, detect disconnected local components and add the returned primal vertices
   - for a DMDA: copy node coordinates into the graph and add the subdomain corners as primal vertices
   - if corner selection is active and the graph still has no coordinate dimension, build coordinates
     by projecting func_coords_private through the DM and pass them to PCSetCoordinates
*/
1588: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1589: {
1591:   Vec            local,global;
1592:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1593:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
1594:   PetscBool      monolithic = PETSC_FALSE;

1597:   PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1598:   PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1599:   PetscOptionsEnd();
1600:   /* need to convert from global to local topology information and remove references to information in global ordering */
        /* work vectors handed to PCBDDCGlobalToLocal; both are destroyed after the boundary stage */
1601:   MatCreateVecs(pc->pmat,&global,NULL);
1602:   MatCreateVecs(matis->A,&local,NULL);
1603:   if (monolithic) { /* just get block size to properly compute vertices */
1604:     if (pcbddc->vertex_size == 1) {
1605:       MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1606:     }
1607:     goto boundary;
1608:   }

        /* user gave the splitting in global ordering: map each IS to local ordering, carry over its
           block size, then release the global copies */
1610:   if (pcbddc->user_provided_isfordofs) {
1611:     if (pcbddc->n_ISForDofs) {
1612:       PetscInt i;

1614:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1615:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
1616:         PetscInt bs;

1618:         PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1619:         ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1620:         ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1621:         ISDestroy(&pcbddc->ISForDofs[i]);
1622:       }
1623:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1624:       pcbddc->n_ISForDofs = 0;
1625:       PetscFree(pcbddc->ISForDofs);
1626:     }
1627:   } else {
1628:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1629:       DM dm;

            /* prefer the DM attached to the matrix, fall back to the one attached to the PC */
1631:       MatGetDM(pc->pmat, &dm);
1632:       if (!dm) {
1633:         PCGetDM(pc, &dm);
1634:       }
1635:       if (dm) {
1636:         IS      *fields;
1637:         PetscInt nf,i;

1639:         DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1640:         PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1641:         for (i=0;i<nf;i++) {
1642:           PetscInt bs;

1644:           PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1645:           ISGetBlockSize(fields[i],&bs);
1646:           ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1647:           ISDestroy(&fields[i]);
1648:         }
1649:         PetscFree(fields);
1650:         pcbddc->n_ISForDofsLocal = nf;
1651:       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1652:         PetscContainer   c;

1654:         PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1655:         if (c) {
1656:           MatISLocalFields lf;
1657:           PetscContainerGetPointer(c,(void**)&lf);
1658:           PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1659:         } else { /* fallback, create the default fields if bs > 1 */
                /* i first holds the matrix block size, then is reused as the loop counter */
1660:           PetscInt i, n = matis->A->rmap->n;
1661:           MatGetBlockSize(pc->pmat,&i);
1662:           if (i > 1) {
1663:             pcbddc->n_ISForDofsLocal = i;
1664:             PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1665:             for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
                    /* field i: n/bs entries starting at i with stride bs */
1666:               ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1667:             }
1668:           }
1669:         }
1670:       }
1671:     } else {
            /* a local splitting already exists: run the consistency check (MPI_LAND) on each IS */
1672:       PetscInt i;
1673:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1674:         PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1675:       }
1676:     }
1677:   }

      /* Boundaries: map the global IS to local ordering when no local version exists; otherwise
         consistency-check the local one (MPI_LAND for Dirichlet, MPI_LOR for Neumann) */
1679: boundary:
1680:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1681:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1682:   } else if (pcbddc->DirichletBoundariesLocal) {
1683:     PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1684:   }
1685:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1686:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1687:   } else if (pcbddc->NeumannBoundariesLocal) {
1688:     PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1689:   }
1690:   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1691:     PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1692:   }
1693:   VecDestroy(&global);
1694:   VecDestroy(&local);
1695:   /* detect local disconnected subdomains if requested (use matis->A) */
1696:   if (pcbddc->detect_disconnected) {
1697:     IS        primalv = NULL;
1698:     PetscInt  i;
1699:     PetscBool filter = pcbddc->detect_disconnected_filter;

          /* drop the previously stored components before recomputing them */
1701:     for (i=0;i<pcbddc->n_local_subs;i++) {
1702:       ISDestroy(&pcbddc->local_subs[i]);
1703:     }
1704:     PetscFree(pcbddc->local_subs);
1705:     PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1706:     PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1707:     ISDestroy(&primalv);
1708:   }
1709:   /* early stage corner detection */
1710:   {
1711:     DM dm;

1713:     MatGetDM(pc->pmat,&dm);
1714:     if (!dm) {
1715:       PCGetDM(pc,&dm);
1716:     }
1717:     if (dm) {
1718:       PetscBool isda;

1720:       PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1721:       if (isda) {
1722:         ISLocalToGlobalMapping l2l;
1723:         IS                     corners;
1724:         Mat                    lA;
1725:         PetscBool              gl,lo;

              /* copy the DMDA coordinates into the graph, one coordinate tuple per dof */
1727:         {
1728:           Vec               cvec;
1729:           const PetscScalar *coords;
1730:           PetscInt          dof,n,cdim;
1731:           PetscBool         memc = PETSC_TRUE;

1733:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1734:           DMGetCoordinates(dm,&cvec);
1735:           VecGetLocalSize(cvec,&n);
1736:           VecGetBlockSize(cvec,&cdim);
                /* n becomes the number of coordinate tuples (local size divided by block size) */
1737:           n   /= cdim;
1738:           PetscFree(pcbddc->mat_graph->coords);
1739:           PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1740:           VecGetArrayRead(cvec,&coords);
                /* a plain memcpy is used only for real scalars with a single dof per node */
1741: #if defined(PETSC_USE_COMPLEX)
1742:           memc = PETSC_FALSE;
1743: #endif
1744:           if (dof != 1) memc = PETSC_FALSE;
1745:           if (memc) {
1746:             PetscMemcpy(pcbddc->mat_graph->coords,coords,cdim*n*dof*sizeof(PetscReal));
1747:           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1748:             PetscReal *bcoords = pcbddc->mat_graph->coords;
1749:             PetscInt  i, b, d;

1751:             for (i=0;i<n;i++) {
1752:               for (b=0;b<dof;b++) {
1753:                 for (d=0;d<cdim;d++) {
1754:                   bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1755:                 }
1756:               }
1757:             }
1758:           }
1759:           VecRestoreArrayRead(cvec,&coords);
1760:           pcbddc->mat_graph->cdim  = cdim;
1761:           pcbddc->mat_graph->cnloc = dof*n;
1762:           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1763:         }
1764:         DMDAGetSubdomainCornersIS(dm,&corners);
1765:         MatISGetLocalMat(pc->pmat,&lA);
1766:         MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1767:         MatISRestoreLocalMat(pc->pmat,&lA);
              /* continue only if every process has both the mapping and the corners (logical AND) */
1768:         lo   = (PetscBool)(l2l && corners);
1769:         MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1770:         if (gl) { /* From PETSc's DMDA */
1771:           const PetscInt    *idx;
1772:           PetscInt          dof,bs,*idxout,n;

1774:           DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1775:           ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1776:           ISGetLocalSize(corners,&n);
1777:           ISGetIndices(corners,&idx);
1778:           if (bs == dof) {
                  /* mapping block size matches the DMDA dof: map the corner indices block-wise */
1779:             PetscMalloc1(n,&idxout);
1780:             ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1781:           } else { /* the original DMDA local-to-local map have been modified */
1782:             PetscInt i,d;

                  /* expand each corner into its dof point-wise indices and map them in place */
1784:             PetscMalloc1(dof*n,&idxout);
1785:             for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1786:             ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);

1788:             bs = 1;
1789:             n *= dof;
1790:           }
1791:           ISRestoreIndices(corners,&idx);
1792:           DMDARestoreSubdomainCornersIS(dm,&corners);
                /* corners is reused for the mapped indices; idxout is handed over with PETSC_OWN_POINTER */
1793:           ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1794:           PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1795:           ISDestroy(&corners);
1796:           pcbddc->corner_selected  = PETSC_TRUE;
1797:           pcbddc->corner_selection = PETSC_TRUE;
1798:         }
              /* NOTE(review): presumably non-NULL here only when the branch above was skipped
                 (ISDestroy is expected to reset the pointer) - confirm */
1799:         if (corners) {
1800:           DMDARestoreSubdomainCornersIS(dm,&corners);
1801:         }
1802:       }
1803:     }
1804:   }
        /* corner selection requested but the graph has no coordinate dimension yet */
1805:   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1806:     DM dm;

1808:     MatGetDM(pc->pmat,&dm);
1809:     if (!dm) {
1810:       PCGetDM(pc,&dm);
1811:     }
1812:     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1813:       Vec            vcoords;
1814:       PetscSection   section;
1815:       PetscReal      *coords;
1816:       PetscInt       d,cdim,nl,nf,**ctxs;
1817:       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);

1819:       DMGetCoordinateDim(dm,&cdim);
1820:       DMGetSection(dm,&section);
1821:       PetscSectionGetNumFields(section,&nf);
1822:       DMCreateGlobalVector(dm,&vcoords);
1823:       VecGetLocalSize(vcoords,&nl);
1824:       PetscMalloc1(nl*cdim,&coords);
            /* one callback and one PetscInt context per field; the contexts are stored contiguously
               in the array allocated at ctxs[0] */
1825:       PetscMalloc2(nf,&funcs,nf,&ctxs);
1826:       PetscMalloc1(nf,&ctxs[0]);
1827:       for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1828:       for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
            /* one projection per coordinate component; results are interleaved into coords */
1829:       for (d=0;d<cdim;d++) {
1830:         PetscInt          i;
1831:         const PetscScalar *v;

              /* every field's context selects component d */
1833:         for (i=0;i<nf;i++) ctxs[i][0] = d;
1834:         DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1835:         VecGetArrayRead(vcoords,&v);
1836:         for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1837:         VecRestoreArrayRead(vcoords,&v);
1838:       }
1839:       VecDestroy(&vcoords);
1840:       PCSetCoordinates(pc,cdim,nl,coords);
1841:       PetscFree(coords);
1842:       PetscFree(ctxs[0]);
1843:       PetscFree2(funcs,ctxs);
1844:     }
1845:   }
1846:   return(0);
1847: }

/*
   PCBDDCConsistencyCheckIS - Replaces a local index set with the one obtained after combining the
   per-process membership flags through the star forest stored in the MATIS matrix.

   Input Parameters:
+  pc  - the preconditioner; pc->pmat->data is accessed as Mat_IS
.  mop - reduction used on the flags; only MPI_LAND and MPI_LOR are accepted
-  is  - on entry, the index set to check (indices outside [0,n) are ignored, n being the local
         row size of matis->A)

   Output Parameter:
.  is  - the original IS is destroyed and replaced by a new IS listing, in increasing order,
         the local indices whose flag is true after the reduce/broadcast round trip

   Notes:
   Uses matis->sf_rootdata and matis->sf_leafdata as PetscBool work arrays.
*/
1849: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1850: {
1851:   Mat_IS          *matis = (Mat_IS*)(pc->pmat->data);
1852:   PetscErrorCode  ierr;
1853:   IS              nis;
1854:   const PetscInt  *idxs;
1855:   PetscInt        i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1856:   PetscBool       *ld;

1859:   if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
        /* initialise the root flags with the neutral element of the reduction: true for AND, false for OR */
1860:   if (mop == MPI_LAND) {
1861:     /* init rootdata with true */
1862:     ld   = (PetscBool*) matis->sf_rootdata;
1863:     for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1864:   } else {
1865:     PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1866:   }
        /* leaf flags: true exactly for the in-range indices contained in *is */
1867:   PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1868:   ISGetLocalSize(*is,&nd);
1869:   ISGetIndices(*is,&idxs);
1870:   ld   = (PetscBool*) matis->sf_leafdata;
1871:   for (i=0;i<nd;i++)
1872:     if (-1 < idxs[i] && idxs[i] < n)
1873:       ld[idxs[i]] = PETSC_TRUE;
1874:   ISRestoreIndices(*is,&idxs);
        /* combine the leaf flags on the roots with mop, then send the result back to the leaves */
1875:   PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1876:   PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1877:   PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1878:   PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
        /* output buffer: nd entries for AND, n entries for OR */
1879:   if (mop == MPI_LAND) {
1880:     PetscMalloc1(nd,&nidxs);
1881:   } else {
1882:     PetscMalloc1(n,&nidxs);
1883:   }
        /* ld still points to the leaf data; collect the indices flagged true */
1884:   for (i=0,nnd=0;i<n;i++)
1885:     if (ld[i])
1886:       nidxs[nnd++] = i;
        /* the new IS takes ownership of nidxs and lives on the communicator of the old one */
1887:   ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1888:   ISDestroy(is);
1889:   *is  = nis;
1890:   return(0);
1891: }

/*
   PCBDDCBenignRemoveInterior - Computes z from r through a solve with pcbddc->ksp_D on the part of r
   selected by the pcis->global_to_D scatter.

   Input Parameters:
+  pc - the preconditioner; pc->data is accessed both as PC_IS and as PC_BDDC
-  r  - input vector

   Output Parameter:
.  z  - zeroed, then filled by the reverse scatter of the solution; when a change of basis matrix
        is present it is finally multiplied by that matrix

   Notes:
   Returns immediately, leaving z untouched, when pcbddc->benign_have_null is false.
   When pcbddc->ChangeOfBasisMatrix is set, the transposed change of basis is applied to r first and
   pcbddc->work_change is temporarily swapped with r; the field is pointed back to the work vector
   before it is used again.
*/
1893: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1894: {
1895:   PC_IS             *pcis = (PC_IS*)(pc->data);
1896:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
1897:   PetscErrorCode    ierr;

1900:   if (!pcbddc->benign_have_null) {
1901:     return(0);
1902:   }
1903:   if (pcbddc->ChangeOfBasisMatrix) {
1904:     Vec swap;

          /* after this block the local r is the transformed vector (the work vector), while the
             work_change field temporarily holds the caller's r */
1906:     MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1907:     swap = pcbddc->work_change;
1908:     pcbddc->work_change = r;
1909:     r = swap;
1910:   }
        /* scatter to vec1_D, solve with ksp_D into vec2_D, scatter the solution back into a zeroed z */
1911:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1912:   VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1913:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1914:   KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1915:   VecSet(z,0.);
1916:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1917:   VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1918:   if (pcbddc->ChangeOfBasisMatrix) {
          /* undo the swap (r is the work vector here), then z <- ChangeOfBasisMatrix * z using it as scratch */
1919:     pcbddc->work_change = r;
1920:     VecCopy(z,pcbddc->work_change);
1921:     MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1922:   }
1923:   return(0);
1924: }

/*
   PCBDDCBenignMatMult_Private_Private - Shared implementation of the multiply and transpose-multiply
   operations of the benign shell matrix.

   Input Parameters:
+  A         - shell matrix whose context is a PCBDDCBenignMatMult_ctx
.  x         - input vector
-  transpose - if true, ctx->A is applied transposed and the roles of the left/right flags stored
               in the context are exchanged

   Output Parameter:
.  y         - result vector

   Notes:
   The "right" modification acts on a copy of x held in ctx->work, which is temporarily placed into
   x for the product and then reset. The "left" modification acts on y in place.
   For every index set in ctx->benign_zerodiag_subs the last index is treated differently from the
   first nz-1 ones.
*/
1926: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1927: {
1928:   PCBDDCBenignMatMult_ctx ctx;
1929:   PetscErrorCode          ierr;
1930:   PetscBool               apply_right,apply_left,reset_x;

1933:   MatShellGetContext(A,&ctx);
        /* transposing the operator exchanges which side each modification is applied on */
1934:   if (transpose) {
1935:     apply_right = ctx->apply_left;
1936:     apply_left = ctx->apply_right;
1937:   } else {
1938:     apply_right = ctx->apply_right;
1939:     apply_left = ctx->apply_left;
1940:   }
1941:   reset_x = PETSC_FALSE;
1942:   if (apply_right) {
1943:     const PetscScalar *ax;
1944:     PetscInt          nl,i;

          /* work on a copy so the entries of x are not overwritten */
1946:     VecGetLocalSize(x,&nl);
1947:     VecGetArrayRead(x,&ax);
1948:     PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1949:     VecRestoreArrayRead(x,&ax);
1950:     for (i=0;i<ctx->benign_n;i++) {
1951:       PetscScalar    sum,val;
1952:       const PetscInt *idxs;
1953:       PetscInt       nz,j;
1954:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1955:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* sum accumulates the original first nz-1 entries; with apply_p0 the last entry's value
               is also added to each of them */
1956:       sum = 0.;
1957:       if (ctx->apply_p0) {
1958:         val = ctx->work[idxs[nz-1]];
1959:         for (j=0;j<nz-1;j++) {
1960:           sum += ctx->work[idxs[j]];
1961:           ctx->work[idxs[j]] += val;
1962:         }
1963:       } else {
1964:         for (j=0;j<nz-1;j++) {
1965:           sum += ctx->work[idxs[j]];
1966:         }
1967:       }
            /* the last entry is reduced by the accumulated sum */
1968:       ctx->work[idxs[nz-1]] -= sum;
1969:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1970:     }
          /* let x temporarily expose the modified copy; undone by VecResetArray below */
1971:     VecPlaceArray(x,ctx->work);
1972:     reset_x = PETSC_TRUE;
1973:   }
1974:   if (transpose) {
1975:     MatMultTranspose(ctx->A,x,y);
1976:   } else {
1977:     MatMult(ctx->A,x,y);
1978:   }
1979:   if (reset_x) {
1980:     VecResetArray(x);
1981:   }
1982:   if (apply_left) {
1983:     PetscScalar *ay;
1984:     PetscInt    i;

1986:     VecGetArray(y,&ay);
1987:     for (i=0;i<ctx->benign_n;i++) {
1988:       PetscScalar    sum,val;
1989:       const PetscInt *idxs;
1990:       PetscInt       nz,j;
1991:       ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1992:       ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
            /* val is minus the last entry; it is added to each of the first nz-1 entries */
1993:       val = -ay[idxs[nz-1]];
1994:       if (ctx->apply_p0) {
              /* sum uses each entry's value before val is added; the last entry is increased by it */
1995:         sum = 0.;
1996:         for (j=0;j<nz-1;j++) {
1997:           sum += ay[idxs[j]];
1998:           ay[idxs[j]] += val;
1999:         }
2000:         ay[idxs[nz-1]] += sum;
2001:       } else {
              /* without apply_p0 the last entry is zeroed */
2002:         for (j=0;j<nz-1;j++) {
2003:           ay[idxs[j]] += val;
2004:         }
2005:         ay[idxs[nz-1]] = 0.;
2006:       }
2007:       ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2008:     }
2009:     VecRestoreArray(y,&ay);
2010:   }
2011:   return(0);
2012: }

/*
   PCBDDCBenignMatMultTranspose_Private - Transpose-multiply entry point for the benign shell matrix;
   forwards to PCBDDCBenignMatMult_Private_Private with transpose set to PETSC_TRUE.

   Input Parameters:
+  A - the shell matrix
-  x - input vector

   Output Parameter:
.  y - result vector
*/
2014: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2015: {

2019:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2020:   return(0);
2021: }

/*
   PCBDDCBenignMatMult_Private - Multiply entry point for the benign shell matrix; forwards to
   PCBDDCBenignMatMult_Private_Private with transpose set to PETSC_FALSE.

   Input Parameters:
+  A - the shell matrix
-  x - input vector

   Output Parameter:
.  y - result vector
*/
2023: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2024: {

2028:   PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2029:   return(0);
2030: }

/*
   PCBDDCBenignShellMat - Installs, or removes, shell-matrix replacements for pcis->A_IB and pcis->A_BI.

   Input Parameters:
+  pc      - the preconditioner; pc->data is accessed both as PC_IS and as PC_BDDC
-  restore - if false, wrap the current matrices; if true, put the original ones back

   Notes:
   Install (restore is false): raises an error if a previous wrap has not been restored, and does
   nothing when there is no benign change, benign_n is zero, or the change is explicit. Otherwise a
   MATSHELL of size (n - n_B) x n_B replaces pcis->A_IB (the original is kept in the shell context)
   and a transpose wrapper of that shell replaces pcis->A_BI (the original is kept in
   pcbddc->benign_original_mat).

   Restore (restore is true): does nothing when pcbddc->benign_original_mat is NULL. Otherwise both
   wrappers are destroyed, the original matrices are put back and the shell context is released.
*/
2032: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2033: {
2034:   PC_IS                   *pcis = (PC_IS*)pc->data;
2035:   PC_BDDC                 *pcbddc = (PC_BDDC*)pc->data;
2036:   PCBDDCBenignMatMult_ctx ctx;
2037:   PetscErrorCode          ierr;

2040:   if (!restore) {
2041:     Mat                A_IB,A_BI;
2042:     PetscScalar        *work;
2043:     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;

2045:     if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2046:     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
          /* scratch array stored in the context; the multiply routine copies its input vector here */
2047:     PetscMalloc1(pcis->n,&work);
2048:     MatCreate(PETSC_COMM_SELF,&A_IB);
2049:     MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2050:     MatSetType(A_IB,MATSHELL);
2051:     MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2052:     MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2053:     PetscNew(&ctx);
2054:     MatShellSetContext(A_IB,ctx);
          /* only the left modification is enabled for this shell, and without the p0 contribution */
2055:     ctx->apply_left = PETSC_TRUE;
2056:     ctx->apply_right = PETSC_FALSE;
2057:     ctx->apply_p0 = PETSC_FALSE;
2058:     ctx->benign_n = pcbddc->benign_n;
2059:     if (reuse) {
            /* borrow the index sets from the reuse-solver data; ctx->free false means they are not
               destroyed on restore */
2060:       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2061:       ctx->free = PETSC_FALSE;
2062:     } else { /* TODO: could be optimized for successive solves */
2063:       ISLocalToGlobalMapping N_to_D;
2064:       PetscInt               i;

            /* renumber each zero-diagonal IS through the mapping built from is_I_local, dropping
               indices that are not in it; these copies are owned by the context */
2066:       ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2067:       PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2068:       for (i=0;i<pcbddc->benign_n;i++) {
2069:         ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2070:       }
2071:       ISLocalToGlobalMappingDestroy(&N_to_D);
2072:       ctx->free = PETSC_TRUE;
2073:     }
          /* keep the original A_IB in the context and expose the shell in its place */
2074:     ctx->A = pcis->A_IB;
2075:     ctx->work = work;
2076:     MatSetUp(A_IB);
2077:     MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2078:     MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2079:     pcis->A_IB = A_IB;

2081:     /* A_BI as A_IB^T */
2082:     MatCreateTranspose(A_IB,&A_BI);
2083:     pcbddc->benign_original_mat = pcis->A_BI;
2084:     pcis->A_BI = A_BI;
2085:   } else {
2086:     if (!pcbddc->benign_original_mat) {
2087:       return(0);
2088:     }
          /* fetch the context before destroying the shell: it holds the original A_IB */
2089:     MatShellGetContext(pcis->A_IB,&ctx);
2090:     MatDestroy(&pcis->A_IB);
2091:     pcis->A_IB = ctx->A;
2092:     ctx->A = NULL;
2093:     MatDestroy(&pcis->A_BI);
2094:     pcis->A_BI = pcbddc->benign_original_mat;
2095:     pcbddc->benign_original_mat = NULL;
          /* index sets are destroyed only when they were created by the install branch */
2096:     if (ctx->free) {
2097:       PetscInt i;
2098:       for (i=0;i<ctx->benign_n;i++) {
2099:         ISDestroy(&ctx->benign_zerodiag_subs[i]);
2100:       }
2101:       PetscFree(ctx->benign_zerodiag_subs);
2102:     }
2103:     PetscFree(ctx->work);
2104:     PetscFree(ctx);
2105:   }
2106:   return(0);
2107: }

2109: /* used just in bddc debug mode */
/*
   PCBDDCBenignProject - Builds the local matrix in the benign change of basis and optionally
   extracts a submatrix of it.

   Input Parameters:
+  pc  - the preconditioner; pc->data is accessed as PC_BDDC and pc->pmat->data as Mat_IS
.  is1 - row index set of the submatrix, or NULL to return the whole matrix
-  is2 - column index set of the submatrix (used only when is1 is non-NULL)

   Output Parameter:
.  B   - the resulting matrix; created here, so the caller is presumably responsible for destroying it
*/
2110: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2111: {
2112:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2113:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
2114:   Mat            An;

        /* An = P^T A P with P = benign_change, then the rows and columns listed in benign_p0_lidx
           are zeroed with 1.0 placed on the diagonal */
2118:   MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2119:   MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2120:   if (is1) {
2121:     MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2122:     MatDestroy(&An);
2123:   } else {
2124:     *B = An;
2125:   }
2126:   return(0);
2127: }

2129: /* TODO: add reuse flag */
/*
   MatSeqAIJCompress - Creates a copy of a SeqAIJ matrix that omits the stored entries whose
   magnitude does not exceed PETSC_SMALL.

   Input Parameter:
.  A - the matrix to compress; its values are read through MatSeqAIJGetArray

   Input/Output Parameter:
.  B - on entry *B is compared with A: if they are the same object, A is destroyed (in-place use);
       on exit *B is the newly created matrix

   Notes:
   When the matrix is square, stored diagonal entries are always kept, whatever their magnitude.
   NOTE(review): *B is read before being written, so callers presumably must initialise it - confirm.
*/
2130: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2131: {
2132:   Mat            Bt;
2133:   PetscScalar    *a,*bdata;
2134:   const PetscInt *ii,*ij;
2135:   PetscInt       m,n,i,nnz,*bii,*bij;
2136:   PetscBool      flg_row;

        /* n is passed where the row count is returned, m where the column count is returned */
2140:   MatGetSize(A,&n,&m);
2141:   MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2142:   MatSeqAIJGetArray(A,&a);
        /* upper bound for the allocation: entries above the threshold plus one slot per row for a
           diagonal entry that is kept regardless of its magnitude */
2143:   nnz = n;
2144:   for (i=0;i<ii[n];i++) {
2145:     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2146:   }
2147:   PetscMalloc1(n+1,&bii);
2148:   PetscMalloc1(nnz,&bij);
2149:   PetscMalloc1(nnz,&bdata);
        /* second pass: fill the compressed row structure; nnz now counts the entries actually kept */
2150:   nnz = 0;
2151:   bii[0] = 0;
2152:   for (i=0;i<n;i++) {
2153:     PetscInt j;
2154:     for (j=ii[i];j<ii[i+1];j++) {
2155:       PetscScalar entry = a[j];
2156:       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2157:         bij[nnz] = ij[j];
2158:         bdata[nnz] = entry;
2159:         nnz++;
2160:       }
2161:     }
2162:     bii[i+1] = nnz;
2163:   }
2164:   MatSeqAIJRestoreArray(A,&a);
2165:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2166:   MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
        /* presumably marks bii/bij/bdata as owned by Bt so they are freed with it - TODO confirm */
2167:   {
2168:     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2169:     b->free_a = PETSC_TRUE;
2170:     b->free_ij = PETSC_TRUE;
2171:   }
        /* in-place use: the input matrix is destroyed before *B is overwritten */
2172:   if (*B == A) {
2173:     MatDestroy(&A);
2174:   }
2175:   *B = Bt;
2176:   return(0);
2177: }

/*
   PCBDDCDetectDisconnectedComponents - Computes the connected components of the local problem
   using a PCBDDCGraph built either from a DMPlex cell adjacency or from the local matrix.

   Input Parameters:
+  pc     - the preconditioner; a DM is looked up on pc->pmat first, then on pc
-  filter - if true, the matrix path is always used (even for a DMPlex) and small entries are
            ignored when building the adjacency

   Output Parameters (each may be NULL; all are reset to 0/NULL on entry):
+  ncc     - number of connected components
.  cc      - newly allocated array of ncc sequential index sets, one per component
-  primalv - DMPlex path only: index set of the dofs found in more than one component
             ("cross-vertex" in the code); left NULL on the matrix path

   Notes:
   On the matrix path the function returns early, with the outputs still reset, when the local
   matrix has zero global rows or columns.
   NOTE(review): in the DMPlex branch no ISRestoreIndices matching ISGetIndices(cellNumbering,...)
   is visible - confirm whether it is intentionally omitted.
*/
2179: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2180: {
2181:   Mat                    B = NULL;
2182:   DM                     dm;
2183:   IS                     is_dummy,*cc_n;
2184:   ISLocalToGlobalMapping l2gmap_dummy;
2185:   PCBDDCGraph            graph;
2186:   PetscInt               *xadj_filtered = NULL,*adjncy_filtered = NULL;
2187:   PetscInt               i,n;
2188:   PetscInt               *xadj,*adjncy;
2189:   PetscBool              isplex = PETSC_FALSE;
2190:   PetscErrorCode         ierr;

2193:   if (ncc) *ncc = 0;
2194:   if (cc) *cc = NULL;
2195:   if (primalv) *primalv = NULL;
2196:   PCBDDCGraphCreate(&graph);
2197:   MatGetDM(pc->pmat,&dm);
2198:   if (!dm) {
2199:     PCGetDM(pc,&dm);
2200:   }
2201:   if (dm) {
2202:     PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2203:   }
        /* filtering needs matrix values, so it forces the matrix path */
2204:   if (filter) isplex = PETSC_FALSE;

2206:   if (isplex) { /* this code has been modified from plexpartition.c */
2207:     PetscInt       p, pStart, pEnd, a, adjSize, idx, size, nroots;
2208:     PetscInt      *adj = NULL;
2209:     IS             cellNumbering;
2210:     const PetscInt *cellNum;
2211:     PetscBool      useCone, useClosure;
2212:     PetscSection   section;
2213:     PetscSegBuffer adjBuffer;
2214:     PetscSF        sfPoint;

2218:     DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2219:     DMGetPointSF(dm, &sfPoint);
2220:     PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2221:     /* Build adjacency graph via a section/segbuffer */
2222:     PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2223:     PetscSectionSetChart(section, pStart, pEnd);
2224:     PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2225:     /* Always use FVM adjacency to create partitioner graph */
          /* the DM's current adjacency flags are saved here and put back after the loop */
2226:     DMGetBasicAdjacency(dm, &useCone, &useClosure);
2227:     DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2228:     DMPlexGetCellNumbering(dm, &cellNumbering);
2229:     ISGetIndices(cellNumbering, &cellNum);
          /* n counts the cells that are kept, i.e. the graph vertices */
2230:     for (n = 0, p = pStart; p < pEnd; p++) {
2231:       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2232:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2233:       adjSize = PETSC_DETERMINE;
2234:       DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2235:       for (a = 0; a < adjSize; ++a) {
2236:         const PetscInt point = adj[a];
              /* record only adjacent points that are themselves in the cell range */
2237:         if (pStart <= point && point < pEnd) {
2238:           PetscInt *PETSC_RESTRICT pBuf;
2239:           PetscSectionAddDof(section, p, 1);
2240:           PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2241:           *pBuf = point;
2242:         }
2243:       }
2244:       n++;
2245:     }
2246:     DMSetBasicAdjacency(dm, useCone, useClosure);
2247:     /* Derive CSR graph from section/segbuffer */
2248:     PetscSectionSetUp(section);
2249:     PetscSectionGetStorageSize(section, &size);
2250:     PetscMalloc1(n+1, &xadj);
2251:     for (idx = 0, p = pStart; p < pEnd; p++) {
2252:       if (nroots > 0) {if (cellNum[p] < 0) continue;}
2253:       PetscSectionGetOffset(section, p, &(xadj[idx++]));
2254:     }
2255:     xadj[n] = size;
2256:     PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2257:     /* Clean up */
2258:     PetscSegBufferDestroy(&adjBuffer);
2259:     PetscSectionDestroy(&section);
2260:     PetscFree(adj);
2261:     graph->xadj = xadj;
2262:     graph->adjncy = adjncy;
2263:   } else {
2264:     Mat       A;
2265:     PetscBool isseqaij, flg_row;

2267:     MatISGetLocalMat(pc->pmat,&A);
2268:     if (!A->rmap->N || !A->cmap->N) {
2269:       PCBDDCGraphDestroy(&graph);
2270:       return(0);
2271:     }
          /* B is the SeqAIJ matrix whose row structure defines the graph: a converted copy when
             filtering a non-SeqAIJ matrix, otherwise A itself with an extra reference */
2272:     PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2273:     if (!isseqaij && filter) {
2274:       PetscBool isseqdense;

2276:       PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2277:       if (!isseqdense) {
2278:         MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2279:       } else { /* TODO: rectangular case and LDA */
2280:         PetscScalar *array;
2281:         PetscReal   chop=1.e-6;

              /* zero the off-diagonal entries that are small relative to the sum of the magnitudes
                 of the two corresponding diagonal entries, before converting to SeqAIJ */
2283:         MatDuplicate(A,MAT_COPY_VALUES,&B);
2284:         MatDenseGetArray(B,&array);
2285:         MatGetSize(B,&n,NULL);
2286:         for (i=0;i<n;i++) {
2287:           PetscInt j;
2288:           for (j=i+1;j<n;j++) {
2289:             PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2290:             if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2291:             if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2292:           }
2293:         }
2294:         MatDenseRestoreArray(B,&array);
2295:         MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2296:       }
2297:     } else {
2298:       PetscObjectReference((PetscObject)A);
2299:       B = A;
2300:     }
2301:     MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

2303:     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2304:     if (filter) {
2305:       PetscScalar *data;
2306:       PetscInt    j,cum;

            /* xadj_filtered[i] is first used as the per-row counter (zeroed by the calloc), then
               overwritten with the row's starting offset */
2308:       PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2309:       MatSeqAIJGetArray(B,&data);
2310:       cum = 0;
2311:       for (i=0;i<n;i++) {
2312:         PetscInt t;

2314:         for (j=xadj[i];j<xadj[i+1];j++) {
2315:           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2316:             continue;
2317:           }
2318:           adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2319:         }
2320:         t = xadj_filtered[i];
2321:         xadj_filtered[i] = cum;
2322:         cum += t;
2323:       }
2324:       MatSeqAIJRestoreArray(B,&data);
2325:       graph->xadj = xadj_filtered;
2326:       graph->adjncy = adjncy_filtered;
2327:     } else {
2328:       graph->xadj = xadj;
2329:       graph->adjncy = adjncy;
2330:     }
2331:   }
2332:   /* compute local connected components using PCBDDCGraph */
        /* the graph is initialised with a local-to-global mapping built from the stride 0..n-1 */
2333:   ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2334:   ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2335:   ISDestroy(&is_dummy);
2336:   PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2337:   ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2338:   PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2339:   PCBDDCGraphComputeConnectedComponents(graph);

2341:   /* partial clean up */
        /* the adjacency arrays are released here; below only graph->ncc/cptr/queue are read */
2342:   PetscFree2(xadj_filtered,adjncy_filtered);
2343:   if (B) {
2344:     PetscBool flg_row;
2345:     MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2346:     MatDestroy(&B);
2347:   }
2348:   if (isplex) {
2349:     PetscFree(xadj);
2350:     PetscFree(adjncy);
2351:   }

2353:   /* get back data */
2354:   if (isplex) {
          /* components are sets of cells: translate them into dof indices through the closure of
             each cell and the subdomain section */
2355:     if (ncc) *ncc = graph->ncc;
2356:     if (cc || primalv) {
2357:       Mat          A;
2358:       PetscBT      btv,btvt;
2359:       PetscSection subSection;
2360:       PetscInt     *ids,cum,cump,*cids,*pids;

2362:       DMPlexGetSubdomainSection(dm,&subSection);
2363:       MatISGetLocalMat(pc->pmat,&A);
2364:       PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
            /* btv marks dofs assigned by earlier components; btvt marks dofs already visited while
               processing the current component */
2365:       PetscBTCreate(A->rmap->n,&btv);
2366:       PetscBTCreate(A->rmap->n,&btvt);

            /* ids holds the dofs of all components back to back; cids[i]..cids[i+1] delimits component i */
2368:       cids[0] = 0;
2369:       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2370:         PetscInt j;

2372:         PetscBTMemzero(A->rmap->n,btvt);
2373:         for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2374:           PetscInt k, size, *closure = NULL, cell = graph->queue[j];

2376:           DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
                /* the loop reads every other closure entry (step 2, 2*size entries in total) */
2377:           for (k = 0; k < 2*size; k += 2) {
2378:             PetscInt s, pp, p = closure[k], off, dof, cdof;

2380:             PetscSectionGetConstraintDof(subSection,p,&cdof);
2381:             PetscSectionGetOffset(subSection,p,&off);
2382:             PetscSectionGetDof(subSection,p,&dof);
2383:             for (s = 0; s < dof-cdof; s++) {
2384:               if (PetscBTLookupSet(btvt,off+s)) continue;
2385:               if (!PetscBTLookup(btv,off+s)) {
2386:                 ids[cum++] = off+s;
2387:               } else { /* cross-vertex */
2388:                 pids[cump++] = off+s;
2389:               }
2390:             }
                  /* repeat the same bookkeeping for the tree parent when it differs from the point */
2391:             DMPlexGetTreeParent(dm,p,&pp,NULL);
2392:             if (pp != p) {
2393:               PetscSectionGetConstraintDof(subSection,pp,&cdof);
2394:               PetscSectionGetOffset(subSection,pp,&off);
2395:               PetscSectionGetDof(subSection,pp,&dof);
2396:               for (s = 0; s < dof-cdof; s++) {
2397:                 if (PetscBTLookupSet(btvt,off+s)) continue;
2398:                 if (!PetscBTLookup(btv,off+s)) {
2399:                   ids[cum++] = off+s;
2400:                 } else { /* cross-vertex */
2401:                   pids[cump++] = off+s;
2402:                 }
2403:               }
2404:             }
2405:           }
2406:           DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2407:         }
2408:         cids[i+1] = cum;
2409:         /* mark dofs as already assigned */
2410:         for (j = cids[i]; j < cids[i+1]; j++) {
2411:           PetscBTSet(btv,ids[j]);
2412:         }
2413:       }
2414:       if (cc) {
2415:         PetscMalloc1(graph->ncc,&cc_n);
2416:         for (i = 0; i < graph->ncc; i++) {
2417:           ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2418:         }
2419:         *cc = cc_n;
2420:       }
2421:       if (primalv) {
2422:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2423:       }
2424:       PetscFree3(ids,cids,pids);
2425:       PetscBTDestroy(&btv);
2426:       PetscBTDestroy(&btvt);
2427:     }
2428:   } else {
          /* matrix path: graph vertices are matrix rows, so the queue entries are used directly */
2429:     if (ncc) *ncc = graph->ncc;
2430:     if (cc) {
2431:       PetscMalloc1(graph->ncc,&cc_n);
2432:       for (i=0;i<graph->ncc;i++) {
2433:         ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2434:       }
2435:       *cc = cc_n;
2436:     }
2437:   }
2438:   /* clean up graph */
        /* the adjacency arrays were already released above; clear the pointers before destroying the graph */
2439:   graph->xadj = 0;
2440:   graph->adjncy = 0;
2441:   PCBDDCGraphDestroy(&graph);
2442:   return(0);
2443: }

/*
   PCBDDCBenignCheck - Consistency checks for the benign trick.

   Input:
     pc       - the preconditioner; pc->data is read both as PC_BDDC and PC_IS
     zerodiag - local pressure (zero diagonal) dofs, or NULL to skip the
                operator checks

   When zerodiag is given:
     - p0 (indicator of zerodiag, stored in pcis->vec1_N) is multiplied by the
       local matrix of pc->pmat and tested against a random vector that is
       zeroed on zerodiag, on the interface dofs (pcis->is_B_local) and on the
       Dirichlet dofs of the graph; an error is raised if the dot product
       exceeds 1.e-1 in magnitude
     - an error is raised if any index of zerodiag is also an interface dof

   In all cases the routine then round-trips known values through
   PCBDDCBenignGetOrSetP0 (set, overwrite, get) and errors if they differ.

   Side effects: overwrites pcis->vec1_N, pcis->vec2_N, pcis->vec1_global and
   pcbddc->benign_p0.
*/
2445: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2446: {
2447:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2448:   PC_IS*         pcis = (PC_IS*)(pc->data);
2449:   IS             dirIS = NULL;
2450:   PetscInt       i;

2454:   PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2455:   if (zerodiag) {
2456:     Mat            A;
2457:     Vec            vec3_N;
2458:     PetscScalar    *vals;
2459:     const PetscInt *idxs;
2460:     PetscInt       nz,*count;

2462:     /* p0 */
2463:     VecSet(pcis->vec1_N,0.);
          /* vals is a scratch buffer of pcis->n entries, reused below for every VecSetValues call */
2464:     PetscMalloc1(pcis->n,&vals);
2465:     ISGetLocalSize(zerodiag,&nz);
2466:     ISGetIndices(zerodiag,&idxs);
2467:     for (i=0;i<nz;i++) vals[i] = 1.;
2468:     VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2469:     VecAssemblyBegin(pcis->vec1_N);
2470:     VecAssemblyEnd(pcis->vec1_N);
2471:     /* v_I */
          /* random vector, zeroed on pressures, interface dofs and Dirichlet dofs */
2472:     VecSetRandom(pcis->vec2_N,NULL);
2473:     for (i=0;i<nz;i++) vals[i] = 0.;
2474:     VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2475:     ISRestoreIndices(zerodiag,&idxs);
2476:     ISGetIndices(pcis->is_B_local,&idxs);
2477:     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2478:     VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2479:     ISRestoreIndices(pcis->is_B_local,&idxs);
2480:     if (dirIS) {
2481:       PetscInt n;

2483:       ISGetLocalSize(dirIS,&n);
2484:       ISGetIndices(dirIS,&idxs);
2485:       for (i=0;i<n;i++) vals[i] = 0.;
2486:       VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2487:       ISRestoreIndices(dirIS,&idxs);
2488:     }
2489:     VecAssemblyBegin(pcis->vec2_N);
2490:     VecAssemblyEnd(pcis->vec2_N);
2491:     VecDuplicate(pcis->vec1_N,&vec3_N);
2492:     VecSet(vec3_N,0.);
2493:     MatISGetLocalMat(pc->pmat,&A);
          /* vals[0] = (A p0, v_I) */
2494:     MatMult(A,pcis->vec1_N,vec3_N);
2495:     VecDot(vec3_N,pcis->vec2_N,&vals[0]);
          /* cast to double: %e consumes a double whatever the configured PetscReal precision is */
2496:     if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0]));
2497:     PetscFree(vals);
2498:     VecDestroy(&vec3_N);

2500:     /* there should not be any pressure dofs lying on the interface */
2501:     PetscCalloc1(pcis->n,&count);
2502:     ISGetIndices(pcis->is_B_local,&idxs);
2503:     for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2504:     ISRestoreIndices(pcis->is_B_local,&idxs);
2505:     ISGetIndices(zerodiag,&idxs);
2506:     for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2507:     ISRestoreIndices(zerodiag,&idxs);
2508:     PetscFree(count);
2509:   }
2510:   ISDestroy(&dirIS);

2512:   /* check PCBDDCBenignGetOrSetP0 */
        /* write rank-dependent values into the global vector, clobber the local copy, read them back */
2513:   VecSetRandom(pcis->vec1_global,NULL);
2514:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2515:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2516:   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2517:   PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2518:   for (i=0;i<pcbddc->benign_n;i++) {
2519:     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
          /* both %g arguments must be doubles: the expected value is an integer expression and was previously passed uncast */
2520:     if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i));
2521:   }
2522:   return(0);
2523: }

/*
   PCBDDCBenignDetectSaddlePoint - Detects the local pressure (zero diagonal)
   dofs usable for the benign trick and builds the local change of basis.

   Input:
     pc    - the preconditioner (pc->data is a PC_BDDC)
     reuse - if true, the detection phase is skipped entirely and only the
             change of basis is rebuilt from the already stored
             pcbddc->benign_n / pcbddc->benign_zerodiag_subs

   Output:
     zerodiaglocal - the (possibly recomputed) zero diagonal index set, or NULL
                     when no valid subset was found; it is also NULL on the
                     reuse path since zerodiag is never assigned there

   Detection phase (reuse == false):
     - destroys benign_sf, benign_B0 and benign_zerodiag_subs
     - optionally gets the pressure dofs from the fields in ISForDofsLocal
     - finds the zero diagonal rows of pcbddc->local_mat
     - splits them per local subdomain and per pressure block, keeping only
       the candidates that pass the validity checks
     - sets pcbddc->benign_null (logical AND over the communicator) and
       pcbddc->benign_have_null (logical OR over the communicator)
     - if requested and not yet present, assembles pcbddc->divudotp

   Projection phase (label project_b0):
     - assembles pcbddc->benign_change, stores the p0 local/global indices and,
       when benign_change_explicit is set, replaces pcbddc->local_mat with the
       projected matrix
*/
2525: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2526: {
2527:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2528:   IS             pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2529:   PetscInt       nz,n,benign_n,bsp = 1;
2530:   PetscInt       *interior_dofs,n_interior_dofs,nneu;
2531:   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;

2535:   if (reuse) goto project_b0;
        /* drop everything computed by a previous detection */
2536:   PetscSFDestroy(&pcbddc->benign_sf);
2537:   MatDestroy(&pcbddc->benign_B0);
2538:   for (n=0;n<pcbddc->benign_n;n++) {
2539:     ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2540:   }
2541:   PetscFree(pcbddc->benign_zerodiag_subs);
2542:   has_null_pressures = PETSC_TRUE;
2543:   have_null = PETSC_TRUE;
2544:   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2545:      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2546:      Checks if all the pressure dofs in each subdomain have a zero diagonal
2547:      If not, a change of basis on pressures is not needed
2548:      since the local Schur complements are already SPD
2549:   */
2550:   if (pcbddc->n_ISForDofsLocal) {
2551:     IS        iP = NULL;
2552:     PetscInt  p,*pp;
2553:     PetscBool flg;

2555:     PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2556:     n    = pcbddc->n_ISForDofsLocal;
2557:     PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2558:     PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2559:     PetscOptionsEnd();
2560:     if (!flg) {
2561:       n = 1;
2562:       pp[0] = pcbddc->n_ISForDofsLocal-1;
2563:     }

          /* first pass: validate the field ids and count the total number of blocks (sum of block sizes) */
2565:     bsp = 0;
2566:     for (p=0;p<n;p++) {
2567:       PetscInt bs;

2569:       if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2570:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2571:       bsp += bs;
2572:     }
2573:     PetscMalloc1(bsp,&bzerodiag);
          /* second pass: one IS per block component, taking every bs-th index of the field starting at offset b */
2574:     bsp  = 0;
2575:     for (p=0;p<n;p++) {
2576:       const PetscInt *idxs;
2577:       PetscInt       b,bs,npl,*bidxs;

2579:       ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2580:       ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2581:       ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2582:       PetscMalloc1(npl/bs,&bidxs);
2583:       for (b=0;b<bs;b++) {
2584:         PetscInt i;

2586:         for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2587:         ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2588:         bsp++;
2589:       }
2590:       PetscFree(bidxs);
2591:       ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2592:     }
2593:     ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);

2595:     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2596:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2597:     if (iP) {
2598:       IS newpressures;

2600:       ISDifference(pressures,iP,&newpressures);
2601:       ISDestroy(&pressures);
2602:       pressures = newpressures;
2603:     }
2604:     ISSorted(pressures,&sorted);
2605:     if (!sorted) {
2606:       ISSort(pressures);
2607:     }
2608:     PetscFree(pp);
2609:   }

2611:   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2612:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2613:   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2614:   MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2615:   ISSorted(zerodiag,&sorted);
2616:   if (!sorted) {
2617:     ISSort(zerodiag);
2618:   }
        /* zerodiag_save holds its own reference: zerodiag may be destroyed or replaced below */
2619:   PetscObjectReference((PetscObject)zerodiag);
2620:   zerodiag_save = zerodiag;
2621:   ISGetLocalSize(zerodiag,&nz);
2622:   if (!nz) {
2623:     if (n) have_null = PETSC_FALSE;
2624:     has_null_pressures = PETSC_FALSE;
2625:     ISDestroy(&zerodiag);
2626:   }
2627:   recompute_zerodiag = PETSC_FALSE;

2629:   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2630:   zerodiag_subs    = NULL;
2631:   benign_n         = 0;
2632:   n_interior_dofs  = 0;
2633:   interior_dofs    = NULL;
2634:   nneu             = 0;
2635:   if (pcbddc->NeumannBoundariesLocal) {
2636:     ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2637:   }
2638:   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2639:   if (checkb) { /* need to compute interior nodes */
2640:     PetscInt n,i,j;
2641:     PetscInt n_neigh,*neigh,*n_shared,**shared;
2642:     PetscInt *iwork;

2644:     ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2645:     ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2646:     PetscCalloc1(n,&iwork);
2647:     PetscMalloc1(n,&interior_dofs);
          /* count how many neighbors share each dof; entry 0 of the info arrays is skipped (presumably the process itself - confirm) */
2648:     for (i=1;i<n_neigh;i++)
2649:       for (j=0;j<n_shared[i];j++)
2650:           iwork[shared[i][j]] += 1;
          /* dofs never listed as shared are the interior ones */
2651:     for (i=0;i<n;i++)
2652:       if (!iwork[i])
2653:         interior_dofs[n_interior_dofs++] = i;
2654:     PetscFree(iwork);
2655:     ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2656:   }
2657:   if (has_null_pressures) {
2658:     IS             *subs;
2659:     PetscInt       nsubs,i,j,nl;
2660:     const PetscInt *idxs;
2661:     PetscScalar    *array;
2662:     Vec            *work;
2663:     Mat_IS*        matis = (Mat_IS*)(pc->pmat->data);

2665:     subs  = pcbddc->local_subs;
2666:     nsubs = pcbddc->n_local_subs;
2667:     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2668:     if (checkb) {
2669:       VecDuplicateVecs(matis->y,2,&work);
2670:       ISGetLocalSize(zerodiag,&nl);
2671:       ISGetIndices(zerodiag,&idxs);
2672:       /* work[0] = 1_p */
2673:       VecSet(work[0],0.);
2674:       VecGetArray(work[0],&array);
2675:       for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2676:       VecRestoreArray(work[0],&array);
2677:       /* work[1] = 1_v */
2678:       VecSet(work[1],1.);
2679:       VecGetArray(work[1],&array);
2680:       for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2681:       VecRestoreArray(work[1],&array);
2682:       ISRestoreIndices(zerodiag,&idxs);
2683:     }

2685:     if (nsubs > 1 || bsp > 1) {
2686:       IS       *is;
2687:       PetscInt b,totb;

2689:       totb  = bsp;
2690:       is    = bsp > 1 ? bzerodiag : &zerodiag;
2691:       nsubs = PetscMax(nsubs,1);
            /* at most one accepted subset per (block, subdomain) pair */
2692:       PetscCalloc1(nsubs*totb,&zerodiag_subs);
2693:       for (b=0;b<totb;b++) {
2694:         for (i=0;i<nsubs;i++) {
2695:           ISLocalToGlobalMapping l2g;
2696:           IS                     t_zerodiag_subs;
2697:           PetscInt               nl;

                /* l2g maps the numbering of subdomain i to the local numbering; identity over all local dofs when no subdomain info is available */
2699:           if (subs) {
2700:             ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2701:           } else {
2702:             IS tis;

2704:             MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2705:             ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2706:             ISLocalToGlobalMappingCreateIS(tis,&l2g);
2707:             ISDestroy(&tis);
2708:           }
                /* candidate dofs of block b lying in subdomain i (subdomain numbering) */
2709:           ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2710:           ISGetLocalSize(t_zerodiag_subs,&nl);
2711:           if (nl) {
2712:             PetscBool valid = PETSC_TRUE;

                  /* check 1: A applied to the pressures of this subdomain, masked by 1_v, must vanish on interior dofs */
                  /* NOTE(review): subs[i] is dereferenced here without the NULL guard used above when building l2g;
                     this looks reachable with subs == NULL when bsp > 1 and checkb is true - confirm */
2714:             if (checkb) {
2715:               VecSet(matis->x,0);
2716:               ISGetLocalSize(subs[i],&nl);
2717:               ISGetIndices(subs[i],&idxs);
2718:               VecGetArray(matis->x,&array);
2719:               for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2720:               VecRestoreArray(matis->x,&array);
2721:               ISRestoreIndices(subs[i],&idxs);
2722:               VecPointwiseMult(matis->x,work[0],matis->x);
2723:               MatMult(matis->A,matis->x,matis->y);
2724:               VecPointwiseMult(matis->y,work[1],matis->y);
2725:               VecGetArray(matis->y,&array);
2726:               for (j=0;j<n_interior_dofs;j++) {
2727:                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2728:                   valid = PETSC_FALSE;
2729:                   break;
2730:                 }
2731:               }
2732:               VecRestoreArray(matis->y,&array);
2733:             }
                  /* check 2: reject if any Neumann boundary dof belongs to this subdomain */
2734:             if (valid && nneu) {
2735:               const PetscInt *idxs;
2736:               PetscInt       nzb;

2738:               ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2739:               ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2740:               ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2741:               if (nzb) valid = PETSC_FALSE;
2742:             }
                  /* check 3: reject if the size of the ISEmbed result differs from the number of candidate dofs */
2743:             if (valid && pressures) {
2744:               IS       t_pressure_subs,tmp;
2745:               PetscInt i1,i2;

2747:               ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2748:               ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2749:               ISGetLocalSize(tmp,&i1);
2750:               ISGetLocalSize(t_zerodiag_subs,&i2);
2751:               if (i2 != i1) valid = PETSC_FALSE;
2752:               ISDestroy(&t_pressure_subs);
2753:               ISDestroy(&tmp);
2754:             }
                  /* accepted subsets are stored back in local numbering; a rejection forces zerodiag to be rebuilt later */
2755:             if (valid) {
2756:               ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2757:               benign_n++;
2758:             } else recompute_zerodiag = PETSC_TRUE;
2759:           }
2760:           ISDestroy(&t_zerodiag_subs);
2761:           ISLocalToGlobalMappingDestroy(&l2g);
2762:         }
2763:       }
2764:     } else { /* there's just one subdomain (or zero if they have not been detected) */
2765:       PetscBool valid = PETSC_TRUE;

2767:       if (nneu) valid = PETSC_FALSE;
2768:       if (valid && pressures) {
2769:         ISEqual(pressures,zerodiag,&valid);
2770:       }
2771:       if (valid && checkb) {
2772:         MatMult(matis->A,work[0],matis->x);
2773:         VecPointwiseMult(matis->x,work[1],matis->x);
2774:         VecGetArray(matis->x,&array);
2775:         for (j=0;j<n_interior_dofs;j++) {
2776:           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2777:             valid = PETSC_FALSE;
2778:             break;
2779:           }
2780:         }
2781:         VecRestoreArray(matis->x,&array);
2782:       }
2783:       if (valid) {
2784:         benign_n = 1;
2785:         PetscMalloc1(benign_n,&zerodiag_subs);
              /* zerodiag_subs[0] shares the zerodiag object, hence the extra reference */
2786:         PetscObjectReference((PetscObject)zerodiag);
2787:         zerodiag_subs[0] = zerodiag;
2788:       }
2789:     }
2790:     if (checkb) {
2791:       VecDestroyVecs(2,&work);
2792:     }
2793:   }
2794:   PetscFree(interior_dofs);

        /* nothing accepted: no change of basis will be built and zerodiag is returned as NULL */
2796:   if (!benign_n) {
2797:     PetscInt n;

2799:     ISDestroy(&zerodiag);
2800:     recompute_zerodiag = PETSC_FALSE;
2801:     MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2802:     if (n) have_null = PETSC_FALSE;
2803:   }

2805:   /* final check for null pressures */
2806:   if (zerodiag && pressures) {
2807:     ISEqual(pressures,zerodiag,&have_null);
2808:   }

        /* some candidates were rejected: rebuild zerodiag as the sorted concatenation of the accepted subsets */
2810:   if (recompute_zerodiag) {
2811:     ISDestroy(&zerodiag);
2812:     if (benign_n == 1) {
2813:       PetscObjectReference((PetscObject)zerodiag_subs[0]);
2814:       zerodiag = zerodiag_subs[0];
2815:     } else {
2816:       PetscInt i,nzn,*new_idxs;

2818:       nzn = 0;
2819:       for (i=0;i<benign_n;i++) {
2820:         PetscInt ns;
2821:         ISGetLocalSize(zerodiag_subs[i],&ns);
2822:         nzn += ns;
2823:       }
2824:       PetscMalloc1(nzn,&new_idxs);
2825:       nzn = 0;
2826:       for (i=0;i<benign_n;i++) {
2827:         PetscInt ns,*idxs;
2828:         ISGetLocalSize(zerodiag_subs[i],&ns);
2829:         ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2830:         PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2831:         ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2832:         nzn += ns;
2833:       }
2834:       PetscSortInt(nzn,new_idxs);
            /* the IS takes ownership of new_idxs (PETSC_OWN_POINTER), so it is not freed here */
2835:       ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2836:     }
2837:     have_null = PETSC_FALSE;
2838:   }

2840:   /* determines if the coarse solver will be singular or not */
2841:   MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));

2843:   /* Prepare matrix to compute no-net-flux */
2844:   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2845:     Mat                    A,loc_divudotp;
2846:     ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2847:     IS                     row,col,isused = NULL;
2848:     PetscInt               M,N,n,st,n_isused;

          /* rows come from the user pressures if available, otherwise from the zero diagonal dofs found above */
2850:     if (pressures) {
2851:       isused = pressures;
2852:     } else {
2853:       isused = zerodiag_save;
2854:     }
2855:     MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2856:     MatISGetLocalMat(pc->pmat,&A);
2857:     MatGetLocalSize(A,&n,NULL);
2858:     if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2859:     n_isused = 0;
2860:     if (isused) {
2861:       ISGetLocalSize(isused,&n_isused);
2862:     }
          /* inclusive prefix sum minus the local count gives the first global row owned by this process */
2863:     MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2864:     st = st-n_isused;
2865:     if (n) {
2866:       const PetscInt *gidxs;

2868:       MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2869:       ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2870:       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2871:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2872:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2873:       ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2874:     } else {
2875:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2876:       ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2877:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2878:     }
2879:     MatGetSize(pc->pmat,NULL,&N);
2880:     ISGetSize(row,&M);
2881:     ISLocalToGlobalMappingCreateIS(row,&rl2g);
2882:     ISLocalToGlobalMappingCreateIS(col,&cl2g);
2883:     ISDestroy(&row);
2884:     ISDestroy(&col);
2885:     MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2886:     MatSetType(pcbddc->divudotp,MATIS);
2887:     MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2888:     MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2889:     ISLocalToGlobalMappingDestroy(&rl2g);
2890:     ISLocalToGlobalMappingDestroy(&cl2g);
2891:     MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2892:     MatDestroy(&loc_divudotp);
2893:     MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2894:     MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2895:   }
2896:   ISDestroy(&zerodiag_save);
2897:   ISDestroy(&pressures);
2898:   if (bzerodiag) {
2899:     PetscInt i;

2901:     for (i=0;i<bsp;i++) {
2902:       ISDestroy(&bzerodiag[i]);
2903:     }
2904:     PetscFree(bzerodiag);
2905:   }
        /* hand the accepted subsets over to the PC; they are released at the top of the next non-reuse call */
2906:   pcbddc->benign_n = benign_n;
2907:   pcbddc->benign_zerodiag_subs = zerodiag_subs;

2909:   /* determines if the problem has subdomains with 0 pressure block */
2910:   have_null = (PetscBool)(!!pcbddc->benign_n);
2911:   MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2913: project_b0:
2914:   MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2915:   /* change of basis and p0 dofs */
2916:   if (pcbddc->benign_n) {
2917:     PetscInt i,s,*nnz;

2919:     /* local change of basis for pressures */
2920:     MatDestroy(&pcbddc->benign_change);
2921:     MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2922:     MatSetType(pcbddc->benign_change,MATAIJ);
2923:     MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2924:     PetscMalloc1(n,&nnz);
2925:     for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2926:     for (i=0;i<pcbddc->benign_n;i++) {
2927:       const PetscInt *idxs;
2928:       PetscInt       nzs,j;

2930:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2931:       ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2932:       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2933:       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2934:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2935:     }
2936:     MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2937:     MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2938:     PetscFree(nnz);
2939:     /* set identity by default */
2940:     for (i=0;i<n;i++) {
2941:       MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2942:     }
2943:     PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2944:     PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2945:     /* set change on pressures */
2946:     for (s=0;s<pcbddc->benign_n;s++) {
2947:       PetscScalar    *array;
2948:       const PetscInt *idxs;
2949:       PetscInt       nzs;

2951:       ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2952:       ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
            /* every row but the last of the subset: 1 on its own column and 1 on the column of the last dof */
2953:       for (i=0;i<nzs-1;i++) {
2954:         PetscScalar vals[2];
2955:         PetscInt    cols[2];

2957:         cols[0] = idxs[i];
2958:         cols[1] = idxs[nzs-1];
2959:         vals[0] = 1.;
2960:         vals[1] = 1.;
2961:         MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2962:       }
            /* row of the last dof: -1 on the other columns of the subset, 1 on its own column */
2963:       PetscMalloc1(nzs,&array);
2964:       for (i=0;i<nzs-1;i++) array[i] = -1.;
2965:       array[nzs-1] = 1.;
2966:       MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2967:       /* store local idxs for p0 */
2968:       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2969:       ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2970:       PetscFree(array);
2971:     }
2972:     MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2973:     MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);

2975:     /* project if needed */
2976:     if (pcbddc->benign_change_explicit) {
2977:       Mat M;

            /* local_mat is replaced by the output of MatSeqAIJCompress applied to the PtAP product */
2979:       MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2980:       MatDestroy(&pcbddc->local_mat);
2981:       MatSeqAIJCompress(M,&pcbddc->local_mat);
2982:       MatDestroy(&M);
2983:     }
2984:     /* store global idxs for p0 */
2985:     ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2986:   }
2987:   *zerodiaglocal = zerodiag;
2988:   return(0);
2989: }

/*
   PCBDDCBenignGetOrSetP0 - Moves the p0 values between a global vector and
   the local storage pcbddc->benign_p0.

   Input:
     pc  - the preconditioner (pc->data is a PC_BDDC)
     v   - global vector; read when get is true, written when get is false
     get - PETSC_TRUE:  copy the entries of v at the p0 global indices into
                        pcbddc->benign_p0 (star forest broadcast)
           PETSC_FALSE: write pcbddc->benign_p0 into v at the p0 global
                        indices (star forest reduce with MPIU_REPLACE)

   The star forest pcbddc->benign_sf is created on first use from the row
   layout of pc->pmat, with pcbddc->benign_n leaves attached to the global
   indices pcbddc->benign_p0_gidx.
*/
2991: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2992: {
2993:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2994:   PetscScalar    *array;

        /* lazy setup: the star forest is built only once and reused until it is destroyed elsewhere */
2998:   if (!pcbddc->benign_sf) {
2999:     PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3000:     PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3001:   }
3002:   if (get) {
          /* read-only access to v: array is the root data, benign_p0 the leaf data */
3003:     VecGetArrayRead(v,(const PetscScalar**)&array);
3004:     PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3005:     PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3006:     VecRestoreArrayRead(v,(const PetscScalar**)&array);
3007:   } else {
          /* leaf values replace the corresponding root entries of v */
3008:     VecGetArray(v,&array);
3009:     PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3010:     PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3011:     VecRestoreArray(v,&array);
3012:   }
3013:   return(0);
3014: }

/*
   PCBDDCBenignPopOrPushB0 - Extracts (pop) or reinserts (push) the B0 rows
   associated with the p0 dofs.

   Input:
     pc  - the preconditioner (pc->data is a PC_BDDC)
     pop - PETSC_TRUE to pop, PETSC_FALSE to push

   Does nothing when pcbddc->benign_n is zero.

   Pop, explicit change of basis: the rows of pcbddc->local_mat at
   pcbddc->benign_p0_lidx are copied into pcbddc->benign_B0, then those rows
   and columns are zeroed in local_mat with 1.0 placed on the diagonal.

   Pop, implicit change of basis: row i of pcbddc->benign_B0 is filled with
   the entries larger than PETSC_SMALL (in magnitude) of the local matrix
   applied to the indicator vector of pcbddc->benign_zerodiag_subs[i].
   local_mat is not touched.

   Push, explicit change of basis: each row of benign_B0 is written back as
   both the row and the column of its p0 dof in local_mat, with a zero
   diagonal entry. Push without the explicit change of basis is an error.
*/
3016: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3017: {
3018:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

3022:   /* TODO: add error checking
3023:     - avoid nested pop (or push) calls.
3024:     - cannot push before pop.
3025:     - cannot call this if pcbddc->local_mat is NULL
3026:   */
3027:   if (!pcbddc->benign_n) {
3028:     return(0);
3029:   }
3030:   if (pop) {
3031:     if (pcbddc->benign_change_explicit) {
3032:       IS       is_p0;
3033:       MatReuse reuse;

3035:       /* extract B_0 */
            /* reuse the existing B0 matrix when one is already stored */
3036:       reuse = MAT_INITIAL_MATRIX;
3037:       if (pcbddc->benign_B0) {
3038:         reuse = MAT_REUSE_MATRIX;
3039:       }
3040:       ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3041:       MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3042:       /* remove rows and cols from local problem */
3043:       MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3044:       MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3045:       MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3046:       ISDestroy(&is_p0);
3047:     } else {
3048:       Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
3049:       PetscScalar *vals;
3050:       PetscInt    i,n,*idxs_ins;

3052:       VecGetLocalSize(matis->y,&n);
            /* scratch buffers of n entries: column indices and values for one row of B0 */
3053:       PetscMalloc2(n,&idxs_ins,n,&vals);
3054:       if (!pcbddc->benign_B0) {
3055:         PetscInt *nnz;
3056:         MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3057:         MatSetType(pcbddc->benign_B0,MATAIJ);
3058:         MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3059:         PetscMalloc1(pcbddc->benign_n,&nnz);
              /* row i is preallocated with n minus the size of the i-th pressure subset */
3060:         for (i=0;i<pcbddc->benign_n;i++) {
3061:           ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3062:           nnz[i] = n - nnz[i];
3063:         }
3064:         MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3065:         MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3066:         PetscFree(nnz);
3067:       }

3069:       for (i=0;i<pcbddc->benign_n;i++) {
3070:         PetscScalar *array;
3071:         PetscInt    *idxs,j,nz,cum;

              /* x = indicator vector of the i-th pressure subset, y = A x */
3073:         VecSet(matis->x,0.);
3074:         ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3075:         ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3076:         for (j=0;j<nz;j++) vals[j] = 1.;
3077:         VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3078:         VecAssemblyBegin(matis->x);
3079:         VecAssemblyEnd(matis->x);
3080:         VecSet(matis->y,0.);
3081:         MatMult(matis->A,matis->x,matis->y);
3082:         VecGetArray(matis->y,&array);
              /* keep only the entries of y above PETSC_SMALL in magnitude; vals is reused as the output buffer */
3083:         cum = 0;
3084:         for (j=0;j<n;j++) {
3085:           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3086:             vals[cum] = array[j];
3087:             idxs_ins[cum] = j;
3088:             cum++;
3089:           }
3090:         }
3091:         MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3092:         VecRestoreArray(matis->y,&array);
3093:         ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3094:       }
3095:       MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3096:       MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3097:       PetscFree2(idxs_ins,vals);
3098:     }
3099:   } else { /* push */
3100:     if (pcbddc->benign_change_explicit) {
3101:       PetscInt i;

3103:       for (i=0;i<pcbddc->benign_n;i++) {
3104:         PetscScalar *B0_vals;
3105:         PetscInt    *B0_cols,B0_ncol;

              /* row i of B0 is inserted as the row and as the column of the i-th p0 dof; its diagonal entry is set to zero */
3107:         MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3108:         MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3109:         MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3110:         MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3111:         MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3112:       }
3113:       MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3114:       MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3115:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3116:   }
3117:   return(0);
3118: }

3120: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3121: {
3122:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3123:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3124:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
3125:   PetscBLASInt    *B_iwork,*B_ifail;
3126:   PetscScalar     *work,lwork;
3127:   PetscScalar     *St,*S,*eigv;
3128:   PetscScalar     *Sarray,*Starray;
3129:   PetscReal       *eigs,thresh,lthresh,uthresh;
3130:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3131:   PetscBool       allocated_S_St;
3132: #if defined(PETSC_USE_COMPLEX)
3133:   PetscReal       *rwork;
3134: #endif
3135:   PetscErrorCode  ierr;

3138:   if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3139:   if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3140:   if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3141:   PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);

3143:   if (pcbddc->dbg_flag) {
3144:     PetscViewerFlush(pcbddc->dbg_viewer);
3145:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3146:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3147:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3148:   }

3150:   if (pcbddc->dbg_flag) {
3151:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3152:   }

3154:   /* max size of subsets */
3155:   mss = 0;
3156:   for (i=0;i<sub_schurs->n_subs;i++) {
3157:     PetscInt subset_size;

3159:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3160:     mss = PetscMax(mss,subset_size);
3161:   }

3163:   /* min/max and threshold */
3164:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3165:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3166:   nmax = PetscMax(nmin,nmax);
3167:   allocated_S_St = PETSC_FALSE;
3168:   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3169:     allocated_S_St = PETSC_TRUE;
3170:   }

3172:   /* allocate lapack workspace */
3173:   cum = cum2 = 0;
3174:   maxneigs = 0;
3175:   for (i=0;i<sub_schurs->n_subs;i++) {
3176:     PetscInt n,subset_size;

3178:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3179:     n = PetscMin(subset_size,nmax);
3180:     cum += subset_size;
3181:     cum2 += subset_size*n;
3182:     maxneigs = PetscMax(maxneigs,n);
3183:   }
3184:   if (mss) {
3185:     if (sub_schurs->is_symmetric) {
3186:       PetscBLASInt B_itype = 1;
3187:       PetscBLASInt B_N = mss;
3188:       PetscReal    zero = 0.0;
3189:       PetscReal    eps = 0.0; /* dlamch? */

3191:       B_lwork = -1;
3192:       S = NULL;
3193:       St = NULL;
3194:       eigs = NULL;
3195:       eigv = NULL;
3196:       B_iwork = NULL;
3197:       B_ifail = NULL;
3198: #if defined(PETSC_USE_COMPLEX)
3199:       rwork = NULL;
3200: #endif
3201:       thresh = 1.0;
3202:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3203: #if defined(PETSC_USE_COMPLEX)
3204:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3205: #else
3206:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3207: #endif
3208:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3209:       PetscFPTrapPop();
3210:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3211:   } else {
3212:     lwork = 0;
3213:   }

3215:   nv = 0;
3216:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3217:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
3218:   }
3219:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3220:   if (allocated_S_St) {
3221:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
3222:   }
3223:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3224: #if defined(PETSC_USE_COMPLEX)
3225:   PetscMalloc1(7*mss,&rwork);
3226: #endif
3227:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3228:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3229:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3230:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
3231:                       nv+cum2,&pcbddc->adaptive_constraints_data);
3232:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

3234:   maxneigs = 0;
3235:   cum = cumarray = 0;
3236:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3237:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3238:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3239:     const PetscInt *idxs;

3241:     ISGetIndices(sub_schurs->is_vertices,&idxs);
3242:     for (cum=0;cum<nv;cum++) {
3243:       pcbddc->adaptive_constraints_n[cum] = 1;
3244:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3245:       pcbddc->adaptive_constraints_data[cum] = 1.0;
3246:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3247:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3248:     }
3249:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3250:   }

3252:   if (mss) { /* multilevel */
3253:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3254:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3255:   }

3257:   lthresh = pcbddc->adaptive_threshold[0];
3258:   uthresh = pcbddc->adaptive_threshold[1];
3259:   for (i=0;i<sub_schurs->n_subs;i++) {
3260:     const PetscInt *idxs;
3261:     PetscReal      upper,lower;
3262:     PetscInt       j,subset_size,eigs_start = 0;
3263:     PetscBLASInt   B_N;
3264:     PetscBool      same_data = PETSC_FALSE;
3265:     PetscBool      scal = PETSC_FALSE;

3267:     if (pcbddc->use_deluxe_scaling) {
3268:       upper = PETSC_MAX_REAL;
3269:       lower = uthresh;
3270:     } else {
3271:       if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3272:       upper = 1./uthresh;
3273:       lower = 0.;
3274:     }
3275:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3276:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
3277:     PetscBLASIntCast(subset_size,&B_N);
3278:     /* this is experimental: we assume the dofs have been properly grouped to have
3279:        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3280:     if (!sub_schurs->is_posdef) {
3281:       Mat T;

3283:       for (j=0;j<subset_size;j++) {
3284:         if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3285:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3286:           MatScale(T,-1.0);
3287:           MatDestroy(&T);
3288:           MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3289:           MatScale(T,-1.0);
3290:           MatDestroy(&T);
3291:           if (sub_schurs->change_primal_sub) {
3292:             PetscInt       nz,k;
3293:             const PetscInt *idxs;

3295:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3296:             ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3297:             for (k=0;k<nz;k++) {
3298:               *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3299:               *(Starray + cumarray + idxs[k]*(subset_size+1))  = 0.0;
3300:             }
3301:             ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3302:           }
3303:           scal = PETSC_TRUE;
3304:           break;
3305:         }
3306:       }
3307:     }

3309:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3310:       if (sub_schurs->is_symmetric) {
3311:         PetscInt j,k;
3312:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3313:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3314:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3315:         }
3316:         for (j=0;j<subset_size;j++) {
3317:           for (k=j;k<subset_size;k++) {
3318:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3319:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3320:           }
3321:         }
3322:       } else {
3323:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3324:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3325:       }
3326:     } else {
3327:       S = Sarray + cumarray;
3328:       St = Starray + cumarray;
3329:     }
3330:     /* see if we can save some work */
3331:     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3332:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3333:     }

3335:     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3336:       B_neigs = 0;
3337:     } else {
3338:       if (sub_schurs->is_symmetric) {
3339:         PetscBLASInt B_itype = 1;
3340:         PetscBLASInt B_IL, B_IU;
3341:         PetscReal    eps = -1.0; /* dlamch? */
3342:         PetscInt     nmin_s;
3343:         PetscBool    compute_range;

3345:         B_neigs = 0;
3346:         compute_range = (PetscBool)!same_data;
3347:         if (nmin >= subset_size) compute_range = PETSC_FALSE;

3349:         if (pcbddc->dbg_flag) {
3350:           PetscInt nc = 0;

3352:           if (sub_schurs->change_primal_sub) {
3353:             ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3354:           }
3355:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3356:         }

3358:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3359:         if (compute_range) {

3361:           /* ask for eigenvalues larger than thresh */
3362:           if (sub_schurs->is_posdef) {
3363: #if defined(PETSC_USE_COMPLEX)
3364:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3365: #else
3366:             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3367: #endif
3368:             PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3369:           } else { /* no theory so far, but it works nicely */
3370:             PetscInt  recipe = 0,recipe_m = 1;
3371:             PetscReal bb[2];

3373:             PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3374:             switch (recipe) {
3375:             case 0:
3376:               if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3377:               else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3378: #if defined(PETSC_USE_COMPLEX)
3379:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3380: #else
3381:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3382: #endif
3383:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3384:               break;
3385:             case 1:
3386:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3387: #if defined(PETSC_USE_COMPLEX)
3388:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3389: #else
3390:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3391: #endif
3392:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3393:               if (!scal) {
3394:                 PetscBLASInt B_neigs2 = 0;

3396:                 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3397:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3398:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3399: #if defined(PETSC_USE_COMPLEX)
3400:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3401: #else
3402:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3403: #endif
3404:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3405:                 B_neigs += B_neigs2;
3406:               }
3407:               break;
3408:             case 2:
3409:               if (scal) {
3410:                 bb[0] = PETSC_MIN_REAL;
3411:                 bb[1] = 0;
3412: #if defined(PETSC_USE_COMPLEX)
3413:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3414: #else
3415:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3416: #endif
3417:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3418:               } else {
3419:                 PetscBLASInt B_neigs2 = 0;
3420:                 PetscBool    import = PETSC_FALSE;

3422:                 lthresh = PetscMax(lthresh,0.0);
3423:                 if (lthresh > 0.0) {
3424:                   bb[0] = PETSC_MIN_REAL;
3425:                   bb[1] = lthresh*lthresh;

3427:                   import = PETSC_TRUE;
3428: #if defined(PETSC_USE_COMPLEX)
3429:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3430: #else
3431:                   PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3432: #endif
3433:                   PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3434:                 }
3435:                 bb[0] = PetscMax(lthresh*lthresh,uthresh);
3436:                 bb[1] = PETSC_MAX_REAL;
3437:                 if (import) {
3438:                   PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3439:                   PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3440:                 }
3441: #if defined(PETSC_USE_COMPLEX)
3442:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3443: #else
3444:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3445: #endif
3446:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3447:                 B_neigs += B_neigs2;
3448:               }
3449:               break;
3450:             case 3:
3451:               if (scal) {
3452:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3453:               } else {
3454:                 PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3455:               }
3456:               if (!scal) {
3457:                 bb[0] = uthresh;
3458:                 bb[1] = PETSC_MAX_REAL;
3459: #if defined(PETSC_USE_COMPLEX)
3460:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3461: #else
3462:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3463: #endif
3464:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3465:               }
3466:               if (recipe_m > 0 && B_N - B_neigs > 0) {
3467:                 PetscBLASInt B_neigs2 = 0;

3469:                 B_IL = 1;
3470:                 PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3471:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3472:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3473: #if defined(PETSC_USE_COMPLEX)
3474:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3475: #else
3476:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3477: #endif
3478:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3479:                 B_neigs += B_neigs2;
3480:               }
3481:               break;
3482:             case 4:
3483:               bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3484: #if defined(PETSC_USE_COMPLEX)
3485:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3486: #else
3487:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3488: #endif
3489:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3490:               {
3491:                 PetscBLASInt B_neigs2 = 0;

3493:                 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3494:                 PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3495:                 PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3496: #if defined(PETSC_USE_COMPLEX)
3497:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3498: #else
3499:                 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3500: #endif
3501:                 PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3502:                 B_neigs += B_neigs2;
3503:               }
3504:               break;
3505:             case 5: /* same as before: first compute all eigenvalues, then filter */
3506: #if defined(PETSC_USE_COMPLEX)
3507:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3508: #else
3509:               PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3510: #endif
3511:               PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3512:               {
3513:                 PetscInt e,k,ne;
3514:                 for (e=0,ne=0;e<B_neigs;e++) {
3515:                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3516:                     for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3517:                     eigs[ne] = eigs[e];
3518:                     ne++;
3519:                   }
3520:                 }
3521:                 PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3522:                 B_neigs = ne;
3523:               }
3524:               break;
3525:             default:
3526:               SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3527:               break;
3528:             }
3529:           }
3530:         } else if (!same_data) { /* this is just to see all the eigenvalues */
3531:           B_IU = PetscMax(1,PetscMin(B_N,nmax));
3532:           B_IL = 1;
3533: #if defined(PETSC_USE_COMPLEX)
3534:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3535: #else
3536:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3537: #endif
3538:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3539:         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3540:           PetscInt k;
3541:           if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3542:           ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3543:           PetscBLASIntCast(nmax,&B_neigs);
3544:           nmin = nmax;
3545:           PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3546:           for (k=0;k<nmax;k++) {
3547:             eigs[k] = 1./PETSC_SMALL;
3548:             eigv[k*(subset_size+1)] = 1.0;
3549:           }
3550:         }
3551:         PetscFPTrapPop();
3552:         if (B_ierr) {
3553:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3554:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3555:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3556:         }

3558:         if (B_neigs > nmax) {
3559:           if (pcbddc->dbg_flag) {
3560:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3561:           }
3562:           if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3563:           B_neigs = nmax;
3564:         }

3566:         nmin_s = PetscMin(nmin,B_N);
3567:         if (B_neigs < nmin_s) {
3568:           PetscBLASInt B_neigs2 = 0;

3570:           if (pcbddc->use_deluxe_scaling) {
3571:             if (scal) {
3572:               B_IU = nmin_s;
3573:               B_IL = B_neigs + 1;
3574:             } else {
3575:               B_IL = B_N - nmin_s + 1;
3576:               B_IU = B_N - B_neigs;
3577:             }
3578:           } else {
3579:             B_IL = B_neigs + 1;
3580:             B_IU = nmin_s;
3581:           }
3582:           if (pcbddc->dbg_flag) {
3583:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3584:           }
3585:           if (sub_schurs->is_symmetric) {
3586:             PetscInt j,k;
3587:             for (j=0;j<subset_size;j++) {
3588:               for (k=j;k<subset_size;k++) {
3589:                 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3590:                 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3591:               }
3592:             }
3593:           } else {
3594:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3595:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3596:           }
3597:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3598: #if defined(PETSC_USE_COMPLEX)
3599:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3600: #else
3601:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3602: #endif
3603:           PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3604:           PetscFPTrapPop();
3605:           B_neigs += B_neigs2;
3606:         }
3607:         if (B_ierr) {
3608:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3609:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3610:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3611:         }
3612:         if (pcbddc->dbg_flag) {
3613:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
3614:           for (j=0;j<B_neigs;j++) {
3615:             if (eigs[j] == 0.0) {
3616:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
3617:             } else {
3618:               if (pcbddc->use_deluxe_scaling) {
3619:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
3620:               } else {
3621:                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1./eigs[j+eigs_start]);
3622:               }
3623:             }
3624:           }
3625:         }
3626:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3627:     }
3628:     /* change the basis back to the original one */
3629:     if (sub_schurs->change) {
3630:       Mat change,phi,phit;

3632:       if (pcbddc->dbg_flag > 2) {
3633:         PetscInt ii;
3634:         for (ii=0;ii<B_neigs;ii++) {
3635:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3636:           for (j=0;j<B_N;j++) {
3637: #if defined(PETSC_USE_COMPLEX)
3638:             PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3639:             PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3640:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3641: #else
3642:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3643: #endif
3644:           }
3645:         }
3646:       }
3647:       KSPGetOperators(sub_schurs->change[i],&change,NULL);
3648:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3649:       MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3650:       MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3651:       MatDestroy(&phit);
3652:       MatDestroy(&phi);
3653:     }
3654:     maxneigs = PetscMax(B_neigs,maxneigs);
3655:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3656:     if (B_neigs) {
3657:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

3659:       if (pcbddc->dbg_flag > 1) {
3660:         PetscInt ii;
3661:         for (ii=0;ii<B_neigs;ii++) {
3662:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3663:           for (j=0;j<B_N;j++) {
3664: #if defined(PETSC_USE_COMPLEX)
3665:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3666:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3667:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
3668: #else
3669:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3670: #endif
3671:           }
3672:         }
3673:       }
3674:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3675:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3676:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3677:       cum++;
3678:     }
3679:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3680:     /* shift for next computation */
3681:     cumarray += subset_size*subset_size;
3682:   }
3683:   if (pcbddc->dbg_flag) {
3684:     PetscViewerFlush(pcbddc->dbg_viewer);
3685:   }

3687:   if (mss) {
3688:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3689:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3690:     /* destroy matrices (junk) */
3691:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3692:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3693:   }
3694:   if (allocated_S_St) {
3695:     PetscFree2(S,St);
3696:   }
3697:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3698: #if defined(PETSC_USE_COMPLEX)
3699:   PetscFree(rwork);
3700: #endif
3701:   if (pcbddc->dbg_flag) {
3702:     PetscInt maxneigs_r;
3703:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3704:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3705:   }
3706:   PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3707:   return(0);
3708: }

3710: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3711: {
3712:   PetscScalar    *coarse_submat_vals;

3716:   /* Setup local scatters R_to_B and (optionally) R_to_D */
3717:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3718:   PCBDDCSetUpLocalScatters(pc);

3720:   /* Setup local neumann solver ksp_R */
3721:   /* PCBDDCSetUpLocalScatters should be called first! */
3722:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

3724:   /*
3725:      Setup local correction and local part of coarse basis.
3726:      Gives back the dense local part of the coarse matrix in column major ordering
3727:   */
3728:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

3730:   /* Compute total number of coarse nodes and setup coarse solver */
3731:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

3733:   /* free */
3734:   PetscFree(coarse_submat_vals);
3735:   return(0);
3736: }

3738: PetscErrorCode PCBDDCResetCustomization(PC pc)
3739: {
3740:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3744:   ISDestroy(&pcbddc->user_primal_vertices);
3745:   ISDestroy(&pcbddc->user_primal_vertices_local);
3746:   ISDestroy(&pcbddc->NeumannBoundaries);
3747:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
3748:   ISDestroy(&pcbddc->DirichletBoundaries);
3749:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
3750:   PetscFree(pcbddc->onearnullvecs_state);
3751:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
3752:   PCBDDCSetDofsSplitting(pc,0,NULL);
3753:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3754:   return(0);
3755: }

3757: PetscErrorCode PCBDDCResetTopography(PC pc)
3758: {
3759:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3760:   PetscInt       i;

3764:   MatDestroy(&pcbddc->nedcG);
3765:   ISDestroy(&pcbddc->nedclocal);
3766:   MatDestroy(&pcbddc->discretegradient);
3767:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3768:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3769:   MatDestroy(&pcbddc->switch_static_change);
3770:   VecDestroy(&pcbddc->work_change);
3771:   MatDestroy(&pcbddc->ConstraintMatrix);
3772:   MatDestroy(&pcbddc->divudotp);
3773:   ISDestroy(&pcbddc->divudotp_vl2l);
3774:   PCBDDCGraphDestroy(&pcbddc->mat_graph);
3775:   for (i=0;i<pcbddc->n_local_subs;i++) {
3776:     ISDestroy(&pcbddc->local_subs[i]);
3777:   }
3778:   pcbddc->n_local_subs = 0;
3779:   PetscFree(pcbddc->local_subs);
3780:   PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3781:   pcbddc->graphanalyzed        = PETSC_FALSE;
3782:   pcbddc->recompute_topography = PETSC_TRUE;
3783:   pcbddc->corner_selected      = PETSC_FALSE;
3784:   return(0);
3785: }

3787: PetscErrorCode PCBDDCResetSolvers(PC pc)
3788: {
3789:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

3793:   VecDestroy(&pcbddc->coarse_vec);
3794:   if (pcbddc->coarse_phi_B) {
3795:     PetscScalar *array;
3796:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3797:     PetscFree(array);
3798:   }
3799:   MatDestroy(&pcbddc->coarse_phi_B);
3800:   MatDestroy(&pcbddc->coarse_phi_D);
3801:   MatDestroy(&pcbddc->coarse_psi_B);
3802:   MatDestroy(&pcbddc->coarse_psi_D);
3803:   VecDestroy(&pcbddc->vec1_P);
3804:   VecDestroy(&pcbddc->vec1_C);
3805:   MatDestroy(&pcbddc->local_auxmat2);
3806:   MatDestroy(&pcbddc->local_auxmat1);
3807:   VecDestroy(&pcbddc->vec1_R);
3808:   VecDestroy(&pcbddc->vec2_R);
3809:   ISDestroy(&pcbddc->is_R_local);
3810:   VecScatterDestroy(&pcbddc->R_to_B);
3811:   VecScatterDestroy(&pcbddc->R_to_D);
3812:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3813:   KSPReset(pcbddc->ksp_D);
3814:   KSPReset(pcbddc->ksp_R);
3815:   KSPReset(pcbddc->coarse_ksp);
3816:   MatDestroy(&pcbddc->local_mat);
3817:   PetscFree(pcbddc->primal_indices_local_idxs);
3818:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3819:   PetscFree(pcbddc->global_primal_indices);
3820:   ISDestroy(&pcbddc->coarse_subassembling);
3821:   MatDestroy(&pcbddc->benign_change);
3822:   VecDestroy(&pcbddc->benign_vec);
3823:   PCBDDCBenignShellMat(pc,PETSC_TRUE);
3824:   MatDestroy(&pcbddc->benign_B0);
3825:   PetscSFDestroy(&pcbddc->benign_sf);
3826:   if (pcbddc->benign_zerodiag_subs) {
3827:     PetscInt i;
3828:     for (i=0;i<pcbddc->benign_n;i++) {
3829:       ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3830:     }
3831:     PetscFree(pcbddc->benign_zerodiag_subs);
3832:   }
3833:   PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3834:   return(0);
3835: }

3837: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3838: {
3839:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
3840:   PC_IS          *pcis = (PC_IS*)pc->data;
3841:   VecType        impVecType;
3842:   PetscInt       n_constraints,n_R,old_size;

3846:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3847:   n_R = pcis->n - pcbddc->n_vertices;
3848:   VecGetType(pcis->vec1_N,&impVecType);
3849:   /* local work vectors (try to avoid unneeded work)*/
3850:   /* R nodes */
3851:   old_size = -1;
3852:   if (pcbddc->vec1_R) {
3853:     VecGetSize(pcbddc->vec1_R,&old_size);
3854:   }
3855:   if (n_R != old_size) {
3856:     VecDestroy(&pcbddc->vec1_R);
3857:     VecDestroy(&pcbddc->vec2_R);
3858:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3859:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3860:     VecSetType(pcbddc->vec1_R,impVecType);
3861:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3862:   }
3863:   /* local primal dofs */
3864:   old_size = -1;
3865:   if (pcbddc->vec1_P) {
3866:     VecGetSize(pcbddc->vec1_P,&old_size);
3867:   }
3868:   if (pcbddc->local_primal_size != old_size) {
3869:     VecDestroy(&pcbddc->vec1_P);
3870:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3871:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3872:     VecSetType(pcbddc->vec1_P,impVecType);
3873:   }
3874:   /* local explicit constraints */
3875:   old_size = -1;
3876:   if (pcbddc->vec1_C) {
3877:     VecGetSize(pcbddc->vec1_C,&old_size);
3878:   }
3879:   if (n_constraints && n_constraints != old_size) {
3880:     VecDestroy(&pcbddc->vec1_C);
3881:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3882:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3883:     VecSetType(pcbddc->vec1_C,impVecType);
3884:   }
3885:   return(0);
3886: }

3888: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3889: {
3890:   PetscErrorCode  ierr;
3891:   /* pointers to pcis and pcbddc */
3892:   PC_IS*          pcis = (PC_IS*)pc->data;
3893:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
3894:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3895:   /* submatrices of local problem */
3896:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
3897:   /* submatrices of local coarse problem */
3898:   Mat             S_VV,S_CV,S_VC,S_CC;
3899:   /* working matrices */
3900:   Mat             C_CR;
3901:   /* additional working stuff */
3902:   PC              pc_R;
3903:   Mat             F,Brhs = NULL;
3904:   Vec             dummy_vec;
3905:   PetscBool       isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3906:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
3907:   PetscScalar     *work;
3908:   PetscInt        *idx_V_B;
3909:   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3910:   PetscInt        i,n_R,n_D,n_B;

3912:   /* some shortcuts to scalars */
3913:   PetscScalar     one=1.0,m_one=-1.0;

3916:   if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3917:   PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);

3919:   /* Set Non-overlapping dimensions */
3920:   n_vertices = pcbddc->n_vertices;
3921:   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3922:   n_B = pcis->n_B;
3923:   n_D = pcis->n - n_B;
3924:   n_R = pcis->n - n_vertices;

3926:   /* vertices in boundary numbering */
3927:   PetscMalloc1(n_vertices,&idx_V_B);
3928:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3929:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);

3931:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3932:   PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3933:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3934:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3935:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3936:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3937:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3938:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3939:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3940:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

3942:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3943:   KSPGetPC(pcbddc->ksp_R,&pc_R);
3944:   PCSetUp(pc_R);
3945:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3946:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3947:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3948:   lda_rhs = n_R;
3949:   need_benign_correction = PETSC_FALSE;
3950:   if (isLU || isILU || isCHOL) {
3951:     PCFactorGetMatrix(pc_R,&F);
3952:   } else if (sub_schurs && sub_schurs->reuse_solver) {
3953:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3954:     MatFactorType      type;

3956:     F = reuse_solver->F;
3957:     MatGetFactorType(F,&type);
3958:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3959:     MatGetSize(F,&lda_rhs,NULL);
3960:     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3961:   } else {
3962:     F = NULL;
3963:   }

3965:   /* determine if we can use a sparse right-hand side */
3966:   sparserhs = PETSC_FALSE;
3967:   if (F) {
3968:     MatSolverType solver;

3970:     MatFactorGetSolverType(F,&solver);
3971:     PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3972:   }

3974:   /* allocate workspace */
3975:   n = 0;
3976:   if (n_constraints) {
3977:     n += lda_rhs*n_constraints;
3978:   }
3979:   if (n_vertices) {
3980:     n = PetscMax(2*lda_rhs*n_vertices,n);
3981:     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3982:   }
3983:   if (!pcbddc->symmetric_primal) {
3984:     n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3985:   }
3986:   PetscMalloc1(n,&work);

3988:   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3989:   dummy_vec = NULL;
3990:   if (need_benign_correction && lda_rhs != n_R && F) {
3991:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3992:     VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3993:     VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3994:   }

3996:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3997:   if (n_constraints) {
3998:     Mat         M3,C_B;
3999:     IS          is_aux;
4000:     PetscScalar *array,*array2;

4002:     MatDestroy(&pcbddc->local_auxmat1);
4003:     MatDestroy(&pcbddc->local_auxmat2);

4005:     /* Extract constraints on R nodes: C_{CR}  */
4006:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4007:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4008:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);

4010:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4011:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4012:     if (!sparserhs) {
4013:       PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
4014:       for (i=0;i<n_constraints;i++) {
4015:         const PetscScalar *row_cmat_values;
4016:         const PetscInt    *row_cmat_indices;
4017:         PetscInt          size_of_constraint,j;

4019:         MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4020:         for (j=0;j<size_of_constraint;j++) {
4021:           work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4022:         }
4023:         MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4024:       }
4025:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4026:     } else {
4027:       Mat tC_CR;

4029:       MatScale(C_CR,-1.0);
4030:       if (lda_rhs != n_R) {
4031:         PetscScalar *aa;
4032:         PetscInt    r,*ii,*jj;
4033:         PetscBool   done;

4035:         MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4036:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4037:         MatSeqAIJGetArray(C_CR,&aa);
4038:         MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4039:         MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4040:         if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4041:       } else {
4042:         PetscObjectReference((PetscObject)C_CR);
4043:         tC_CR = C_CR;
4044:       }
4045:       MatCreateTranspose(tC_CR,&Brhs);
4046:       MatDestroy(&tC_CR);
4047:     }
4048:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4049:     if (F) {
4050:       if (need_benign_correction) {
4051:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4053:         /* rhs is already zero on interior dofs, no need to change the rhs */
4054:         PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
4055:       }
4056:       MatMatSolve(F,Brhs,local_auxmat2_R);
4057:       if (need_benign_correction) {
4058:         PetscScalar        *marr;
4059:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

4061:         MatDenseGetArray(local_auxmat2_R,&marr);
4062:         if (lda_rhs != n_R) {
4063:           for (i=0;i<n_constraints;i++) {
4064:             VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4065:             PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4066:             VecResetArray(dummy_vec);
4067:           }
4068:         } else {
4069:           for (i=0;i<n_constraints;i++) {
4070:             VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4071:             PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4072:             VecResetArray(pcbddc->vec1_R);
4073:           }
4074:         }
4075:         MatDenseRestoreArray(local_auxmat2_R,&marr);
4076:       }
4077:     } else {
4078:       PetscScalar *marr;

4080:       MatDenseGetArray(local_auxmat2_R,&marr);
4081:       for (i=0;i<n_constraints;i++) {
4082:         VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4083:         VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4084:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4085:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4086:         VecResetArray(pcbddc->vec1_R);
4087:         VecResetArray(pcbddc->vec2_R);
4088:       }
4089:       MatDenseRestoreArray(local_auxmat2_R,&marr);
4090:     }
4091:     if (sparserhs) {
4092:       MatScale(C_CR,-1.0);
4093:     }
4094:     MatDestroy(&Brhs);
4095:     if (!pcbddc->switch_static) {
4096:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4097:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
4098:       MatDenseGetArray(local_auxmat2_R,&array2);
4099:       for (i=0;i<n_constraints;i++) {
4100:         VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4101:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
4102:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4103:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4104:         VecResetArray(pcis->vec1_B);
4105:         VecResetArray(pcbddc->vec1_R);
4106:       }
4107:       MatDenseRestoreArray(local_auxmat2_R,&array2);
4108:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4109:       MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4110:     } else {
4111:       if (lda_rhs != n_R) {
4112:         IS dummy;

4114:         ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4115:         MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4116:         ISDestroy(&dummy);
4117:       } else {
4118:         PetscObjectReference((PetscObject)local_auxmat2_R);
4119:         pcbddc->local_auxmat2 = local_auxmat2_R;
4120:       }
4121:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4122:     }
4123:     ISDestroy(&is_aux);
4124:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
4125:     MatScale(M3,m_one);
4126:     if (isCHOL) {
4127:       MatCholeskyFactor(M3,NULL,NULL);
4128:     } else {
4129:       MatLUFactor(M3,NULL,NULL,NULL);
4130:     }
4131:     MatSeqDenseInvertFactors_Private(M3);
4132:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4133:     MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4134:     MatDestroy(&C_B);
4135:     MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4136:     MatDestroy(&M3);
4137:   }

4139:   /* Get submatrices from subdomain matrix */
4140:   if (n_vertices) {
4141:     IS        is_aux;
4142:     PetscBool isseqaij;

4144:     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4145:       IS tis;

4147:       ISDuplicate(pcbddc->is_R_local,&tis);
4148:       ISSort(tis);
4149:       ISComplement(tis,0,pcis->n,&is_aux);
4150:       ISDestroy(&tis);
4151:     } else {
4152:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4153:     }
4154:     MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4155:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4156:     PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4157:     if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4158:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4159:     }
4160:     MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4161:     ISDestroy(&is_aux);
4162:   }

4164:   /* Matrix of coarse basis functions (local) */
4165:   if (pcbddc->coarse_phi_B) {
4166:     PetscInt on_B,on_primal,on_D=n_D;
4167:     if (pcbddc->coarse_phi_D) {
4168:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4169:     }
4170:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4171:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4172:       PetscScalar *marray;

4174:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4175:       PetscFree(marray);
4176:       MatDestroy(&pcbddc->coarse_phi_B);
4177:       MatDestroy(&pcbddc->coarse_psi_B);
4178:       MatDestroy(&pcbddc->coarse_phi_D);
4179:       MatDestroy(&pcbddc->coarse_psi_D);
4180:     }
4181:   }

4183:   if (!pcbddc->coarse_phi_B) {
4184:     PetscScalar *marr;

4186:     /* memory size */
4187:     n = n_B*pcbddc->local_primal_size;
4188:     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4189:     if (!pcbddc->symmetric_primal) n *= 2;
4190:     PetscCalloc1(n,&marr);
4191:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4192:     marr += n_B*pcbddc->local_primal_size;
4193:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4194:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4195:       marr += n_D*pcbddc->local_primal_size;
4196:     }
4197:     if (!pcbddc->symmetric_primal) {
4198:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4199:       marr += n_B*pcbddc->local_primal_size;
4200:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4201:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4202:       }
4203:     } else {
4204:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4205:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4206:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4207:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4208:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4209:       }
4210:     }
4211:   }

4213:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4214:   p0_lidx_I = NULL;
4215:   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4216:     const PetscInt *idxs;

4218:     ISGetIndices(pcis->is_I_local,&idxs);
4219:     PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4220:     for (i=0;i<pcbddc->benign_n;i++) {
4221:       PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4222:     }
4223:     ISRestoreIndices(pcis->is_I_local,&idxs);
4224:   }

4226:   /* vertices */
4227:   if (n_vertices) {
4228:     PetscBool restoreavr = PETSC_FALSE;

4230:     MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);

4232:     if (n_R) {
4233:       Mat          A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4234:       PetscBLASInt B_N,B_one = 1;
4235:       PetscScalar  *x,*y;

4237:       MatScale(A_RV,m_one);
4238:       if (need_benign_correction) {
4239:         ISLocalToGlobalMapping RtoN;
4240:         IS                     is_p0;
4241:         PetscInt               *idxs_p0,n;

4243:         PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4244:         ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4245:         ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4246:         if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4247:         ISLocalToGlobalMappingDestroy(&RtoN);
4248:         ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4249:         MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4250:         ISDestroy(&is_p0);
4251:       }

4253:       MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4254:       if (!sparserhs || need_benign_correction) {
4255:         if (lda_rhs == n_R) {
4256:           MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4257:         } else {
4258:           PetscScalar    *av,*array;
4259:           const PetscInt *xadj,*adjncy;
4260:           PetscInt       n;
4261:           PetscBool      flg_row;

4263:           array = work+lda_rhs*n_vertices;
4264:           PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4265:           MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4266:           MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4267:           MatSeqAIJGetArray(A_RV,&av);
4268:           for (i=0;i<n;i++) {
4269:             PetscInt j;
4270:             for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4271:           }
4272:           MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4273:           MatDestroy(&A_RV);
4274:           MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4275:         }
4276:         if (need_benign_correction) {
4277:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4278:           PetscScalar        *marr;

4280:           MatDenseGetArray(A_RV,&marr);
4281:           /* need \Phi^T A_RV = (I+L)A_RV, L given by

4283:                  | 0 0  0 | (V)
4284:              L = | 0 0 -1 | (P-p0)
4285:                  | 0 0 -1 | (p0)

4287:           */
4288:           for (i=0;i<reuse_solver->benign_n;i++) {
4289:             const PetscScalar *vals;
4290:             const PetscInt    *idxs,*idxs_zero;
4291:             PetscInt          n,j,nz;

4293:             ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4294:             ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4295:             MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4296:             for (j=0;j<n;j++) {
4297:               PetscScalar val = vals[j];
4298:               PetscInt    k,col = idxs[j];
4299:               for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4300:             }
4301:             MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4302:             ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4303:           }
4304:           MatDenseRestoreArray(A_RV,&marr);
4305:         }
4306:         PetscObjectReference((PetscObject)A_RV);
4307:         Brhs = A_RV;
4308:       } else {
4309:         Mat tA_RVT,A_RVT;

4311:         if (!pcbddc->symmetric_primal) {
4312:           /* A_RV already scaled by -1 */
4313:           MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4314:         } else {
4315:           restoreavr = PETSC_TRUE;
4316:           MatScale(A_VR,-1.0);
4317:           PetscObjectReference((PetscObject)A_VR);
4318:           A_RVT = A_VR;
4319:         }
4320:         if (lda_rhs != n_R) {
4321:           PetscScalar *aa;
4322:           PetscInt    r,*ii,*jj;
4323:           PetscBool   done;

4325:           MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4326:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4327:           MatSeqAIJGetArray(A_RVT,&aa);
4328:           MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4329:           MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4330:           if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4331:         } else {
4332:           PetscObjectReference((PetscObject)A_RVT);
4333:           tA_RVT = A_RVT;
4334:         }
4335:         MatCreateTranspose(tA_RVT,&Brhs);
4336:         MatDestroy(&tA_RVT);
4337:         MatDestroy(&A_RVT);
4338:       }
4339:       if (F) {
4340:         /* need to correct the rhs */
4341:         if (need_benign_correction) {
4342:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4343:           PetscScalar        *marr;

4345:           MatDenseGetArray(Brhs,&marr);
4346:           if (lda_rhs != n_R) {
4347:             for (i=0;i<n_vertices;i++) {
4348:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4349:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4350:               VecResetArray(dummy_vec);
4351:             }
4352:           } else {
4353:             for (i=0;i<n_vertices;i++) {
4354:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4355:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4356:               VecResetArray(pcbddc->vec1_R);
4357:             }
4358:           }
4359:           MatDenseRestoreArray(Brhs,&marr);
4360:         }
4361:         MatMatSolve(F,Brhs,A_RRmA_RV);
4362:         if (restoreavr) {
4363:           MatScale(A_VR,-1.0);
4364:         }
4365:         /* need to correct the solution */
4366:         if (need_benign_correction) {
4367:           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4368:           PetscScalar        *marr;

4370:           MatDenseGetArray(A_RRmA_RV,&marr);
4371:           if (lda_rhs != n_R) {
4372:             for (i=0;i<n_vertices;i++) {
4373:               VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4374:               PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4375:               VecResetArray(dummy_vec);
4376:             }
4377:           } else {
4378:             for (i=0;i<n_vertices;i++) {
4379:               VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4380:               PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4381:               VecResetArray(pcbddc->vec1_R);
4382:             }
4383:           }
4384:           MatDenseRestoreArray(A_RRmA_RV,&marr);
4385:         }
4386:       } else {
4387:         MatDenseGetArray(Brhs,&y);
4388:         for (i=0;i<n_vertices;i++) {
4389:           VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4390:           VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4391:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4392:           KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4393:           VecResetArray(pcbddc->vec1_R);
4394:           VecResetArray(pcbddc->vec2_R);
4395:         }
4396:         MatDenseRestoreArray(Brhs,&y);
4397:       }
4398:       MatDestroy(&A_RV);
4399:       MatDestroy(&Brhs);
4400:       /* S_VV and S_CV */
4401:       if (n_constraints) {
4402:         Mat B;

4404:         PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4405:         for (i=0;i<n_vertices;i++) {
4406:           VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4407:           VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4408:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4409:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4410:           VecResetArray(pcis->vec1_B);
4411:           VecResetArray(pcbddc->vec1_R);
4412:         }
4413:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4414:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4415:         MatDestroy(&B);
4416:         MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4417:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4418:         MatScale(S_CV,m_one);
4419:         PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4420:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4421:         MatDestroy(&B);
4422:       }
4423:       if (lda_rhs != n_R) {
4424:         MatDestroy(&A_RRmA_RV);
4425:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4426:         MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4427:       }
4428:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4429:       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4430:       if (need_benign_correction) {
4431:         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4432:         PetscScalar      *marr,*sums;

4434:         PetscMalloc1(n_vertices,&sums);
4435:         MatDenseGetArray(S_VVt,&marr);
4436:         for (i=0;i<reuse_solver->benign_n;i++) {
4437:           const PetscScalar *vals;
4438:           const PetscInt    *idxs,*idxs_zero;
4439:           PetscInt          n,j,nz;

4441:           ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4442:           ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4443:           for (j=0;j<n_vertices;j++) {
4444:             PetscInt k;
4445:             sums[j] = 0.;
4446:             for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4447:           }
4448:           MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4449:           for (j=0;j<n;j++) {
4450:             PetscScalar val = vals[j];
4451:             PetscInt k;
4452:             for (k=0;k<n_vertices;k++) {
4453:               marr[idxs[j]+k*n_vertices] += val*sums[k];
4454:             }
4455:           }
4456:           MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4457:           ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4458:         }
4459:         PetscFree(sums);
4460:         MatDenseRestoreArray(S_VVt,&marr);
4461:         MatDestroy(&A_RV_bcorr);
4462:       }
4463:       MatDestroy(&A_RRmA_RV);
4464:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4465:       MatDenseGetArray(A_VV,&x);
4466:       MatDenseGetArray(S_VVt,&y);
4467:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4468:       MatDenseRestoreArray(A_VV,&x);
4469:       MatDenseRestoreArray(S_VVt,&y);
4470:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4471:       MatDestroy(&S_VVt);
4472:     } else {
4473:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4474:     }
4475:     MatDestroy(&A_VV);

4477:     /* coarse basis functions */
4478:     for (i=0;i<n_vertices;i++) {
4479:       PetscScalar *y;

4481:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4482:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4483:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4484:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4485:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4486:       y[n_B*i+idx_V_B[i]] = 1.0;
4487:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4488:       VecResetArray(pcis->vec1_B);

4490:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4491:         PetscInt j;

4493:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4494:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4495:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4496:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4497:         VecResetArray(pcis->vec1_D);
4498:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4499:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4500:       }
4501:       VecResetArray(pcbddc->vec1_R);
4502:     }
4503:     /* if n_R == 0 the object is not destroyed */
4504:     MatDestroy(&A_RV);
4505:   }
4506:   VecDestroy(&dummy_vec);

4508:   if (n_constraints) {
4509:     Mat B;

4511:     MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4512:     MatScale(S_CC,m_one);
4513:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4514:     MatScale(S_CC,m_one);
4515:     if (n_vertices) {
4516:       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4517:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4518:       } else {
4519:         Mat S_VCt;

4521:         if (lda_rhs != n_R) {
4522:           MatDestroy(&B);
4523:           MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4524:           MatSeqDenseSetLDA(B,lda_rhs);
4525:         }
4526:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4527:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4528:         MatDestroy(&S_VCt);
4529:       }
4530:     }
4531:     MatDestroy(&B);
4532:     /* coarse basis functions */
4533:     for (i=0;i<n_constraints;i++) {
4534:       PetscScalar *y;

4536:       VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4537:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4538:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4539:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4540:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4541:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4542:       VecResetArray(pcis->vec1_B);
4543:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4544:         PetscInt j;

4546:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4547:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4548:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4549:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4550:         VecResetArray(pcis->vec1_D);
4551:         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4552:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4553:       }
4554:       VecResetArray(pcbddc->vec1_R);
4555:     }
4556:   }
4557:   if (n_constraints) {
4558:     MatDestroy(&local_auxmat2_R);
4559:   }
4560:   PetscFree(p0_lidx_I);

4562:   /* coarse matrix entries relative to B_0 */
4563:   if (pcbddc->benign_n) {
4564:     Mat         B0_B,B0_BPHI;
4565:     IS          is_dummy;
4566:     PetscScalar *data;
4567:     PetscInt    j;

4569:     ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4570:     MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4571:     ISDestroy(&is_dummy);
4572:     MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4573:     MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4574:     MatDenseGetArray(B0_BPHI,&data);
4575:     for (j=0;j<pcbddc->benign_n;j++) {
4576:       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4577:       for (i=0;i<pcbddc->local_primal_size;i++) {
4578:         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4579:         coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4580:       }
4581:     }
4582:     MatDenseRestoreArray(B0_BPHI,&data);
4583:     MatDestroy(&B0_B);
4584:     MatDestroy(&B0_BPHI);
4585:   }

4587:   /* compute other basis functions for non-symmetric problems */
4588:   if (!pcbddc->symmetric_primal) {
4589:     Mat         B_V=NULL,B_C=NULL;
4590:     PetscScalar *marray;

4592:     if (n_constraints) {
4593:       Mat S_CCT,C_CRT;

4595:       MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4596:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4597:       MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4598:       MatDestroy(&S_CCT);
4599:       if (n_vertices) {
4600:         Mat S_VCT;

4602:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4603:         MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4604:         MatDestroy(&S_VCT);
4605:       }
4606:       MatDestroy(&C_CRT);
4607:     } else {
4608:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4609:     }
4610:     if (n_vertices && n_R) {
4611:       PetscScalar    *av,*marray;
4612:       const PetscInt *xadj,*adjncy;
4613:       PetscInt       n;
4614:       PetscBool      flg_row;

4616:       /* B_V = B_V - A_VR^T */
4617:       MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4618:       MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4619:       MatSeqAIJGetArray(A_VR,&av);
4620:       MatDenseGetArray(B_V,&marray);
4621:       for (i=0;i<n;i++) {
4622:         PetscInt j;
4623:         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4624:       }
4625:       MatDenseRestoreArray(B_V,&marray);
4626:       MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4627:       MatDestroy(&A_VR);
4628:     }

4630:     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4631:     if (n_vertices) {
4632:       MatDenseGetArray(B_V,&marray);
4633:       for (i=0;i<n_vertices;i++) {
4634:         VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4635:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4636:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4637:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4638:         VecResetArray(pcbddc->vec1_R);
4639:         VecResetArray(pcbddc->vec2_R);
4640:       }
4641:       MatDenseRestoreArray(B_V,&marray);
4642:     }
4643:     if (B_C) {
4644:       MatDenseGetArray(B_C,&marray);
4645:       for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4646:         VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4647:         VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4648:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4649:         KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4650:         VecResetArray(pcbddc->vec1_R);
4651:         VecResetArray(pcbddc->vec2_R);
4652:       }
4653:       MatDenseRestoreArray(B_C,&marray);
4654:     }
4655:     /* coarse basis functions */
4656:     for (i=0;i<pcbddc->local_primal_size;i++) {
4657:       PetscScalar *y;

4659:       VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4660:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4661:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
4662:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4663:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4664:       if (i<n_vertices) {
4665:         y[n_B*i+idx_V_B[i]] = 1.0;
4666:       }
4667:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4668:       VecResetArray(pcis->vec1_B);

4670:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4671:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4672:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
4673:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4674:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4675:         VecResetArray(pcis->vec1_D);
4676:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4677:       }
4678:       VecResetArray(pcbddc->vec1_R);
4679:     }
4680:     MatDestroy(&B_V);
4681:     MatDestroy(&B_C);
4682:   }

4684:   /* free memory */
4685:   PetscFree(idx_V_B);
4686:   MatDestroy(&S_VV);
4687:   MatDestroy(&S_CV);
4688:   MatDestroy(&S_VC);
4689:   MatDestroy(&S_CC);
4690:   PetscFree(work);
4691:   if (n_vertices) {
4692:     MatDestroy(&A_VR);
4693:   }
4694:   if (n_constraints) {
4695:     MatDestroy(&C_CR);
4696:   }
4697:   /* Checking coarse_sub_mat and coarse basis functions */
4698:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4699:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4700:   if (pcbddc->dbg_flag) {
4701:     Mat         coarse_sub_mat;
4702:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
4703:     Mat         coarse_phi_D,coarse_phi_B;
4704:     Mat         coarse_psi_D,coarse_psi_B;
4705:     Mat         A_II,A_BB,A_IB,A_BI;
4706:     Mat         C_B,CPHI;
4707:     IS          is_dummy;
4708:     Vec         mones;
4709:     MatType     checkmattype=MATSEQAIJ;
4710:     PetscReal   real_value;

4712:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4713:       Mat A;
4714:       PCBDDCBenignProject(pc,NULL,NULL,&A);
4715:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4716:       MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4717:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4718:       MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4719:       MatDestroy(&A);
4720:     } else {
4721:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4722:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4723:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4724:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4725:     }
4726:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4727:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4728:     if (!pcbddc->symmetric_primal) {
4729:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4730:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4731:     }
4732:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

4734:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4735:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4736:     PetscViewerFlush(pcbddc->dbg_viewer);
4737:     if (!pcbddc->symmetric_primal) {
4738:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4739:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4740:       MatDestroy(&AUXMAT);
4741:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4742:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4743:       MatDestroy(&AUXMAT);
4744:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4745:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4746:       MatDestroy(&AUXMAT);
4747:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4748:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4749:       MatDestroy(&AUXMAT);
4750:     } else {
4751:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4752:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4753:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4754:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4755:       MatDestroy(&AUXMAT);
4756:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4757:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4758:       MatDestroy(&AUXMAT);
4759:     }
4760:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4761:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4762:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4763:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4764:     if (pcbddc->benign_n) {
4765:       Mat         B0_B,B0_BPHI;
4766:       PetscScalar *data,*data2;
4767:       PetscInt    j;

4769:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4770:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4771:       MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4772:       MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4773:       MatDenseGetArray(TM1,&data);
4774:       MatDenseGetArray(B0_BPHI,&data2);
4775:       for (j=0;j<pcbddc->benign_n;j++) {
4776:         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4777:         for (i=0;i<pcbddc->local_primal_size;i++) {
4778:           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4779:           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4780:         }
4781:       }
4782:       MatDenseRestoreArray(TM1,&data);
4783:       MatDenseRestoreArray(B0_BPHI,&data2);
4784:       MatDestroy(&B0_B);
4785:       ISDestroy(&is_dummy);
4786:       MatDestroy(&B0_BPHI);
4787:     }
4788: #if 0
4789:   {
4790:     PetscViewer viewer;
4791:     char filename[256];
4792:     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4793:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4794:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4795:     PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4796:     MatView(coarse_sub_mat,viewer);
4797:     PetscObjectSetName((PetscObject)TM1,"projected");
4798:     MatView(TM1,viewer);
4799:     if (pcbddc->coarse_phi_B) {
4800:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4801:       MatView(pcbddc->coarse_phi_B,viewer);
4802:     }
4803:     if (pcbddc->coarse_phi_D) {
4804:       PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4805:       MatView(pcbddc->coarse_phi_D,viewer);
4806:     }
4807:     if (pcbddc->coarse_psi_B) {
4808:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4809:       MatView(pcbddc->coarse_psi_B,viewer);
4810:     }
4811:     if (pcbddc->coarse_psi_D) {
4812:       PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4813:       MatView(pcbddc->coarse_psi_D,viewer);
4814:     }
4815:     PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4816:     MatView(pcbddc->local_mat,viewer);
4817:     PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4818:     MatView(pcbddc->ConstraintMatrix,viewer);
4819:     PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4820:     ISView(pcis->is_I_local,viewer);
4821:     PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4822:     ISView(pcis->is_B_local,viewer);
4823:     PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4824:     ISView(pcbddc->is_R_local,viewer);
4825:     PetscViewerDestroy(&viewer);
4826:   }
4827: #endif
4828:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4829:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
4830:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4831:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

4833:     /* check constraints */
4834:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4835:     MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4836:     if (!pcbddc->benign_n) { /* TODO: add benign case */
4837:       MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4838:     } else {
4839:       PetscScalar *data;
4840:       Mat         tmat;
4841:       MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4842:       MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4843:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4844:       MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4845:       MatDestroy(&tmat);
4846:     }
4847:     MatCreateVecs(CPHI,&mones,NULL);
4848:     VecSet(mones,-1.0);
4849:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
4850:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4851:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4852:     if (!pcbddc->symmetric_primal) {
4853:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4854:       VecSet(mones,-1.0);
4855:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
4856:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4857:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4858:     }
4859:     MatDestroy(&C_B);
4860:     MatDestroy(&CPHI);
4861:     ISDestroy(&is_dummy);
4862:     VecDestroy(&mones);
4863:     PetscViewerFlush(pcbddc->dbg_viewer);
4864:     MatDestroy(&A_II);
4865:     MatDestroy(&A_BB);
4866:     MatDestroy(&A_IB);
4867:     MatDestroy(&A_BI);
4868:     MatDestroy(&TM1);
4869:     MatDestroy(&TM2);
4870:     MatDestroy(&TM3);
4871:     MatDestroy(&TM4);
4872:     MatDestroy(&coarse_phi_D);
4873:     MatDestroy(&coarse_phi_B);
4874:     if (!pcbddc->symmetric_primal) {
4875:       MatDestroy(&coarse_psi_D);
4876:       MatDestroy(&coarse_psi_B);
4877:     }
4878:     MatDestroy(&coarse_sub_mat);
4879:   }
4880:   /* get back data */
4881:   *coarse_submat_vals_n = coarse_submat_vals;
4882:   PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4883:   return(0);
4884: }

4886: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4887: {
4888:   Mat            *work_mat;
4889:   IS             isrow_s,iscol_s;
4890:   PetscBool      rsorted,csorted;
4891:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

4895:   ISSorted(isrow,&rsorted);
4896:   ISSorted(iscol,&csorted);
4897:   ISGetLocalSize(isrow,&rsize);
4898:   ISGetLocalSize(iscol,&csize);

4900:   if (!rsorted) {
4901:     const PetscInt *idxs;
4902:     PetscInt *idxs_sorted,i;

4904:     PetscMalloc1(rsize,&idxs_perm_r);
4905:     PetscMalloc1(rsize,&idxs_sorted);
4906:     for (i=0;i<rsize;i++) {
4907:       idxs_perm_r[i] = i;
4908:     }
4909:     ISGetIndices(isrow,&idxs);
4910:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4911:     for (i=0;i<rsize;i++) {
4912:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
4913:     }
4914:     ISRestoreIndices(isrow,&idxs);
4915:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4916:   } else {
4917:     PetscObjectReference((PetscObject)isrow);
4918:     isrow_s = isrow;
4919:   }

4921:   if (!csorted) {
4922:     if (isrow == iscol) {
4923:       PetscObjectReference((PetscObject)isrow_s);
4924:       iscol_s = isrow_s;
4925:     } else {
4926:       const PetscInt *idxs;
4927:       PetscInt       *idxs_sorted,i;

4929:       PetscMalloc1(csize,&idxs_perm_c);
4930:       PetscMalloc1(csize,&idxs_sorted);
4931:       for (i=0;i<csize;i++) {
4932:         idxs_perm_c[i] = i;
4933:       }
4934:       ISGetIndices(iscol,&idxs);
4935:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4936:       for (i=0;i<csize;i++) {
4937:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
4938:       }
4939:       ISRestoreIndices(iscol,&idxs);
4940:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4941:     }
4942:   } else {
4943:     PetscObjectReference((PetscObject)iscol);
4944:     iscol_s = iscol;
4945:   }

4947:   MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

4949:   if (!rsorted || !csorted) {
4950:     Mat      new_mat;
4951:     IS       is_perm_r,is_perm_c;

4953:     if (!rsorted) {
4954:       PetscInt *idxs_r,i;
4955:       PetscMalloc1(rsize,&idxs_r);
4956:       for (i=0;i<rsize;i++) {
4957:         idxs_r[idxs_perm_r[i]] = i;
4958:       }
4959:       PetscFree(idxs_perm_r);
4960:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4961:     } else {
4962:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4963:     }
4964:     ISSetPermutation(is_perm_r);

4966:     if (!csorted) {
4967:       if (isrow_s == iscol_s) {
4968:         PetscObjectReference((PetscObject)is_perm_r);
4969:         is_perm_c = is_perm_r;
4970:       } else {
4971:         PetscInt *idxs_c,i;
4972:         if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4973:         PetscMalloc1(csize,&idxs_c);
4974:         for (i=0;i<csize;i++) {
4975:           idxs_c[idxs_perm_c[i]] = i;
4976:         }
4977:         PetscFree(idxs_perm_c);
4978:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4979:       }
4980:     } else {
4981:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4982:     }
4983:     ISSetPermutation(is_perm_c);

4985:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4986:     MatDestroy(&work_mat[0]);
4987:     work_mat[0] = new_mat;
4988:     ISDestroy(&is_perm_r);
4989:     ISDestroy(&is_perm_c);
4990:   }

4992:   PetscObjectReference((PetscObject)work_mat[0]);
4993:   *B = work_mat[0];
4994:   MatDestroyMatrices(1,&work_mat);
4995:   ISDestroy(&isrow_s);
4996:   ISDestroy(&iscol_s);
4997:   return(0);
4998: }

5000: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5001: {
5002:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5003:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5004:   Mat            new_mat,lA;
5005:   IS             is_local,is_global;
5006:   PetscInt       local_size;
5007:   PetscBool      isseqaij;

5011:   MatDestroy(&pcbddc->local_mat);
5012:   MatGetSize(matis->A,&local_size,NULL);
5013:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5014:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5015:   ISDestroy(&is_local);
5016:   MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5017:   ISDestroy(&is_global);

5019:   /* check */
5020:   if (pcbddc->dbg_flag) {
5021:     Vec       x,x_change;
5022:     PetscReal error;

5024:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5025:     VecSetRandom(x,NULL);
5026:     MatMult(ChangeOfBasisMatrix,x,x_change);
5027:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5028:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5029:     MatMult(new_mat,matis->x,matis->y);
5030:     if (!pcbddc->change_interior) {
5031:       const PetscScalar *x,*y,*v;
5032:       PetscReal         lerror = 0.;
5033:       PetscInt          i;

5035:       VecGetArrayRead(matis->x,&x);
5036:       VecGetArrayRead(matis->y,&y);
5037:       VecGetArrayRead(matis->counter,&v);
5038:       for (i=0;i<local_size;i++)
5039:         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5040:           lerror = PetscAbsScalar(x[i]-y[i]);
5041:       VecRestoreArrayRead(matis->x,&x);
5042:       VecRestoreArrayRead(matis->y,&y);
5043:       VecRestoreArrayRead(matis->counter,&v);
5044:       MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5045:       if (error > PETSC_SMALL) {
5046:         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5047:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5048:         } else {
5049:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5050:         }
5051:       }
5052:     }
5053:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5054:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5055:     VecAXPY(x,-1.0,x_change);
5056:     VecNorm(x,NORM_INFINITY,&error);
5057:     if (error > PETSC_SMALL) {
5058:       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5059:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5060:       } else {
5061:         SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5062:       }
5063:     }
5064:     VecDestroy(&x);
5065:     VecDestroy(&x_change);
5066:   }

5068:   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5069:   PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);

5071:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5072:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5073:   if (isseqaij) {
5074:     MatDestroy(&pcbddc->local_mat);
5075:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5076:     if (lA) {
5077:       Mat work;
5078:       MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5079:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5080:       MatDestroy(&work);
5081:     }
5082:   } else {
5083:     Mat work_mat;

5085:     MatDestroy(&pcbddc->local_mat);
5086:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5087:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5088:     MatDestroy(&work_mat);
5089:     if (lA) {
5090:       Mat work;
5091:       MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5092:       MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5093:       PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5094:       MatDestroy(&work);
5095:     }
5096:   }
5097:   if (matis->A->symmetric_set) {
5098:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5099: #if !defined(PETSC_USE_COMPLEX)
5100:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5101: #endif
5102:   }
5103:   MatDestroy(&new_mat);
5104:   return(0);
5105: }

5107: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5108: {
5109:   PC_IS*          pcis = (PC_IS*)(pc->data);
5110:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
5111:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5112:   PetscInt        *idx_R_local=NULL;
5113:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
5114:   PetscInt        vbs,bs;
5115:   PetscBT         bitmask=NULL;
5116:   PetscErrorCode  ierr;

5119:   /*
5120:     No need to setup local scatters if
5121:       - primal space is unchanged
5122:         AND
5123:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5124:         AND
5125:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5126:   */
5127:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5128:     return(0);
5129:   }
5130:   /* destroy old objects */
5131:   ISDestroy(&pcbddc->is_R_local);
5132:   VecScatterDestroy(&pcbddc->R_to_B);
5133:   VecScatterDestroy(&pcbddc->R_to_D);
5134:   /* Set Non-overlapping dimensions */
5135:   n_B = pcis->n_B;
5136:   n_D = pcis->n - n_B;
5137:   n_vertices = pcbddc->n_vertices;

5139:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */

5141:   /* create auxiliary bitmask and allocate workspace */
5142:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5143:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5144:     PetscBTCreate(pcis->n,&bitmask);
5145:     for (i=0;i<n_vertices;i++) {
5146:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5147:     }

5149:     for (i=0, n_R=0; i<pcis->n; i++) {
5150:       if (!PetscBTLookup(bitmask,i)) {
5151:         idx_R_local[n_R++] = i;
5152:       }
5153:     }
5154:   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5155:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5157:     ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5158:     ISGetLocalSize(reuse_solver->is_R,&n_R);
5159:   }

5161:   /* Block code */
5162:   vbs = 1;
5163:   MatGetBlockSize(pcbddc->local_mat,&bs);
5164:   if (bs>1 && !(n_vertices%bs)) {
5165:     PetscBool is_blocked = PETSC_TRUE;
5166:     PetscInt  *vary;
5167:     if (!sub_schurs || !sub_schurs->reuse_solver) {
5168:       PetscMalloc1(pcis->n/bs,&vary);
5169:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5170:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5171:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5172:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5173:       for (i=0; i<pcis->n/bs; i++) {
5174:         if (vary[i]!=0 && vary[i]!=bs) {
5175:           is_blocked = PETSC_FALSE;
5176:           break;
5177:         }
5178:       }
5179:       PetscFree(vary);
5180:     } else {
5181:       /* Verify directly the R set */
5182:       for (i=0; i<n_R/bs; i++) {
5183:         PetscInt j,node=idx_R_local[bs*i];
5184:         for (j=1; j<bs; j++) {
5185:           if (node != idx_R_local[bs*i+j]-j) {
5186:             is_blocked = PETSC_FALSE;
5187:             break;
5188:           }
5189:         }
5190:       }
5191:     }
5192:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5193:       vbs = bs;
5194:       for (i=0;i<n_R/vbs;i++) {
5195:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5196:       }
5197:     }
5198:   }
5199:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5200:   if (sub_schurs && sub_schurs->reuse_solver) {
5201:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5203:     ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5204:     ISDestroy(&reuse_solver->is_R);
5205:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
5206:     reuse_solver->is_R = pcbddc->is_R_local;
5207:   } else {
5208:     PetscFree(idx_R_local);
5209:   }

5211:   /* print some info if requested */
5212:   if (pcbddc->dbg_flag) {
5213:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5214:     PetscViewerFlush(pcbddc->dbg_viewer);
5215:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5216:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5217:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5218:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5219:     PetscViewerFlush(pcbddc->dbg_viewer);
5220:   }

5222:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5223:   if (!sub_schurs || !sub_schurs->reuse_solver) {
5224:     IS       is_aux1,is_aux2;
5225:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

5227:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5228:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5229:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5230:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5231:     for (i=0; i<n_D; i++) {
5232:       PetscBTSet(bitmask,is_indices[i]);
5233:     }
5234:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5235:     for (i=0, j=0; i<n_R; i++) {
5236:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5237:         aux_array1[j++] = i;
5238:       }
5239:     }
5240:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5241:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5242:     for (i=0, j=0; i<n_B; i++) {
5243:       if (!PetscBTLookup(bitmask,is_indices[i])) {
5244:         aux_array2[j++] = i;
5245:       }
5246:     }
5247:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5248:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5249:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5250:     ISDestroy(&is_aux1);
5251:     ISDestroy(&is_aux2);

5253:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5254:       PetscMalloc1(n_D,&aux_array1);
5255:       for (i=0, j=0; i<n_R; i++) {
5256:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
5257:           aux_array1[j++] = i;
5258:         }
5259:       }
5260:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5261:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5262:       ISDestroy(&is_aux1);
5263:     }
5264:     PetscBTDestroy(&bitmask);
5265:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5266:   } else {
5267:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5268:     IS                 tis;
5269:     PetscInt           schur_size;

5271:     ISGetLocalSize(reuse_solver->is_B,&schur_size);
5272:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5273:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5274:     ISDestroy(&tis);
5275:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5276:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5277:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5278:       ISDestroy(&tis);
5279:     }
5280:   }
5281:   return(0);
5282: }

/*
   MatNullSpacePropagate_Private - Restricts the null space (or, failing that, the near null space)
   attached to A through the index set is, and attaches the result to B as a near null space.

   Input Parameters:
+  A  - matrix whose null space (tried first) or near null space is used as the source
.  is - indices of a vector of A that are scattered into a vector of B
-  B  - matrix that receives the restricted space via MatSetNearNullSpace()

   Notes:
   The routine returns without doing anything when B already has a null space or a near null
   space, or when A has neither.
   When the source space contains the constant, a vector of ones is appended on the B side and
   the new space is created with has_cnst set to PETSC_FALSE.
   The restricted vectors are passed to PCBDDCOrthonormalizeVecs() before the new space is created.
*/
5284: static PetscErrorCode MatNullSpacePropagate_Private(Mat A, IS is, Mat B)
5285: {
5286:   MatNullSpace   NullSpace;
5287:   Mat            dmat;
5288:   const Vec      *nullvecs;
5289:   Vec            v,v2,*nullvecs2;
5290:   VecScatter     sct;
5291:   PetscInt       k,nnsp_size,bsiz,n,N,bs;
5292:   PetscBool      nnsp_has_cnst;

        /* B already carries a null space or near null space: leave it untouched */
5296:   MatGetNullSpace(B,&NullSpace);
5297:   if (!NullSpace) {
5298:     MatGetNearNullSpace(B,&NullSpace);
5299:   }
5300:   if (NullSpace) return(0);
        /* look for a space to propagate on A; nothing to do if A has none */
5301:   MatGetNullSpace(A,&NullSpace);
5302:   if (!NullSpace) {
5303:     MatGetNearNullSpace(A,&NullSpace);
5304:   }
5305:   if (!NullSpace) return(0);
        /* scatter from the entries of v listed in is to all the entries of v2 */
5306:   MatCreateVecs(A,&v,NULL);
5307:   MatCreateVecs(B,&v2,NULL);
5308:   VecScatterCreate(v,is,v2,NULL,&sct);
5309:   MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
        /* one extra vector is needed when the source space contains the constant */
5310:   bsiz = nnsp_size+!!nnsp_has_cnst;
5311:   PetscMalloc1(bsiz,&nullvecs2);
5312:   VecGetBlockSize(v2,&bs);
5313:   VecGetSize(v2,&N);
5314:   VecGetLocalSize(v2,&n);
        /* dense matrix with the layout of v2 and bsiz columns; each column is used below as the array of a Vec */
5315:   MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz,NULL,&dmat);
5316:   for (k=0;k<nnsp_size;k++) {
5317:     PetscScalar *arr;

5319:     MatDenseGetColumn(dmat,k,&arr);
5320:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[k]);
5321:     VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5322:     VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5323:     MatDenseRestoreColumn(dmat,&arr);
5324:   }
5325:   if (nnsp_has_cnst) {
5326:     PetscScalar *arr;

          /* explicit representation of the constant: last column filled with ones */
5328:     MatDenseGetColumn(dmat,nnsp_size,&arr);
5329:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[nnsp_size]);
5330:     VecSet(nullvecs2[nnsp_size],1.0);
5331:     MatDenseRestoreColumn(dmat,&arr);
5332:   }
5333:   PCBDDCOrthonormalizeVecs(bsiz,nullvecs2);
5334:   MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz,nullvecs2,&NullSpace);
        /* dmat is composed with the new null space under "_PBDDC_Null_dmat" before the local reference is dropped;
           presumably this keeps the column storage used by the vectors alive - TODO confirm */
5335:   PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5336:   MatDestroy(&dmat);
5337:   for (k=0;k<bsiz;k++) {
5338:     VecDestroy(&nullvecs2[k]);
5339:   }
5340:   PetscFree(nullvecs2);
        /* the result is always attached as a near null space, even when it came from the null space of A */
5341:   MatSetNearNullSpace(B,NullSpace);
5342:   MatNullSpaceDestroy(&NullSpace);
5343:   VecDestroy(&v);
5344:   VecDestroy(&v2);
5345:   VecScatterDestroy(&sct);
5346:   return(0);
5347: }

/*
   PCBDDCSetUpLocalSolvers - Creates (when missing) and configures the two local KSP objects of PCBDDC:
   ksp_D, operating on pcis->A_II/pcis->pA_II, and ksp_R, operating on the submatrix of
   pcbddc->local_mat selected by pcbddc->is_R_local.

   Input Parameters:
+  pc        - the preconditioner (pc->data is accessed both as PC_BDDC and as PC_IS)
.  dirichlet - if true, the Dirichlet solver ksp_D is set up
-  neumann   - if true, the Neumann solver ksp_R is set up

   Notes:
   The options prefixes are built from the prefix of pc followed by "pc_bddc_dirichlet_" or
   "pc_bddc_neumann_"; when current_level is nonzero, the trailing "pc_bddc_coarse_[lX_]" part
   of the pc prefix is dropped first and "l<level>_" is appended.
   A newly created KSP defaults to KSPPREONLY with PCCHOLESKY (MATSEQSBAIJ operator) or PCLU, and
   KSPSetFromOptions() is called on it only in the call that creates it.
   When sub_schurs->reuse_solver is available, its interior_solver/correction_solver replace the
   PCs of ksp_D/ksp_R.
   When pcbddc->dbg_flag is set, each requested solver is applied to A*x for a random x and the
   infinity norm of the error is printed on pcbddc->dbg_viewer.
*/
5349: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5350: {
5351:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
5352:   PC_IS          *pcis = (PC_IS*)pc->data;
5353:   PC             pc_temp;
5354:   Mat            A_RR;
5355:   MatNullSpace   nnsp;
5356:   MatReuse       reuse;
5357:   PetscScalar    m_one = -1.0;
5358:   PetscReal      value;
5359:   PetscInt       n_D,n_R;
5360:   PetscBool      issbaij,opts;
5362:   void           (*f)(void) = 0;
5363:   char           dir_prefix[256],neu_prefix[256],str_level[16];
5364:   size_t         len;

5367:   PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5368:   /* compute prefixes */
5369:   PetscStrcpy(dir_prefix,"");
5370:   PetscStrcpy(neu_prefix,"");
5371:   if (!pcbddc->current_level) {
5372:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5373:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5374:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5375:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5376:   } else {
5377:     PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5378:     PetscStrlen(((PetscObject)pc)->prefix,&len);
          /* NOTE(review): len is a size_t, so the subtractions below assume the prefix really ends with
             "pc_bddc_coarse_[lX_]"; a shorter prefix would wrap around - confirm against the code setting it */
5379:     len -= 15; /* remove "pc_bddc_coarse_" */
5380:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5381:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5382:     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5383:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5384:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5385:     PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5386:     PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5387:     PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5388:     PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5389:   }

5391:   /* DIRICHLET PROBLEM */
5392:   if (dirichlet) {
5393:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
          /* benign case without explicit change of basis: only supported together with a reusable solver */
5394:     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5395:       if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5396:       if (pcbddc->dbg_flag) {
5397:         Mat    A_IIn;

              /* in debug mode A_II is replaced by the matrix returned by PCBDDCBenignProject() */
5399:         PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5400:         MatDestroy(&pcis->A_II);
5401:         pcis->A_II = A_IIn;
5402:       }
5403:     }
          /* forward the symmetry flag of the local matrix, if it has been set */
5404:     if (pcbddc->local_mat->symmetric_set) {
5405:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5406:     }
5407:     /* Matrix for Dirichlet problem is pcis->A_II */
5408:     n_D  = pcis->n - pcis->n_B;
5409:     opts = PETSC_FALSE;
5410:     if (!pcbddc->ksp_D) { /* create object if not yet built */
5411:       opts = PETSC_TRUE;
5412:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5413:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5414:       /* default */
5415:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5416:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5417:       PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5418:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5419:       if (issbaij) {
5420:         PCSetType(pc_temp,PCCHOLESKY);
5421:       } else {
5422:         PCSetType(pc_temp,PCLU);
5423:       }
5424:       KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5425:     }
5426:     MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5427:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5428:     /* Allow user's customization */
5429:     if (opts) {
5430:       KSPSetFromOptions(pcbddc->ksp_D);
5431:     }
5432:     if (pcbddc->NullSpace_corr[0]) { /* approximate solver, propagate NearNullSpace */
5433:       MatNullSpacePropagate_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5434:     }
          /* hand the coordinates of the is_I_local dofs to the inner PC when it composes "PCSetCoordinates_C",
             mat_graph->cloc is nonzero and pA_II has no near null space */
5435:     MatGetNearNullSpace(pcis->pA_II,&nnsp);
5436:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
5437:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5438:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5439:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5440:       const PetscInt *idxs;
5441:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5443:       ISGetLocalSize(pcis->is_I_local,&nl);
5444:       ISGetIndices(pcis->is_I_local,&idxs);
5445:       PetscMalloc1(nl*cdim,&scoords);
            /* gather the cdim coordinates of each selected dof into a contiguous array */
5446:       for (i=0;i<nl;i++) {
5447:         for (d=0;d<cdim;d++) {
5448:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5449:         }
5450:       }
5451:       ISRestoreIndices(pcis->is_I_local,&idxs);
5452:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5453:       PetscFree(scoords);
5454:     }
          /* a reusable solver is available: use its interior_solver as the PC of ksp_D */
5455:     if (sub_schurs && sub_schurs->reuse_solver) {
5456:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5458:       KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5459:     }

5461:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5462:     if (!n_D) {
5463:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
5464:       PCSetType(pc_temp,PCNONE);
5465:     }
5466:     /* set ksp_D into pcis data */
          /* the reference is taken before destroying the old pcis->ksp_D, which may be the same object */
5467:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
5468:     KSPDestroy(&pcis->ksp_D);
5469:     pcis->ksp_D = pcbddc->ksp_D;
5470:   }

5472:   /* NEUMANN PROBLEM */
5473:   A_RR = 0;
5474:   if (neumann) {
5475:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5476:     PetscInt        ibs,mbs;
5477:     PetscBool       issbaij, reuse_neumann_solver;
5478:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;

          /* the correction solver is reused only if a reuse_solver exists and no "__KSPFETIDP_iP" object
             is composed with sub_schurs->A */
5480:     reuse_neumann_solver = PETSC_FALSE;
5481:     if (sub_schurs && sub_schurs->reuse_solver) {
5482:       IS iP;

5484:       reuse_neumann_solver = PETSC_TRUE;
5485:       PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5486:       if (iP) reuse_neumann_solver = PETSC_FALSE;
5487:     }
5488:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5489:     ISGetSize(pcbddc->is_R_local,&n_R);
5490:     if (pcbddc->ksp_R) { /* already created ksp */
5491:       PetscInt nn_R;
5492:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
            /* take our own reference: A_RR is destroyed below or at the end of this function */
5493:       PetscObjectReference((PetscObject)A_RR);
5494:       MatGetSize(A_RR,&nn_R,NULL);
5495:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5496:         KSPReset(pcbddc->ksp_R);
5497:         MatDestroy(&A_RR);
5498:         reuse = MAT_INITIAL_MATRIX;
5499:       } else { /* same sizes, but the nonzero pattern depends on primal vertices so it may have changed */
5500:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5501:           MatDestroy(&A_RR);
5502:           reuse = MAT_INITIAL_MATRIX;
5503:         } else { /* safe to reuse the matrix */
5504:           reuse = MAT_REUSE_MATRIX;
5505:         }
5506:       }
5507:       /* last check */
5508:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5509:         MatDestroy(&A_RR);
5510:         reuse = MAT_INITIAL_MATRIX;
5511:       }
5512:     } else { /* first time, so we need to create the matrix */
5513:       reuse = MAT_INITIAL_MATRIX;
5514:     }
5515:     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5516:     MatGetBlockSize(pcbddc->local_mat,&mbs);
5517:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
5518:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
          /* when local_mat is the very same object as matis->A, a converted copy is created instead of
             converting in place, so matis->A itself is not modified */
5519:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5520:       if (matis->A == pcbddc->local_mat) {
5521:         MatDestroy(&pcbddc->local_mat);
5522:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5523:       } else {
5524:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5525:       }
5526:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5527:       if (matis->A == pcbddc->local_mat) {
5528:         MatDestroy(&pcbddc->local_mat);
5529:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5530:       } else {
5531:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5532:       }
5533:     }
5534:     /* extract A_RR */
5535:     if (reuse_neumann_solver) {
5536:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5538:       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5539:         MatDestroy(&A_RR);
5540:         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5541:           PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5542:         } else {
5543:           MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5544:         }
5545:       } else {
              /* no explicit A_RR needed: use the operator already attached to the correction solver */
5546:         MatDestroy(&A_RR);
5547:         PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5548:         PetscObjectReference((PetscObject)A_RR);
5549:       }
5550:     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5551:       MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5552:     }
5553:     if (pcbddc->local_mat->symmetric_set) {
5554:       MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5555:     }
5556:     opts = PETSC_FALSE;
5557:     if (!pcbddc->ksp_R) { /* create object if not present */
5558:       opts = PETSC_TRUE;
5559:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5560:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5561:       /* default */
5562:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5563:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5564:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5565:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5566:       if (issbaij) {
5567:         PCSetType(pc_temp,PCCHOLESKY);
5568:       } else {
5569:         PCSetType(pc_temp,PCLU);
5570:       }
5571:       KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5572:     }
5573:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5574:     MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5575:     if (opts) { /* Allow user's customization once */
5576:       KSPSetFromOptions(pcbddc->ksp_R);
5577:     }
5578:     if (pcbddc->NullSpace_corr[2]) { /* approximate solver, propagate NearNullSpace */
5579:       MatNullSpacePropagate_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5580:     }
          /* same coordinate hand-off as for the Dirichlet solver, using the is_R_local dofs */
5581:     MatGetNearNullSpace(A_RR,&nnsp);
5582:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
5583:     PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5584:     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5585:       PetscReal      *coords = pcbddc->mat_graph->coords,*scoords;
5586:       const PetscInt *idxs;
5587:       PetscInt       cdim = pcbddc->mat_graph->cdim,nl,i,d;

5589:       ISGetLocalSize(pcbddc->is_R_local,&nl);
5590:       ISGetIndices(pcbddc->is_R_local,&idxs);
5591:       PetscMalloc1(nl*cdim,&scoords);
5592:       for (i=0;i<nl;i++) {
5593:         for (d=0;d<cdim;d++) {
5594:           scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5595:         }
5596:       }
5597:       ISRestoreIndices(pcbddc->is_R_local,&idxs);
5598:       PCSetCoordinates(pc_temp,cdim,nl,scoords);
5599:       PetscFree(scoords);
5600:     }

5602:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5603:     if (!n_R) {
5604:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
5605:       PCSetType(pc_temp,PCNONE);
5606:     }
5607:     /* Reuse solver if it is present */
5608:     if (reuse_neumann_solver) {
5609:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5611:       KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5612:     }
5613:   }

5615:   if (pcbddc->dbg_flag) {
5616:     PetscViewerFlush(pcbddc->dbg_viewer);
5617:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5618:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5619:   }

5621:   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5622:   if (pcbddc->NullSpace_corr[0]) {
5623:     PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5624:   }
5625:   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5626:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5627:   }
5628:   if (neumann && pcbddc->NullSpace_corr[2]) {
5629:     PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5630:   }
5631:   /* check Dirichlet and Neumann solvers */
5632:   if (pcbddc->dbg_flag) {
5633:     if (dirichlet) { /* Dirichlet */
            /* x random, y = A_II x, solve in place for y, then report ||x - y||_inf */
5634:       VecSetRandom(pcis->vec1_D,NULL);
5635:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5636:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5637:       KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5638:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5639:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5640:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5641:       PetscViewerFlush(pcbddc->dbg_viewer);
5642:     }
5643:     if (neumann) { /* Neumann */
            /* same test with A_RR and ksp_R */
5644:       VecSetRandom(pcbddc->vec1_R,NULL);
5645:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5646:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5647:       KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5648:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5649:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5650:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5651:       PetscViewerFlush(pcbddc->dbg_viewer);
5652:     }
5653:   }
5654:   /* free Neumann problem's matrix */
5655:   MatDestroy(&A_RR);
5656:   PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5657:   return(0);
5658: }

/*
   PCBDDCSolveSubstructureCorrection - Applies the local correction solve to the data stored in
   inout_B (and in inout_D when pcbddc->switch_static is set), overwriting them with the result.

   Input Parameters:
+  pc             - the preconditioner
.  inout_B        - right-hand side on input; zeroed and overwritten with the solution on output
.  inout_D        - accessed only when pcbddc->switch_static is set; right-hand side on input,
                    solution on output
-  applytranspose - if true, the transposed solve is used and the local_auxmat1/local_auxmat2
                    products are applied (transposed) before the solve instead of after it

   Notes:
   The solve uses pcbddc->ksp_R on pcbddc->vec1_R, unless sub_schurs->reuse_solver exists and
   switch_static is not set; in that case MatFactorSolveSchurComplement[Transpose]() is called on
   the rhs_B/sol_B vectors of the reusable solver.
*/
5660: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5661: {
5662:   PetscErrorCode  ierr;
5663:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5664:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
        /* flag: true when a reusable solver is available; note that inner blocks below declare a
           PCBDDCReuseSolvers variable with the same name */
5665:   PetscBool       reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;

5668:   if (!reuse_solver) {
5669:     VecSet(pcbddc->vec1_R,0.);
5670:   }
        /* load the right-hand side */
5671:   if (!pcbddc->switch_static) {
5672:     if (applytranspose && pcbddc->local_auxmat1) {
5673:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5674:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5675:     }
5676:     if (!reuse_solver) {
5677:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5678:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5679:     } else {
5680:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5682:       VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5683:       VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5684:     }
5685:   } else {
          /* switch_static: vec1_R receives both the inout_B and the inout_D data */
5686:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5687:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5688:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5689:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5690:     if (applytranspose && pcbddc->local_auxmat1) {
5691:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5692:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
            /* inout_B has been updated: scatter it into vec1_R again */
5693:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5694:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5695:     }
5696:   }
        /* solve, in place on vec1_R or through the reusable factorization */
5697:   if (!reuse_solver || pcbddc->switch_static) {
5698:     if (applytranspose) {
5699:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5700:     } else {
5701:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5702:     }
5703:     KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5704:   } else {
5705:     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5707:     if (applytranspose) {
5708:       MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5709:     } else {
5710:       MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5711:     }
5712:   }
        /* copy the solution back to the caller's vectors */
5713:   VecSet(inout_B,0.);
5714:   if (!pcbddc->switch_static) {
5715:     if (!reuse_solver) {
5716:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5717:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5718:     } else {
5719:       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;

5721:       VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5722:       VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5723:     }
5724:     if (!applytranspose && pcbddc->local_auxmat1) {
5725:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5726:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5727:     }
5728:   } else {
5729:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5730:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5731:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5732:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5733:     if (!applytranspose && pcbddc->local_auxmat1) {
5734:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5735:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5736:     }
          /* vec1_R may have been updated above: scatter it again into inout_B and inout_D */
5737:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5738:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5739:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5740:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5741:   }
5742:   return(0);
5743: }

/*
   PCBDDCApplyInterfacePreconditioner - Applies the coarse and the local corrections to the data
   stored in pcis->vec1_B (and pcis->vec1_D when pcbddc->switch_static is set) and sums them there.

   Input Parameters:
+  pc             - the preconditioner
-  applytranspose - if true, the roles of coarse_phi_* and coarse_psi_* are swapped and the
                    transposed coarse and local solves are used

   Notes:
   Steps, in order: restriction of vec1_B/vec1_D to vec1_P; scatter of vec1_P to the coarse
   right-hand side; coarse solve (only where pcbddc->coarse_ksp is non-NULL); local solve through
   PCBDDCSolveSubstructureCorrection(); scatter of the coarse solution back to vec1_P; sum of the
   two contributions.
   When pcbddc->benign_apply_coarse_only is set, restriction and local solve are skipped and
   vec1_B is overwritten with the expanded coarse solution.
*/
5745: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5746: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5747: {
5749:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
5750:   PC_IS*            pcis = (PC_IS*)  (pc->data);
5751:   const PetscScalar zero = 0.0;

5754:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5755:   if (!pcbddc->benign_apply_coarse_only) {
5756:     if (applytranspose) {
5757:       MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5758:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5759:     } else {
5760:       MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5761:       if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5762:     }
5763:   } else {
5764:     VecSet(pcbddc->vec1_P,zero);
5765:   }

5767:   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5768:   if (pcbddc->benign_n) {
5769:     PetscScalar *array;
5770:     PetscInt    j;

5772:     VecGetArray(pcbddc->vec1_P,&array);
          /* the benign_n values occupy the last benign_n entries of the local primal vector */
5773:     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5774:     VecRestoreArray(pcbddc->vec1_P,&array);
5775:   }

5777:   /* start communications from local primal nodes to rhs of coarse solver */
5778:   VecSet(pcbddc->coarse_vec,zero);
5779:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5780:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

5782:   /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
5783:   if (pcbddc->coarse_ksp) {
5784:     Mat          coarse_mat;
5785:     Vec          rhs,sol;
5786:     MatNullSpace nullsp;
5787:     PetscBool    isbddc = PETSC_FALSE;

5789:     if (pcbddc->benign_have_null) {
5790:       PC        coarse_pc;

5792:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5793:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5794:       /* we need to propagate to coarser levels the need for a possible benign correction */
5795:       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5796:         PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5797:         coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5798:         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5799:       }
5800:     }
5801:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5802:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
5803:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5804:     if (applytranspose) {
5805:       if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5806:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5807:       KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
            /* remove the transpose null space of the coarse operator, if any, from the solution */
5808:       MatGetTransposeNullSpace(coarse_mat,&nullsp);
5809:       if (nullsp) {
5810:         MatNullSpaceRemove(nullsp,sol);
5811:       }
5812:     } else {
5813:       MatGetNullSpace(coarse_mat,&nullsp);
5814:       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5815:         PC        coarse_pc;

              /* here the null space is removed from the right-hand side, and no KSPSolve is performed */
5817:         if (nullsp) {
5818:           MatNullSpaceRemove(nullsp,rhs);
5819:         }
5820:         KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5821:         PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5822:         PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5823:         PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5824:       } else {
5825:         KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5826:         KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5827:         if (nullsp) {
5828:           MatNullSpaceRemove(nullsp,sol);
5829:         }
5830:       }
5831:     }
5832:     /* we don't need the benign correction at coarser levels anymore */
5833:     if (pcbddc->benign_have_null && isbddc) {
5834:       PC        coarse_pc;
5835:       PC_BDDC*  coarsepcbddc;

5837:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5838:       coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5839:       coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5840:       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5841:     }
5842:   }

5844:   /* Local solution on R nodes */
5845:   if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5846:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5847:   }
5848:   /* communications from coarse sol to local primal nodes */
5849:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5850:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

5852:   /* Sum contributions from the two levels */
5853:   if (!pcbddc->benign_apply_coarse_only) {
5854:     if (applytranspose) {
5855:       MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5856:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5857:     } else {
5858:       MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5859:       if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5860:     }
5861:     /* store p0 */
5862:     if (pcbddc->benign_n) {
5863:       PetscScalar *array;
5864:       PetscInt    j;

5866:       VecGetArray(pcbddc->vec1_P,&array);
5867:       for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5868:       VecRestoreArray(pcbddc->vec1_P,&array);
5869:     }
5870:   } else { /* expand the coarse solution */
5871:     if (applytranspose) {
5872:       MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5873:     } else {
5874:       MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5875:     }
5876:   }
5877:   return(0);
5878: }

/*
   PCBDDCScatterCoarseDataBegin - Starts the scatter between pcbddc->vec1_P and pcbddc->coarse_vec
   through pcbddc->coarse_loc_to_glob.

   Input Parameters:
+  pc    - the preconditioner
.  imode - insert mode passed to VecScatterBegin()
-  smode - SCATTER_REVERSE scatters from coarse_vec to vec1_P; any other value scatters from
           vec1_P to coarse_vec

   Notes:
   In reverse mode, where pcbddc->coarse_ksp is non-NULL, the array placed in the KSP right-hand
   side is reset and the array of the KSP solution is placed into coarse_vec, so that the scatter
   reads the coarse solution. Must be followed by PCBDDCScatterCoarseDataEnd() with the same arguments.
*/
5880: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5881: {
5883:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5884:   PetscScalar    *array;
5885:   Vec            from,to;

5888:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5889:     from = pcbddc->coarse_vec;
5890:     to = pcbddc->vec1_P;
5891:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5892:       Vec tvec;

            /* undo the VecPlaceArray() on the KSP rhs, then let coarse_vec use the storage of the KSP solution */
5894:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5895:       VecResetArray(tvec);
5896:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5897:       VecGetArray(tvec,&array);
5898:       VecPlaceArray(from,array);
5899:       VecRestoreArray(tvec,&array);
5900:     }
5901:   } else { /* from local to global -> put data in coarse right hand side */
5902:     from = pcbddc->vec1_P;
5903:     to = pcbddc->coarse_vec;
5904:   }
5905:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5906:   return(0);
5907: }

/*
   PCBDDCScatterCoarseDataEnd - Completes the scatter between pcbddc->vec1_P and pcbddc->coarse_vec
   through pcbddc->coarse_loc_to_glob.

   Input Parameters:
+  pc    - the preconditioner
.  imode - insert mode passed to VecScatterEnd()
-  smode - SCATTER_REVERSE scatters from coarse_vec to vec1_P; any other value scatters from
           vec1_P to coarse_vec

   Notes:
   Where pcbddc->coarse_ksp is non-NULL: after a SCATTER_FORWARD, the array of coarse_vec is placed
   into the KSP right-hand side; otherwise VecResetArray() is called on coarse_vec.
*/
5909: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5910: {
5912:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
5913:   PetscScalar    *array;
5914:   Vec            from,to;

5917:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5918:     from = pcbddc->coarse_vec;
5919:     to = pcbddc->vec1_P;
5920:   } else { /* from local to global -> put data in coarse right hand side */
5921:     from = pcbddc->vec1_P;
5922:     to = pcbddc->coarse_vec;
5923:   }
5924:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5925:   if (smode == SCATTER_FORWARD) {
5926:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5927:       Vec tvec;

            /* the KSP rhs now uses the storage of coarse_vec, which holds the scattered data */
5929:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5930:       VecGetArray(to,&array);
5931:       VecPlaceArray(tvec,array);
5932:       VecRestoreArray(to,&array);
5933:     }
5934:   } else {
5935:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5936:      VecResetArray(from);
5937:     }
5938:   }
5939:   return(0);
5940: }

5942: /* uncomment for testing purposes */
5943: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5944: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5945: {
5946:   PetscErrorCode    ierr;
5947:   PC_IS*            pcis = (PC_IS*)(pc->data);
5948:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
5949:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
5950:   /* one and zero */
5951:   PetscScalar       one=1.0,zero=0.0;
5952:   /* space to store constraints and their local indices */
5953:   PetscScalar       *constraints_data;
5954:   PetscInt          *constraints_idxs,*constraints_idxs_B;
5955:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
5956:   PetscInt          *constraints_n;
5957:   /* iterators */
5958:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
5959:   /* BLAS integers */
5960:   PetscBLASInt      lwork,lierr;
5961:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
5962:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
5963:   /* reuse */
5964:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
5965:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
5966:   /* change of basis */
5967:   PetscBool         qr_needed;
5968:   PetscBT           change_basis,qr_needed_idx;
5969:   /* auxiliary stuff */
5970:   PetscInt          *nnz,*is_indices;
5971:   PetscInt          ncc;
5972:   /* some quantities */
5973:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
5974:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5975:   PetscReal         tol; /* tolerance for retaining eigenmodes */

5978:   tol  = PetscSqrtReal(PETSC_SMALL);
5979:   /* Destroy Mat objects computed previously */
5980:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5981:   MatDestroy(&pcbddc->ConstraintMatrix);
5982:   MatDestroy(&pcbddc->switch_static_change);
5983:   /* save info on constraints from previous setup (if any) */
5984:   olocal_primal_size = pcbddc->local_primal_size;
5985:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5986:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5987:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5988:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5989:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5990:   PetscFree(pcbddc->primal_indices_local_idxs);

5992:   if (!pcbddc->adaptive_selection) {
5993:     IS           ISForVertices,*ISForFaces,*ISForEdges;
5994:     MatNullSpace nearnullsp;
5995:     const Vec    *nearnullvecs;
5996:     Vec          *localnearnullsp;
5997:     PetscScalar  *array;
5998:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
5999:     PetscBool    nnsp_has_cnst;
6000:     /* LAPACK working arrays for SVD or POD */
6001:     PetscBool    skip_lapack,boolforchange;
6002:     PetscScalar  *work;
6003:     PetscReal    *singular_vals;
6004: #if defined(PETSC_USE_COMPLEX)
6005:     PetscReal    *rwork;
6006: #endif
6007: #if defined(PETSC_MISSING_LAPACK_GESVD)
6008:     PetscScalar  *temp_basis,*correlation_mat;
6009: #else
6010:     PetscBLASInt dummy_int=1;
6011:     PetscScalar  dummy_scalar=1.;
6012: #endif

6014:     /* Get index sets for faces, edges and vertices from graph */
6015:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6016:     /* print some info */
6017:     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6018:       PetscInt nv;

6020:       PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6021:       ISGetSize(ISForVertices,&nv);
6022:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6023:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6024:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6025:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6026:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6027:       PetscViewerFlush(pcbddc->dbg_viewer);
6028:       PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6029:     }

6031:     /* free unneeded index sets */
6032:     if (!pcbddc->use_vertices) {
6033:       ISDestroy(&ISForVertices);
6034:     }
6035:     if (!pcbddc->use_edges) {
6036:       for (i=0;i<n_ISForEdges;i++) {
6037:         ISDestroy(&ISForEdges[i]);
6038:       }
6039:       PetscFree(ISForEdges);
6040:       n_ISForEdges = 0;
6041:     }
6042:     if (!pcbddc->use_faces) {
6043:       for (i=0;i<n_ISForFaces;i++) {
6044:         ISDestroy(&ISForFaces[i]);
6045:       }
6046:       PetscFree(ISForFaces);
6047:       n_ISForFaces = 0;
6048:     }

6050:     /* check if near null space is attached to global mat */
6051:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
6052:     if (nearnullsp) {
6053:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6054:       /* remove any stored info */
6055:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
6056:       PetscFree(pcbddc->onearnullvecs_state);
6057:       /* store information for BDDC solver reuse */
6058:       PetscObjectReference((PetscObject)nearnullsp);
6059:       pcbddc->onearnullspace = nearnullsp;
6060:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6061:       for (i=0;i<nnsp_size;i++) {
6062:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6063:       }
6064:     } else { /* if near null space is not provided BDDC uses constants by default */
6065:       nnsp_size = 0;
6066:       nnsp_has_cnst = PETSC_TRUE;
6067:     }
6068:     /* get max number of constraints on a single cc */
6069:     max_constraints = nnsp_size;
6070:     if (nnsp_has_cnst) max_constraints++;

6072:     /*
6073:          Evaluate maximum storage size needed by the procedure
6074:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6075:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6076:          There can be multiple constraints per connected component
6077:                                                                                                                                                            */
6078:     n_vertices = 0;
6079:     if (ISForVertices) {
6080:       ISGetSize(ISForVertices,&n_vertices);
6081:     }
6082:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6083:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

6085:     total_counts = n_ISForFaces+n_ISForEdges;
6086:     total_counts *= max_constraints;
6087:     total_counts += n_vertices;
6088:     PetscBTCreate(total_counts,&change_basis);

6090:     total_counts = 0;
6091:     max_size_of_constraint = 0;
6092:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6093:       IS used_is;
6094:       if (i<n_ISForEdges) {
6095:         used_is = ISForEdges[i];
6096:       } else {
6097:         used_is = ISForFaces[i-n_ISForEdges];
6098:       }
6099:       ISGetSize(used_is,&j);
6100:       total_counts += j;
6101:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6102:     }
6103:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

6105:     /* get local part of global near null space vectors */
6106:     PetscMalloc1(nnsp_size,&localnearnullsp);
6107:     for (k=0;k<nnsp_size;k++) {
6108:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6109:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6110:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6111:     }

6113:     /* whether or not to skip lapack calls */
6114:     skip_lapack = PETSC_TRUE;
6115:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

6117:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6118:     if (!skip_lapack) {
6119:       PetscScalar temp_work;

6121: #if defined(PETSC_MISSING_LAPACK_GESVD)
6122:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6123:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6124:       PetscMalloc1(max_constraints,&singular_vals);
6125:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6126: #if defined(PETSC_USE_COMPLEX)
6127:       PetscMalloc1(3*max_constraints,&rwork);
6128: #endif
6129:       /* now we evaluate the optimal workspace using query with lwork=-1 */
6130:       PetscBLASIntCast(max_constraints,&Blas_N);
6131:       PetscBLASIntCast(max_constraints,&Blas_LDA);
6132:       lwork = -1;
6133:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6134: #if !defined(PETSC_USE_COMPLEX)
6135:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6136: #else
6137:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6138: #endif
6139:       PetscFPTrapPop();
6140:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6141: #else /* on missing GESVD */
6142:       /* SVD */
6143:       PetscInt max_n,min_n;
6144:       max_n = max_size_of_constraint;
6145:       min_n = max_constraints;
6146:       if (max_size_of_constraint < max_constraints) {
6147:         min_n = max_size_of_constraint;
6148:         max_n = max_constraints;
6149:       }
6150:       PetscMalloc1(min_n,&singular_vals);
6151: #if defined(PETSC_USE_COMPLEX)
6152:       PetscMalloc1(5*min_n,&rwork);
6153: #endif
6154:       /* now we evaluate the optimal workspace using query with lwork=-1 */
6155:       lwork = -1;
6156:       PetscBLASIntCast(max_n,&Blas_M);
6157:       PetscBLASIntCast(min_n,&Blas_N);
6158:       PetscBLASIntCast(max_n,&Blas_LDA);
6159:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6160: #if !defined(PETSC_USE_COMPLEX)
6161:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6162: #else
6163:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6164: #endif
6165:       PetscFPTrapPop();
6166:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6167: #endif /* on missing GESVD */
6168:       /* Allocate optimal workspace */
6169:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6170:       PetscMalloc1(lwork,&work);
6171:     }
6172:     /* Now we can loop on constraining sets */
6173:     total_counts = 0;
6174:     constraints_idxs_ptr[0] = 0;
6175:     constraints_data_ptr[0] = 0;
6176:     /* vertices */
6177:     if (n_vertices) {
6178:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6179:       PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
6180:       for (i=0;i<n_vertices;i++) {
6181:         constraints_n[total_counts] = 1;
6182:         constraints_data[total_counts] = 1.0;
6183:         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6184:         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6185:         total_counts++;
6186:       }
6187:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6188:       n_vertices = total_counts;
6189:     }

6191:     /* edges and faces */
6192:     total_counts_cc = total_counts;
6193:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6194:       IS        used_is;
6195:       PetscBool idxs_copied = PETSC_FALSE;

6197:       if (ncc<n_ISForEdges) {
6198:         used_is = ISForEdges[ncc];
6199:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6200:       } else {
6201:         used_is = ISForFaces[ncc-n_ISForEdges];
6202:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6203:       }
6204:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

6206:       ISGetSize(used_is,&size_of_constraint);
6207:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
6208:       /* change of basis should not be performed on local periodic nodes */
6209:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6210:       if (nnsp_has_cnst) {
6211:         PetscScalar quad_value;

6213:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6214:         idxs_copied = PETSC_TRUE;

6216:         if (!pcbddc->use_nnsp_true) {
6217:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6218:         } else {
6219:           quad_value = 1.0;
6220:         }
6221:         for (j=0;j<size_of_constraint;j++) {
6222:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6223:         }
6224:         temp_constraints++;
6225:         total_counts++;
6226:       }
6227:       for (k=0;k<nnsp_size;k++) {
6228:         PetscReal real_value;
6229:         PetscScalar *ptr_to_data;

6231:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6232:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6233:         for (j=0;j<size_of_constraint;j++) {
6234:           ptr_to_data[j] = array[is_indices[j]];
6235:         }
6236:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6237:         /* check if array is null on the connected component */
6238:         PetscBLASIntCast(size_of_constraint,&Blas_N);
6239:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6240:         if (real_value > tol*size_of_constraint) { /* keep indices and values */
6241:           temp_constraints++;
6242:           total_counts++;
6243:           if (!idxs_copied) {
6244:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6245:             idxs_copied = PETSC_TRUE;
6246:           }
6247:         }
6248:       }
6249:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6250:       valid_constraints = temp_constraints;
6251:       if (!pcbddc->use_nnsp_true && temp_constraints) {
6252:         if (temp_constraints == 1) { /* just normalize the constraint */
6253:           PetscScalar norm,*ptr_to_data;

6255:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6256:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6257:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6258:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6259:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6260:         } else { /* perform SVD */
6261:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

6263: #if defined(PETSC_MISSING_LAPACK_GESVD)
6264:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6265:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6266:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6267:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
6268:                 from that computed using LAPACKgesvd
6269:              -> This is due to a different computation of eigenvectors in LAPACKheev
6270:              -> The quality of the POD-computed basis will be the same */
6271:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6272:           /* Store upper triangular part of correlation matrix */
6273:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6274:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6275:           for (j=0;j<temp_constraints;j++) {
6276:             for (k=0;k<j+1;k++) {
6277:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6278:             }
6279:           }
6280:           /* compute eigenvalues and eigenvectors of correlation matrix */
6281:           PetscBLASIntCast(temp_constraints,&Blas_N);
6282:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
6283: #if !defined(PETSC_USE_COMPLEX)
6284:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6285: #else
6286:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6287: #endif
6288:           PetscFPTrapPop();
6289:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6290:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6291:           j = 0;
6292:           while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6293:           total_counts = total_counts-j;
6294:           valid_constraints = temp_constraints-j;
6295:           /* scale and copy POD basis into used quadrature memory */
6296:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6297:           PetscBLASIntCast(temp_constraints,&Blas_N);
6298:           PetscBLASIntCast(temp_constraints,&Blas_K);
6299:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6300:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
6301:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6302:           if (j<temp_constraints) {
6303:             PetscInt ii;
6304:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6305:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6306:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6307:             PetscFPTrapPop();
6308:             for (k=0;k<temp_constraints-j;k++) {
6309:               for (ii=0;ii<size_of_constraint;ii++) {
6310:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6311:               }
6312:             }
6313:           }
6314: #else  /* on missing GESVD */
6315:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6316:           PetscBLASIntCast(temp_constraints,&Blas_N);
6317:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6318:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6319: #if !defined(PETSC_USE_COMPLEX)
6320:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6321: #else
6322:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6323: #endif
6324:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6325:           PetscFPTrapPop();
6326:           /* retain singular values whose ratio to the largest one is at least tol: note that LAPACKgesvd returns singular values in descending order */
6327:           k = temp_constraints;
6328:           if (k > size_of_constraint) k = size_of_constraint;
6329:           j = 0;
6330:           while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6331:           valid_constraints = k-j;
6332:           total_counts = total_counts-temp_constraints+valid_constraints;
6333: #endif /* on missing GESVD */
6334:         }
6335:       }
6336:       /* update pointers information */
6337:       if (valid_constraints) {
6338:         constraints_n[total_counts_cc] = valid_constraints;
6339:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6340:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6341:         /* set change_of_basis flag */
6342:         if (boolforchange) {
6343:           PetscBTSet(change_basis,total_counts_cc);
6344:         }
6345:         total_counts_cc++;
6346:       }
6347:     }
6348:     /* free workspace */
6349:     if (!skip_lapack) {
6350:       PetscFree(work);
6351: #if defined(PETSC_USE_COMPLEX)
6352:       PetscFree(rwork);
6353: #endif
6354:       PetscFree(singular_vals);
6355: #if defined(PETSC_MISSING_LAPACK_GESVD)
6356:       PetscFree(correlation_mat);
6357:       PetscFree(temp_basis);
6358: #endif
6359:     }
6360:     for (k=0;k<nnsp_size;k++) {
6361:       VecDestroy(&localnearnullsp[k]);
6362:     }
6363:     PetscFree(localnearnullsp);
6364:     /* free index sets of faces, edges and vertices */
6365:     for (i=0;i<n_ISForFaces;i++) {
6366:       ISDestroy(&ISForFaces[i]);
6367:     }
6368:     if (n_ISForFaces) {
6369:       PetscFree(ISForFaces);
6370:     }
6371:     for (i=0;i<n_ISForEdges;i++) {
6372:       ISDestroy(&ISForEdges[i]);
6373:     }
6374:     if (n_ISForEdges) {
6375:       PetscFree(ISForEdges);
6376:     }
6377:     ISDestroy(&ISForVertices);
6378:   } else {
6379:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

6381:     total_counts = 0;
6382:     n_vertices = 0;
6383:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6384:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6385:     }
6386:     max_constraints = 0;
6387:     total_counts_cc = 0;
6388:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6389:       total_counts += pcbddc->adaptive_constraints_n[i];
6390:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6391:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6392:     }
6393:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6394:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6395:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
6396:     constraints_data = pcbddc->adaptive_constraints_data;
6397:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6398:     PetscMalloc1(total_counts_cc,&constraints_n);
6399:     total_counts_cc = 0;
6400:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6401:       if (pcbddc->adaptive_constraints_n[i]) {
6402:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6403:       }
6404:     }

6406:     max_size_of_constraint = 0;
6407:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6408:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6409:     /* Change of basis */
6410:     PetscBTCreate(total_counts_cc,&change_basis);
6411:     if (pcbddc->use_change_of_basis) {
6412:       for (i=0;i<sub_schurs->n_subs;i++) {
6413:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6414:           PetscBTSet(change_basis,i+n_vertices);
6415:         }
6416:       }
6417:     }
6418:   }
6419:   pcbddc->local_primal_size = total_counts;
6420:   PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);

6422:   /* map constraints_idxs in boundary numbering */
6423:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6424:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);

6426:   /* Create constraint matrix */
6427:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6428:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6429:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

6431:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6432:   /* determine if a QR strategy is needed for change of basis */
6433:   qr_needed = pcbddc->use_qr_single;
6434:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
6435:   total_primal_vertices=0;
6436:   pcbddc->local_primal_size_cc = 0;
6437:   for (i=0;i<total_counts_cc;i++) {
6438:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6439:     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6440:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6441:       pcbddc->local_primal_size_cc += 1;
6442:     } else if (PetscBTLookup(change_basis,i)) {
6443:       for (k=0;k<constraints_n[i];k++) {
6444:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6445:       }
6446:       pcbddc->local_primal_size_cc += constraints_n[i];
6447:       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6448:         PetscBTSet(qr_needed_idx,i);
6449:         qr_needed = PETSC_TRUE;
6450:       }
6451:     } else {
6452:       pcbddc->local_primal_size_cc += 1;
6453:     }
6454:   }
6455:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6456:   pcbddc->n_vertices = total_primal_vertices;
6457:   /* permute indices in order to have a sorted set of vertices */
6458:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6459:   PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6460:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6461:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

6463:   /* nonzero structure of constraint matrix */
6464:   /* and get reference dof for local constraints */
6465:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
6466:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

6468:   j = total_primal_vertices;
6469:   total_counts = total_primal_vertices;
6470:   cum = total_primal_vertices;
6471:   for (i=n_vertices;i<total_counts_cc;i++) {
6472:     if (!PetscBTLookup(change_basis,i)) {
6473:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6474:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6475:       cum++;
6476:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6477:       for (k=0;k<constraints_n[i];k++) {
6478:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6479:         nnz[j+k] = size_of_constraint;
6480:       }
6481:       j += constraints_n[i];
6482:     }
6483:   }
6484:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6485:   MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6486:   PetscFree(nnz);

6488:   /* set values in constraint matrix */
6489:   for (i=0;i<total_primal_vertices;i++) {
6490:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6491:   }
6492:   total_counts = total_primal_vertices;
6493:   for (i=n_vertices;i<total_counts_cc;i++) {
6494:     if (!PetscBTLookup(change_basis,i)) {
6495:       PetscInt *cols;

6497:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6498:       cols = constraints_idxs+constraints_idxs_ptr[i];
6499:       for (k=0;k<constraints_n[i];k++) {
6500:         PetscInt    row = total_counts+k;
6501:         PetscScalar *vals;

6503:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6504:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6505:       }
6506:       total_counts += constraints_n[i];
6507:     }
6508:   }
6509:   /* assembling */
6510:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6511:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6512:   MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");

6514:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6515:   if (pcbddc->use_change_of_basis) {
6516:     /* dual and primal dofs on a single cc */
6517:     PetscInt     dual_dofs,primal_dofs;
6518:     /* working stuff for GEQRF */
6519:     PetscScalar  *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6520:     PetscBLASInt lqr_work;
6521:     /* working stuff for UNGQR */
6522:     PetscScalar  *gqr_work = NULL,lgqr_work_t;
6523:     PetscBLASInt lgqr_work;
6524:     /* working stuff for TRTRS */
6525:     PetscScalar  *trs_rhs = NULL;
6526:     PetscBLASInt Blas_NRHS;
6527:     /* pointers for values insertion into change of basis matrix */
6528:     PetscInt     *start_rows,*start_cols;
6529:     PetscScalar  *start_vals;
6530:     /* working stuff for values insertion */
6531:     PetscBT      is_primal;
6532:     PetscInt     *aux_primal_numbering_B;
6533:     /* matrix sizes */
6534:     PetscInt     global_size,local_size;
6535:     /* temporary change of basis */
6536:     Mat          localChangeOfBasisMatrix;
6537:     /* extra space for debugging */
6538:     PetscScalar  *dbg_work = NULL;

6540:     /* local temporary change of basis: the matrix created below is sized pcis->n x pcis->n */
6541:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6542:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
6543:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6544:     /* nonzeros for local mat */
6545:     PetscMalloc1(pcis->n,&nnz);
6546:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6547:       for (i=0;i<pcis->n;i++) nnz[i]=1;
6548:     } else {
6549:       const PetscInt *ii;
6550:       PetscInt       n;
6551:       PetscBool      flg_row;
6552:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6553:       for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6554:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6555:     }
6556:     for (i=n_vertices;i<total_counts_cc;i++) {
6557:       if (PetscBTLookup(change_basis,i)) {
6558:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6559:         if (PetscBTLookup(qr_needed_idx,i)) {
6560:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6561:         } else {
6562:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6563:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6564:         }
6565:       }
6566:     }
6567:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6568:     MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6569:     PetscFree(nnz);
6570:     /* Set interior change in the matrix */
6571:     if (!pcbddc->benign_change || pcbddc->fake_change) {
6572:       for (i=0;i<pcis->n;i++) {
6573:         MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6574:       }
6575:     } else {
6576:       const PetscInt *ii,*jj;
6577:       PetscScalar    *aa;
6578:       PetscInt       n;
6579:       PetscBool      flg_row;
6580:       MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6581:       MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6582:       for (i=0;i<n;i++) {
6583:         MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6584:       }
6585:       MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6586:       MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6587:     }

6589:     if (pcbddc->dbg_flag) {
6590:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6591:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6592:     }


6595:     /* Now we loop on the constraints which need a change of basis */
6596:     /*
6597:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6598:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

6600:        Basic blocks of change of basis matrix T computed by

6602:           - Using the following block transformation if there is only one primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

6604:             | 1        0   ...        0         s_1/S |
6605:             | 0        1   ...        0         s_2/S |
6606:             |              ...                        |
6607:             | 0        ...            1     s_{n-1}/S |
6608:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

6610:             with S = \sum_{i=1}^n s_i^2
6611:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6612:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

6614:           - QR decomposition of constraints otherwise
6615:     */
6616:     if (qr_needed && max_size_of_constraint) {
6617:       /* space to store Q */
6618:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6619:       /* array to store scaling factors for reflectors */
6620:       PetscMalloc1(max_constraints,&qr_tau);
6621:       /* first we issue queries for optimal work */
6622:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6623:       PetscBLASIntCast(max_constraints,&Blas_N);
6624:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6625:       lqr_work = -1;
6626:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6627:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6628:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6629:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6630:       lgqr_work = -1;
6631:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6632:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6633:       PetscBLASIntCast(max_constraints,&Blas_K);
6634:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6635:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6636:       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6637:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6638:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6639:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6640:       /* array to store rhs and solution of triangular solver */
6641:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6642:       /* allocating workspace for check */
6643:       if (pcbddc->dbg_flag) {
6644:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6645:       }
6646:     }
6647:     /* array to store whether a node is primal or not */
6648:     PetscBTCreate(pcis->n_B,&is_primal);
6649:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6650:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6651:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6652:     for (i=0;i<total_primal_vertices;i++) {
6653:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6654:     }
6655:     PetscFree(aux_primal_numbering_B);

6657:     /* loop on constraints and see whether or not they need a change of basis and compute it */
6658:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6659:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6660:       if (PetscBTLookup(change_basis,total_counts)) {
6661:         /* get constraint info */
6662:         primal_dofs = constraints_n[total_counts];
6663:         dual_dofs = size_of_constraint-primal_dofs;

6665:         if (pcbddc->dbg_flag) {
6666:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6667:         }

6669:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

6671:           /* copy quadrature constraints for change of basis check */
6672:           if (pcbddc->dbg_flag) {
6673:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6674:           }
6675:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6676:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6678:           /* compute QR decomposition of constraints */
6679:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6680:           PetscBLASIntCast(primal_dofs,&Blas_N);
6681:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6682:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6683:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6684:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6685:           PetscFPTrapPop();

6687:           /* explicitly compute R^-T */
6688:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6689:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6690:           PetscBLASIntCast(primal_dofs,&Blas_N);
6691:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6692:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6693:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6694:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6695:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6696:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6697:           PetscFPTrapPop();

6699:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6700:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6701:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6702:           PetscBLASIntCast(primal_dofs,&Blas_K);
6703:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6704:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6705:           PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6706:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6707:           PetscFPTrapPop();

6709:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6710:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6711:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6712:           PetscBLASIntCast(size_of_constraint,&Blas_M);
6713:           PetscBLASIntCast(primal_dofs,&Blas_N);
6714:           PetscBLASIntCast(primal_dofs,&Blas_K);
6715:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6716:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
6717:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6718:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6719:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6720:           PetscFPTrapPop();
6721:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

6723:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6724:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6725:           /* insert cols for primal dofs */
6726:           for (j=0;j<primal_dofs;j++) {
6727:             start_vals = &qr_basis[j*size_of_constraint];
6728:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6729:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6730:           }
6731:           /* insert cols for dual dofs */
6732:           for (j=0,k=0;j<dual_dofs;k++) {
6733:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6734:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6735:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6736:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6737:               j++;
6738:             }
6739:           }

6741:           /* check change of basis */
6742:           if (pcbddc->dbg_flag) {
6743:             PetscInt   ii,jj;
6744:             PetscBool valid_qr=PETSC_TRUE;
6745:             PetscBLASIntCast(primal_dofs,&Blas_M);
6746:             PetscBLASIntCast(size_of_constraint,&Blas_N);
6747:             PetscBLASIntCast(size_of_constraint,&Blas_K);
6748:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6749:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6750:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
6751:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6752:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6753:             PetscFPTrapPop();
6754:             for (jj=0;jj<size_of_constraint;jj++) {
6755:               for (ii=0;ii<primal_dofs;ii++) {
6756:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6757:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6758:               }
6759:             }
6760:             if (!valid_qr) {
6761:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6762:               for (jj=0;jj<size_of_constraint;jj++) {
6763:                 for (ii=0;ii<primal_dofs;ii++) {
6764:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6765:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6766:                   }
6767:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6768:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6769:                   }
6770:                 }
6771:               }
6772:             } else {
6773:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6774:             }
6775:           }
6776:         } else { /* simple transformation block */
6777:           PetscInt    row,col;
6778:           PetscScalar val,norm;

6780:           PetscBLASIntCast(size_of_constraint,&Blas_N);
6781:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6782:           for (j=0;j<size_of_constraint;j++) {
6783:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6784:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6785:             if (!PetscBTLookup(is_primal,row_B)) {
6786:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6787:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6788:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6789:             } else {
6790:               for (k=0;k<size_of_constraint;k++) {
6791:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6792:                 if (row != col) {
6793:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6794:                 } else {
6795:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6796:                 }
6797:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6798:               }
6799:             }
6800:           }
6801:           if (pcbddc->dbg_flag) {
6802:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6803:           }
6804:         }
6805:       } else {
6806:         if (pcbddc->dbg_flag) {
6807:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6808:         }
6809:       }
6810:     }

6812:     /* free workspace */
6813:     if (qr_needed) {
6814:       if (pcbddc->dbg_flag) {
6815:         PetscFree(dbg_work);
6816:       }
6817:       PetscFree(trs_rhs);
6818:       PetscFree(qr_tau);
6819:       PetscFree(qr_work);
6820:       PetscFree(gqr_work);
6821:       PetscFree(qr_basis);
6822:     }
6823:     PetscBTDestroy(&is_primal);
6824:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6825:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

6827:     /* assembling of global change of variable */
6828:     if (!pcbddc->fake_change) {
6829:       Mat      tmat;
6830:       PetscInt bs;

6832:       VecGetSize(pcis->vec1_global,&global_size);
6833:       VecGetLocalSize(pcis->vec1_global,&local_size);
6834:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6835:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6836:       MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6837:       MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6838:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6839:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6840:       MatGetBlockSize(pc->pmat,&bs);
6841:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6842:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6843:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6844:       MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6845:       MatDestroy(&tmat);
6846:       VecSet(pcis->vec1_global,0.0);
6847:       VecSet(pcis->vec1_N,1.0);
6848:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6849:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6850:       VecReciprocal(pcis->vec1_global);
6851:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);

6853:       /* check */
6854:       if (pcbddc->dbg_flag) {
6855:         PetscReal error;
6856:         Vec       x,x_change;

6858:         VecDuplicate(pcis->vec1_global,&x);
6859:         VecDuplicate(pcis->vec1_global,&x_change);
6860:         VecSetRandom(x,NULL);
6861:         VecCopy(x,pcis->vec1_global);
6862:         VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6863:         VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6864:         MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6865:         VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6866:         VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6867:         MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6868:         VecAXPY(x,-1.0,x_change);
6869:         VecNorm(x,NORM_INFINITY,&error);
6870:         if (error > PETSC_SMALL) {
6871:           SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6872:         }
6873:         VecDestroy(&x);
6874:         VecDestroy(&x_change);
6875:       }
6876:       /* adapt sub_schurs computed (if any) */
6877:       if (pcbddc->use_deluxe_scaling) {
6878:         PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;

6880:         if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6881:         if (sub_schurs && sub_schurs->S_Ej_all) {
6882:           Mat                    S_new,tmat;
6883:           IS                     is_all_N,is_V_Sall = NULL;

6885:           ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6886:           MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6887:           if (pcbddc->deluxe_zerorows) {
6888:             ISLocalToGlobalMapping NtoSall;
6889:             IS                     is_V;
6890:             ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6891:             ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6892:             ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6893:             ISLocalToGlobalMappingDestroy(&NtoSall);
6894:             ISDestroy(&is_V);
6895:           }
6896:           ISDestroy(&is_all_N);
6897:           MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6898:           MatDestroy(&sub_schurs->S_Ej_all);
6899:           PetscObjectReference((PetscObject)S_new);
6900:           if (pcbddc->deluxe_zerorows) {
6901:             const PetscScalar *array;
6902:             const PetscInt    *idxs_V,*idxs_all;
6903:             PetscInt          i,n_V;

6905:             MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6906:             ISGetLocalSize(is_V_Sall,&n_V);
6907:             ISGetIndices(is_V_Sall,&idxs_V);
6908:             ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6909:             VecGetArrayRead(pcis->D,&array);
6910:             for (i=0;i<n_V;i++) {
6911:               PetscScalar val;
6912:               PetscInt    idx;

6914:               idx = idxs_V[i];
6915:               val = array[idxs_all[idxs_V[i]]];
6916:               MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6917:             }
6918:             MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6919:             MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6920:             VecRestoreArrayRead(pcis->D,&array);
6921:             ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6922:             ISRestoreIndices(is_V_Sall,&idxs_V);
6923:           }
6924:           sub_schurs->S_Ej_all = S_new;
6925:           MatDestroy(&S_new);
6926:           if (sub_schurs->sum_S_Ej_all) {
6927:             MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6928:             MatDestroy(&sub_schurs->sum_S_Ej_all);
6929:             PetscObjectReference((PetscObject)S_new);
6930:             if (pcbddc->deluxe_zerorows) {
6931:               MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6932:             }
6933:             sub_schurs->sum_S_Ej_all = S_new;
6934:             MatDestroy(&S_new);
6935:           }
6936:           ISDestroy(&is_V_Sall);
6937:           MatDestroy(&tmat);
6938:         }
6939:         /* destroy any change of basis context in sub_schurs */
6940:         if (sub_schurs && sub_schurs->change) {
6941:           PetscInt i;

6943:           for (i=0;i<sub_schurs->n_subs;i++) {
6944:             KSPDestroy(&sub_schurs->change[i]);
6945:           }
6946:           PetscFree(sub_schurs->change);
6947:         }
6948:       }
6949:       if (pcbddc->switch_static) { /* need to save the local change */
6950:         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6951:       } else {
6952:         MatDestroy(&localChangeOfBasisMatrix);
6953:       }
6954:       /* determine if any process has changed the pressures locally */
6955:       pcbddc->change_interior = pcbddc->benign_have_null;
6956:     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6957:       MatDestroy(&pcbddc->ConstraintMatrix);
6958:       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6959:       pcbddc->use_qr_single = qr_needed;
6960:     }
6961:   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6962:     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6963:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6964:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6965:     } else {
6966:       Mat benign_global = NULL;
6967:       if (pcbddc->benign_have_null) {
6968:         Mat M;

6970:         pcbddc->change_interior = PETSC_TRUE;
6971:         VecCopy(matis->counter,pcis->vec1_N);
6972:         VecReciprocal(pcis->vec1_N);
6973:         MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
6974:         if (pcbddc->benign_change) {
6975:           MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6976:           MatDiagonalScale(M,pcis->vec1_N,NULL);
6977:         } else {
6978:           MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
6979:           MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
6980:         }
6981:         MatISSetLocalMat(benign_global,M);
6982:         MatDestroy(&M);
6983:         MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
6984:         MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
6985:       }
6986:       if (pcbddc->user_ChangeOfBasisMatrix) {
6987:         MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6988:         MatDestroy(&benign_global);
6989:       } else if (pcbddc->benign_have_null) {
6990:         pcbddc->ChangeOfBasisMatrix = benign_global;
6991:       }
6992:     }
6993:     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6994:       IS             is_global;
6995:       const PetscInt *gidxs;

6997:       ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6998:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6999:       ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7000:       MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7001:       ISDestroy(&is_global);
7002:     }
7003:   }
7004:   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7005:     VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7006:   }

7008:   if (!pcbddc->fake_change) {
7009:     /* add pressure dofs to set of primal nodes for numbering purposes */
7010:     for (i=0;i<pcbddc->benign_n;i++) {
7011:       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7012:       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7013:       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7014:       pcbddc->local_primal_size_cc++;
7015:       pcbddc->local_primal_size++;
7016:     }

7018:     /* check if a new primal space has been introduced (also take into account benign trick) */
7019:     pcbddc->new_primal_space_local = PETSC_TRUE;
7020:     if (olocal_primal_size == pcbddc->local_primal_size) {
7021:       PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
7022:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7023:       if (!pcbddc->new_primal_space_local) {
7024:         PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
7025:         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7026:       }
7027:     }
7028:     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7029:     MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7030:   }
7031:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);

7033:   /* flush dbg viewer */
7034:   if (pcbddc->dbg_flag) {
7035:     PetscViewerFlush(pcbddc->dbg_viewer);
7036:   }

7038:   /* free workspace */
7039:   PetscBTDestroy(&qr_needed_idx);
7040:   PetscBTDestroy(&change_basis);
7041:   if (!pcbddc->adaptive_selection) {
7042:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7043:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7044:   } else {
7045:     PetscFree5(pcbddc->adaptive_constraints_n,
7046:                       pcbddc->adaptive_constraints_idxs_ptr,
7047:                       pcbddc->adaptive_constraints_data_ptr,
7048:                       pcbddc->adaptive_constraints_idxs,
7049:                       pcbddc->adaptive_constraints_data);
7050:     PetscFree(constraints_n);
7051:     PetscFree(constraints_idxs_B);
7052:   }
7053:   return(0);
7054: }
7055: /* #undef PETSC_MISSING_LAPACK_GESVD */

/*
   PCBDDCAnalyzeInterface - (re)builds the local graph held in pcbddc->mat_graph and runs
   its connected-components analysis.

   Input:
     pc - preconditioner; pc->data is read as a PC_BDDC and pc->pmat->data as a Mat_IS

   When pcbddc->recompute_topography is set, the graph is reset, re-initialised from the
   local-to-global mapping of pc->pmat, optionally given a default CSR adjacency and
   local coordinates, and set up again. The connected-components step runs only while
   pcbddc->graphanalyzed is false. Returns 0 on the path shown here.

   NOTE(review): ierr is declared but never used in this listing and no call is
   error-checked; presumably the listing generator dropped those wrappers - confirm
   against the actual source file.
*/
7057: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7058: {
7059:   ISLocalToGlobalMapping map;
7060:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
7061:   Mat_IS                 *matis  = (Mat_IS*)pc->pmat->data;
7062:   PetscInt               i,N;
        /* rcsr records that the default-adjacency branch below ran, so that nvtxs_csr is cleared before returning */
7063:   PetscBool              rcsr = PETSC_FALSE;
7064:   PetscErrorCode         ierr;

7067:   if (pcbddc->recompute_topography) {
          /* force the connected-components analysis at the bottom of this function to run again */
7068:     pcbddc->graphanalyzed = PETSC_FALSE;
7069:     /* Reset previously computed graph */
7070:     PCBDDCGraphReset(pcbddc->mat_graph);
7071:     /* Init local Graph struct */
          /* N is the global row size of pc->pmat; map is its row local-to-global mapping */
7072:     MatGetSize(pc->pmat,&N,NULL);
7073:     MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7074:     PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);

          /* only the locally specified primal vertices go through the MPI_LOR consistency check;
             it is skipped when pcbddc->user_primal_vertices is also set */
7076:     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7077:       PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7078:     }
7079:     /* Check validity of the csr graph passed in by the user */
          /* a zero nvtxs_csr is accepted; any other value must equal nvtxs */
7080:     if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);

7082:     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7083:     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7084:       PetscInt  *xadj,*adjncy;
7085:       PetscInt  nvtxs;
7086:       PetscBool flg_row=PETSC_FALSE;

            /* row structure of the local matrix, zero-based, with the third (symmetric) flag on and the fourth off */
7088:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* the adjacency is installed (copied) only when MatGetRowIJ reports success through flg_row */
7089:       if (flg_row) {
7090:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7091:         pcbddc->computed_rowadj = PETSC_TRUE;
7092:       }
7093:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
            /* raised whether or not flg_row was true */
7094:       rcsr = PETSC_TRUE;
7095:     }
7096:     if (pcbddc->dbg_flag) {
7097:       PetscViewerFlush(pcbddc->dbg_viewer);
7098:     }

          /* coordinates are present (cdim != 0) but not yet flagged as local (cloc false):
             replace them with an array sized by the local rows of matis->A */
7100:     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7101:       PetscReal    *lcoords;
7102:       PetscInt     n;
7103:       MPI_Datatype dimrealtype;

7105:       /* TODO: support for blocked */
7106:       if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7107:       MatGetLocalSize(matis->A,&n,NULL);
7108:       PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
            /* one broadcast unit is cdim contiguous reals */
7109:       MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7110:       MPI_Type_commit(&dimrealtype);
            /* broadcast over matis->sf from the current coords array into lcoords */
7111:       PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7112:       PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7113:       MPI_Type_free(&dimrealtype);
            /* the old array is released and lcoords takes its place in the graph */
7114:       PetscFree(pcbddc->mat_graph->coords);

7116:       pcbddc->mat_graph->coords = lcoords;
7117:       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7118:       pcbddc->mat_graph->cnloc  = n;
7119:     }
          /* when coordinates are present, their count must equal the number of graph vertices */
7120:     if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
          /* active only while corner selection is requested and has not been marked as done */
7121:     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);

7123:     /* Setup of Graph */
7124:     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7125:     PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);

7127:     /* attach info on disconnected subdomains if present */
7128:     if (pcbddc->n_local_subs) {
7129:       PetscInt *local_subs,n,totn;

7131:       MatGetLocalSize(matis->A,&n,NULL);
7132:       PetscMalloc1(n,&local_subs);
            /* every entry starts at n_local_subs, one past the last valid sub index;
               entries not listed in any pcbddc->local_subs[i] keep that value */
7133:       for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7134:       for (i=0;i<pcbddc->n_local_subs;i++) {
7135:         const PetscInt *idxs;
7136:         PetscInt       nl,j;

7138:         ISGetLocalSize(pcbddc->local_subs[i],&nl);
7139:         ISGetIndices(pcbddc->local_subs[i],&idxs);
              /* label each index listed in local_subs[i] with i */
7140:         for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7141:         ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7142:       }
            /* number of labels in use = largest label + 1 */
7143:       for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7144:       pcbddc->mat_graph->n_local_subs = totn + 1;
            /* the array is stored in the graph and not freed here; presumably the graph releases it - confirm */
7145:       pcbddc->mat_graph->local_subs = local_subs;
7146:     }
7147:   }

7149:   if (!pcbddc->graphanalyzed) {
7150:     /* Graph's connected components analysis */
7151:     PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7152:     pcbddc->graphanalyzed = PETSC_TRUE;
          /* corner_selected now mirrors corner_selection */
7153:     pcbddc->corner_selected = pcbddc->corner_selection;
7154:   }
        /* clear the CSR vertex count if the default-adjacency branch ran in this call */
7155:   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7156:   return(0);
7157: }

/*
   PCBDDCOrthonormalizeVecs - orthonormalizes vecs[0..n-1] in place, one vector at a time.

   Input:
     n    - number of vectors; nothing is done when n is 0
     vecs - the vectors, each of which is overwritten

   vecs[0] is normalized. Each later vecs[i] is first corrected by a linear combination
   of vecs[0..i-1] whose coefficients come from VecMDot, then normalized. A vector whose
   norm reported by VecNormalize is below PETSC_SMALL is replaced by the zero vector.
   Returns 0 on the path shown here.
*/
7159: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
7160: {
7161:   PetscInt       i,j;
7162:   PetscScalar    *alphas;
7163:   PetscReal      norm;

7167:   if (!n) return(0);
        /* scratch for the coefficients; at most n-1 entries are used per step */
7168:   PetscMalloc1(n,&alphas);
7169:   VecNormalize(vecs[0],&norm);
        /* a (numerically) null first vector is zeroed out */
7170:   if (norm < PETSC_SMALL) {
7171:     VecSet(vecs[0],0.0);
7172:   }
7173:   for (i=1;i<n;i++) {
          /* dot products of vecs[i] with the i vectors already processed */
7174:     VecMDot(vecs[i],i,vecs,alphas);
          /* negate and conjugate the coefficients before the update below;
             NOTE(review): verify the conjugation against VecMDot's convention for complex scalars */
7175:     for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
          /* vecs[i] += sum_j alphas[j]*vecs[j] */
7176:     VecMAXPY(vecs[i],i,alphas,vecs);
7177:     VecNormalize(vecs[i],&norm);
          /* a vector left with a tiny norm is zeroed out */
7178:     if (norm < PETSC_SMALL) {
7179:       VecSet(vecs[i],0.0);
7180:     }
7181:   }
7182:   PetscFree(alphas);
7183:   return(0);
7184: }

/*
   PCBDDCMatISGetSubassemblingPattern - Computes, for every process, the rank its local
   matrix should be sent to when the MATIS matrix mat is subassembled on fewer subdomains.

   Input Parameters:
+  mat          - the matrix; an error is raised if it is not of type MATIS
.  n_subdomains - on input, the requested number of subdomains (must be positive)
-  redprocs     - if positive and smaller than the number of active processes, the
                  subdomain connectivity graph is assembled as an AIJ matrix whose rows
                  are owned by the first redprocs active processes; otherwise the graph is
                  an MPIAdj matrix with one row per active process

   Output Parameters:
+  n_subdomains - the number of subdomains actually used (it may be lowered to the number of
                  inactive processes or to the number of active processes, see the code)
.  is_sends     - IS on the communicator of mat; local size 1 (the destination rank) on
                  processes with a non-empty local matrix, local size 0 otherwise
-  have_void    - (optional, may be NULL) PETSC_TRUE if at least one process has an empty
                  local matrix

   Options read from the global options database:
+  -matis_partitioning_use_vwgt   - use the local matrix size as vertex weight
-  -matis_partitioning_threshold  - sharing count threshold used to select graph edges (values below 2 are raised to 2)

   Notes:
   The routine performs reductions and gathers on the communicator of mat, so every
   process of that communicator has to call it.
   NOTE(review): in this listing none of the return codes of the PETSc/MPI calls is checked.
*/
7186: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7187: {
7188:   Mat            A;
7189:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
7190:   PetscMPIInt    size,rank,color;
7191:   PetscInt       *xadj,*adjncy;
7192:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7193:   PetscInt       im_active,active_procs,N,n,i,j,threshold = 2;
7194:   PetscInt       void_procs,*procs_candidates = NULL;
7195:   PetscInt       xadj_count,*count;
7196:   PetscBool      ismatis,use_vwgt=PETSC_FALSE;
7197:   PetscSubcomm   psubcomm;
7198:   MPI_Comm       subcomm;

7203:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7204:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7207:   if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);

7209:   if (have_void) *have_void = PETSC_FALSE;
7210:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7211:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7212:   MatISGetLocalMat(mat,&A);
7213:   MatGetLocalSize(A,&n,NULL);
        /* a process is "active" when its local matrix has at least one row */
7214:   im_active = !!n;
7215:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7216:   void_procs = size - active_procs;
7217:   /* get ranks of non-active processes in mat communicator */
7218:   if (void_procs) {
7219:     PetscInt ncand;

7221:     if (have_void) *have_void = PETSC_TRUE;
7222:     PetscMalloc1(size,&procs_candidates);
7223:     MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
          /* in-place compaction: the gathered activity flags are overwritten so that the first
             ncand entries hold the ranks of the inactive processes; entries from ncand on
             still contain gathered flags */
7224:     for (i=0,ncand=0;i<size;i++) {
7225:       if (!procs_candidates[i]) {
7226:         procs_candidates[ncand++] = i;
7227:       }
7228:     }
7229:     /* force n_subdomains to be not greater than the number of non-active processes */
7230:     *n_subdomains = PetscMin(void_procs,*n_subdomains);
7231:   }

7233:   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7234:      number of subdomains requested 1 -> send to master or first candidate in voids  */
7235:   MatGetSize(mat,&N,NULL);
7236:   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7237:     PetscInt issize,isidx,dest;
7238:     if (*n_subdomains == 1) dest = 0;
7239:     else dest = rank;
7240:     if (im_active) {
7241:       issize = 1;
7242:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
              /* NOTE(review): when dest is the rank of this process it is not obviously smaller
                 than the number of compacted candidates; confirm that this entry is always a
                 valid inactive rank */
7243:         isidx = procs_candidates[dest];
7244:       } else {
7245:         isidx = dest;
7246:       }
7247:     } else {
7248:       issize = 0;
7249:       isidx = -1;
7250:     }
7251:     if (*n_subdomains != 1) *n_subdomains = active_procs;
          /* the single index is copied, so passing the address of a local variable is fine */
7252:     ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7253:     PetscFree(procs_candidates);
7254:     return(0);
7255:   }
7256:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7257:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7258:   threshold = PetscMax(threshold,2);

7260:   /* Get info on mapping */
7261:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

7263:   /* build local CSR graph of subdomains' connectivity */
        /* a single CSR row per process, sized for all the neighbours except entry 0
           (presumably the process itself - TODO confirm) */
7264:   PetscMalloc1(2,&xadj);
7265:   xadj[0] = 0;
7266:   xadj[1] = PetscMax(n_neighs-1,0);
7267:   PetscMalloc1(xadj[1],&adjncy);
7268:   PetscMalloc1(xadj[1],&adjncy_wgt);
        /* count[k] = number of neighbours (entry 0 excluded) that share the local index k */
7269:   PetscCalloc1(n,&count);
7270:   for (i=1;i<n_neighs;i++)
7271:     for (j=0;j<n_shared[i];j++)
7272:       count[shared[i][j]] += 1;

        /* a neighbour becomes an edge of the graph as soon as it shares one index whose count is
           below the threshold; the edge weight is the total number of indices shared with it */
7274:   xadj_count = 0;
7275:   for (i=1;i<n_neighs;i++) {
7276:     for (j=0;j<n_shared[i];j++) {
7277:       if (count[shared[i][j]] < threshold) {
7278:         adjncy[xadj_count] = neighs[i];
7279:         adjncy_wgt[xadj_count] = n_shared[i];
7280:         xadj_count++;
7281:         break;
7282:       }
7283:     }
7284:   }
7285:   xadj[1] = xadj_count;
7286:   PetscFree(count);
7287:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
        /* sort the neighbours by rank, keeping the weights aligned */
7288:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

        /* storage for the destination rank; it is handed over to is_sends at the end */
7290:   PetscMalloc1(1,&ranks_send_to_idx);

7292:   /* Restrict work on active processes only */
7293:   PetscMPIIntCast(im_active,&color);
7294:   if (void_procs) {
7295:     PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7296:     PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7297:     PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7298:     subcomm = PetscSubcommChild(psubcomm);
7299:   } else {
7300:     psubcomm = NULL;
7301:     subcomm = PetscObjectComm((PetscObject)mat);
7302:   }

7304:   v_wgt = NULL;
7305:   if (!color) {
          /* inactive processes do not take part in the partitioning: just release the graph */
7306:     PetscFree(xadj);
7307:     PetscFree(adjncy);
7308:     PetscFree(adjncy_wgt);
7309:   } else {
7310:     Mat             subdomain_adj;
7311:     IS              new_ranks,new_ranks_contig;
7312:     MatPartitioning partitioner;
7313:     PetscInt        rstart=0,rend=0;
7314:     PetscInt        *is_indices,*oldranks;
          /* shadows the outer size: from here on size is the size of subcomm */
7315:     PetscMPIInt     size;
7316:     PetscBool       aggregate;

7318:     MPI_Comm_size(subcomm,&size);
7319:     if (void_procs) {
7320:       PetscInt prank = rank;
            /* oldranks[k] = rank in the communicator of mat of the k-th process of subcomm */
7321:       PetscMalloc1(size,&oldranks);
7322:       MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
            /* translate the neighbours' ranks to the numbering of subcomm;
               NOTE(review): the search presumably relies on oldranks being sorted, which should
               follow from using rank as ordering key for the subcommunicator - confirm */
7323:       for (i=0;i<xadj[1];i++) {
7324:         PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7325:       }
7326:       PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7327:     } else {
7328:       oldranks = NULL;
7329:     }
7330:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7331:     if (aggregate) { /* TODO: all this part could be made more efficient */
7332:       PetscInt    lrows,row,ncols,*cols;
7333:       PetscMPIInt nrank;
7334:       PetscScalar *vals;

7336:       MPI_Comm_rank(subcomm,&nrank);
            /* the size rows of the graph are split as evenly as possible among the first
               redprocs processes of subcomm; the others own no rows */
7337:       lrows = 0;
7338:       if (nrank<redprocs) {
7339:         lrows = size/redprocs;
7340:         if (nrank<size%redprocs) lrows++;
7341:       }
7342:       MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7343:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7344:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7345:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
            /* every process inserts its own row (possibly owned by another process) */
7346:       row = nrank;
7347:       ncols = xadj[1]-xadj[0];
7348:       cols = adjncy;
7349:       PetscMalloc1(ncols,&vals);
7350:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7351:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7352:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7353:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7354:       PetscFree(xadj);
7355:       PetscFree(adjncy);
7356:       PetscFree(adjncy_wgt);
7357:       PetscFree(vals);
7358:       if (use_vwgt) {
7359:         Vec               v;
7360:         const PetscScalar *array;
7361:         PetscInt          nl;

              /* move the local sizes to the processes owning the corresponding rows via a vector */
7363:         MatCreateVecs(subdomain_adj,&v,NULL);
7364:         VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7365:         VecAssemblyBegin(v);
7366:         VecAssemblyEnd(v);
7367:         VecGetLocalSize(v,&nl);
7368:         VecGetArrayRead(v,&array);
7369:         PetscMalloc1(nl,&v_wgt);
7370:         for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7371:         VecRestoreArrayRead(v,&array);
7372:         VecDestroy(&v);
7373:       }
7374:     } else {
            /* one graph row per process; xadj, adjncy and adjncy_wgt are not freed in this
               function after this call (presumably the matrix takes ownership - TODO confirm) */
7375:       MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7376:       if (use_vwgt) {
7377:         PetscMalloc1(1,&v_wgt);
7378:         v_wgt[0] = n;
7379:       }
7380:     }
7381:     /* MatView(subdomain_adj,0); */

7383:     /* Partition */
7384:     MatPartitioningCreate(subcomm,&partitioner);
          /* default partitioner chosen at compile time: PTScotch, then ParMETIS, then "average" */
7385: #if defined(PETSC_HAVE_PTSCOTCH)
7386:     MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7387: #elif defined(PETSC_HAVE_PARMETIS)
7388:     MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7389: #else
7390:     MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7391: #endif
7392:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7393:     if (v_wgt) {
            /* v_wgt is not freed in this function (presumably owned by the partitioner - TODO confirm) */
7394:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
7395:     }
          /* never ask for more parts than active processes */
7396:     *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7397:     MatPartitioningSetNParts(partitioner,*n_subdomains);
7398:     MatPartitioningSetFromOptions(partitioner);
7399:     MatPartitioningApply(partitioner,&new_ranks);
7400:     /* MatPartitioningView(partitioner,0); */

7402:     /* renumber new_ranks to avoid "holes" in new set of processors */
7403:     ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7404:     ISDestroy(&new_ranks);
7405:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7406:     if (!aggregate) {
            /* one row per process: the first local entry is the part assigned to this process */
7407:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7408: #if defined(PETSC_USE_DEBUG)
7409:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7410: #endif
7411:         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7412:       } else if (oldranks) {
7413:         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7414:       } else {
7415:         ranks_send_to_idx[0] = is_indices[0];
7416:       }
7417:     } else {
7418:       PetscInt    idx = 0;
7419:       PetscMPIInt tag;
7420:       MPI_Request *reqs;

            /* the owner of graph row i sends the part assigned to that row to process i of subcomm;
               every process then receives exactly one value, the part assigned to its own row */
7422:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7423:       PetscMalloc1(rend-rstart,&reqs);
7424:       for (i=rstart;i<rend;i++) {
7425:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7426:       }
7427:       MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7428:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7429:       PetscFree(reqs);
7430:       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7431: #if defined(PETSC_USE_DEBUG)
7432:         if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7433: #endif
7434:         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7435:       } else if (oldranks) {
7436:         ranks_send_to_idx[0] = oldranks[idx];
7437:       } else {
7438:         ranks_send_to_idx[0] = idx;
7439:       }
7440:     }
7441:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7442:     /* clean up */
7443:     PetscFree(oldranks);
7444:     ISDestroy(&new_ranks_contig);
7445:     MatDestroy(&subdomain_adj);
7446:     MatPartitioningDestroy(&partitioner);
7447:   }
7448:   PetscSubcommDestroy(&psubcomm);
7449:   PetscFree(procs_candidates);

7451:   /* assemble parallel IS for sends */
        /* active processes contribute one entry, inactive ones none; the IS takes ownership
           of ranks_send_to_idx */
7452:   i = 1;
7453:   if (!color) i=0;
7454:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7455:   return(0);
7456: }

/* Private tags for the storage format of a local matrix.
   PCBDDCMatISSubassemble stores one of these, cast to PetscInt, as the first entry of the
   index buffer it sends, and compares received entries against them; the numeric values
   must therefore stay the same on every process. */
typedef enum {
  MATDENSE_PRIVATE = 0, /* dense local matrix */
  MATAIJ_PRIVATE   = 1, /* AIJ local matrix */
  MATBAIJ_PRIVATE  = 2, /* BAIJ local matrix */
  MATSBAIJ_PRIVATE = 3  /* SBAIJ local matrix */
} MatTypePrivate;

7460: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7461: {
7462:   Mat                    local_mat;
7463:   IS                     is_sends_internal;
7464:   PetscInt               rows,cols,new_local_rows;
7465:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7466:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
7467:   ISLocalToGlobalMapping l2gmap;
7468:   PetscInt*              l2gmap_indices;
7469:   const PetscInt*        is_indices;
7470:   MatType                new_local_type;
7471:   /* buffers */
7472:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7473:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7474:   PetscInt               *recv_buffer_idxs_local;
7475:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7476:   PetscScalar            *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7477:   /* MPI */
7478:   MPI_Comm               comm,comm_n;
7479:   PetscSubcomm           subcomm;
7480:   PetscMPIInt            n_sends,n_recvs,size;
7481:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7482:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7483:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7484:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7485:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7486:   PetscErrorCode         ierr;

7490:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7491:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7498:   if (nvecs) {
7499:     if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7501:   }
7502:   /* further checks */
7503:   MatISGetLocalMat(mat,&local_mat);
7504:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7505:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7506:   MatGetSize(local_mat,&rows,&cols);
7507:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7508:   if (reuse && *mat_n) {
7509:     PetscInt mrows,mcols,mnrows,mncols;
7511:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7512:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7513:     MatGetSize(mat,&mrows,&mcols);
7514:     MatGetSize(*mat_n,&mnrows,&mncols);
7515:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7516:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7517:   }
7518:   MatGetBlockSize(local_mat,&bs);

7521:   /* prepare IS for sending if not provided */
7522:   if (!is_sends) {
7523:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7524:     PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7525:   } else {
7526:     PetscObjectReference((PetscObject)is_sends);
7527:     is_sends_internal = is_sends;
7528:   }

7530:   /* get comm */
7531:   PetscObjectGetComm((PetscObject)mat,&comm);

7533:   /* compute number of sends */
7534:   ISGetLocalSize(is_sends_internal,&i);
7535:   PetscMPIIntCast(i,&n_sends);

7537:   /* compute number of receives */
7538:   MPI_Comm_size(comm,&size);
7539:   PetscMalloc1(size,&iflags);
7540:   PetscMemzero(iflags,size*sizeof(*iflags));
7541:   ISGetIndices(is_sends_internal,&is_indices);
7542:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7543:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7544:   PetscFree(iflags);

7546:   /* restrict comm if requested */
7547:   subcomm = 0;
7548:   destroy_mat = PETSC_FALSE;
7549:   if (restrict_comm) {
7550:     PetscMPIInt color,subcommsize;

7552:     color = 0;
7553:     if (restrict_full) {
7554:       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7555:     } else {
7556:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7557:     }
7558:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7559:     subcommsize = size - subcommsize;
7560:     /* check if reuse has been requested */
7561:     if (reuse) {
7562:       if (*mat_n) {
7563:         PetscMPIInt subcommsize2;
7564:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7565:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7566:         comm_n = PetscObjectComm((PetscObject)*mat_n);
7567:       } else {
7568:         comm_n = PETSC_COMM_SELF;
7569:       }
7570:     } else { /* MAT_INITIAL_MATRIX */
7571:       PetscMPIInt rank;

7573:       MPI_Comm_rank(comm,&rank);
7574:       PetscSubcommCreate(comm,&subcomm);
7575:       PetscSubcommSetNumber(subcomm,2);
7576:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
7577:       comm_n = PetscSubcommChild(subcomm);
7578:     }
7579:     /* flag to destroy *mat_n if not significative */
7580:     if (color) destroy_mat = PETSC_TRUE;
7581:   } else {
7582:     comm_n = comm;
7583:   }

7585:   /* prepare send/receive buffers */
7586:   PetscMalloc1(size,&ilengths_idxs);
7587:   PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));
7588:   PetscMalloc1(size,&ilengths_vals);
7589:   PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));
7590:   if (nis) {
7591:     PetscCalloc1(size,&ilengths_idxs_is);
7592:   }

7594:   /* Get data from local matrices */
7595:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7596:     /* TODO: See below some guidelines on how to prepare the local buffers */
7597:     /*
7598:        send_buffer_vals should contain the raw values of the local matrix
7599:        send_buffer_idxs should contain:
7600:        - MatType_PRIVATE type
7601:        - PetscInt        size_of_l2gmap
7602:        - PetscInt        global_row_indices[size_of_l2gmap]
7603:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7604:     */
7605:   else {
7606:     MatDenseGetArray(local_mat,&send_buffer_vals);
7607:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7608:     PetscMalloc1(i+2,&send_buffer_idxs);
7609:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7610:     send_buffer_idxs[1] = i;
7611:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7612:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7613:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7614:     PetscMPIIntCast(i,&len);
7615:     for (i=0;i<n_sends;i++) {
7616:       ilengths_vals[is_indices[i]] = len*len;
7617:       ilengths_idxs[is_indices[i]] = len+2;
7618:     }
7619:   }
7620:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7621:   /* additional is (if any) */
7622:   if (nis) {
7623:     PetscMPIInt psum;
7624:     PetscInt j;
7625:     for (j=0,psum=0;j<nis;j++) {
7626:       PetscInt plen;
7627:       ISGetLocalSize(isarray[j],&plen);
7628:       PetscMPIIntCast(plen,&len);
7629:       psum += len+1; /* indices + lenght */
7630:     }
7631:     PetscMalloc1(psum,&send_buffer_idxs_is);
7632:     for (j=0,psum=0;j<nis;j++) {
7633:       PetscInt plen;
7634:       const PetscInt *is_array_idxs;
7635:       ISGetLocalSize(isarray[j],&plen);
7636:       send_buffer_idxs_is[psum] = plen;
7637:       ISGetIndices(isarray[j],&is_array_idxs);
7638:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7639:       ISRestoreIndices(isarray[j],&is_array_idxs);
7640:       psum += plen+1; /* indices + lenght */
7641:     }
7642:     for (i=0;i<n_sends;i++) {
7643:       ilengths_idxs_is[is_indices[i]] = psum;
7644:     }
7645:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7646:   }
7647:   MatISRestoreLocalMat(mat,&local_mat);

7649:   buf_size_idxs = 0;
7650:   buf_size_vals = 0;
7651:   buf_size_idxs_is = 0;
7652:   buf_size_vecs = 0;
7653:   for (i=0;i<n_recvs;i++) {
7654:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7655:     buf_size_vals += (PetscInt)olengths_vals[i];
7656:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7657:     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7658:   }
7659:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7660:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7661:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7662:   PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);

7664:   /* get new tags for clean communications */
7665:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7666:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7667:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7668:   PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);

7670:   /* allocate for requests */
7671:   PetscMalloc1(n_sends,&send_req_idxs);
7672:   PetscMalloc1(n_sends,&send_req_vals);
7673:   PetscMalloc1(n_sends,&send_req_idxs_is);
7674:   PetscMalloc1(n_sends,&send_req_vecs);
7675:   PetscMalloc1(n_recvs,&recv_req_idxs);
7676:   PetscMalloc1(n_recvs,&recv_req_vals);
7677:   PetscMalloc1(n_recvs,&recv_req_idxs_is);
7678:   PetscMalloc1(n_recvs,&recv_req_vecs);

7680:   /* communications */
7681:   ptr_idxs = recv_buffer_idxs;
7682:   ptr_vals = recv_buffer_vals;
7683:   ptr_idxs_is = recv_buffer_idxs_is;
7684:   ptr_vecs = recv_buffer_vecs;
7685:   for (i=0;i<n_recvs;i++) {
7686:     source_dest = onodes[i];
7687:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7688:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7689:     ptr_idxs += olengths_idxs[i];
7690:     ptr_vals += olengths_vals[i];
7691:     if (nis) {
7692:       source_dest = onodes_is[i];
7693:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7694:       ptr_idxs_is += olengths_idxs_is[i];
7695:     }
7696:     if (nvecs) {
7697:       source_dest = onodes[i];
7698:       MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7699:       ptr_vecs += olengths_idxs[i]-2;
7700:     }
7701:   }
7702:   for (i=0;i<n_sends;i++) {
7703:     PetscMPIIntCast(is_indices[i],&source_dest);
7704:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7705:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7706:     if (nis) {
7707:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7708:     }
7709:     if (nvecs) {
7710:       VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7711:       MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7712:     }
7713:   }
7714:   ISRestoreIndices(is_sends_internal,&is_indices);
7715:   ISDestroy(&is_sends_internal);

7717:   /* assemble new l2g map */
7718:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7719:   ptr_idxs = recv_buffer_idxs;
7720:   new_local_rows = 0;
7721:   for (i=0;i<n_recvs;i++) {
7722:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7723:     ptr_idxs += olengths_idxs[i];
7724:   }
7725:   PetscMalloc1(new_local_rows,&l2gmap_indices);
7726:   ptr_idxs = recv_buffer_idxs;
7727:   new_local_rows = 0;
7728:   for (i=0;i<n_recvs;i++) {
7729:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7730:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7731:     ptr_idxs += olengths_idxs[i];
7732:   }
7733:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7734:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7735:   PetscFree(l2gmap_indices);

7737:   /* infer new local matrix type from received local matrices type */
7738:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7739:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7740:   if (n_recvs) {
7741:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7742:     ptr_idxs = recv_buffer_idxs;
7743:     for (i=0;i<n_recvs;i++) {
7744:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7745:         new_local_type_private = MATAIJ_PRIVATE;
7746:         break;
7747:       }
7748:       ptr_idxs += olengths_idxs[i];
7749:     }
7750:     switch (new_local_type_private) {
7751:       case MATDENSE_PRIVATE:
7752:         new_local_type = MATSEQAIJ;
7753:         bs = 1;
7754:         break;
7755:       case MATAIJ_PRIVATE:
7756:         new_local_type = MATSEQAIJ;
7757:         bs = 1;
7758:         break;
7759:       case MATBAIJ_PRIVATE:
7760:         new_local_type = MATSEQBAIJ;
7761:         break;
7762:       case MATSBAIJ_PRIVATE:
7763:         new_local_type = MATSEQSBAIJ;
7764:         break;
7765:       default:
7766:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7767:         break;
7768:     }
7769:   } else { /* by default, new_local_type is seqaij */
7770:     new_local_type = MATSEQAIJ;
7771:     bs = 1;
7772:   }

7774:   /* create MATIS object if needed */
7775:   if (!reuse) {
7776:     MatGetSize(mat,&rows,&cols);
7777:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7778:   } else {
7779:     /* it also destroys the local matrices */
7780:     if (*mat_n) {
7781:       MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7782:     } else { /* this is a fake object */
7783:       MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7784:     }
7785:   }
7786:   MatISGetLocalMat(*mat_n,&local_mat);
7787:   MatSetType(local_mat,new_local_type);

7789:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

7791:   /* Global to local map of received indices */
7792:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7793:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7794:   ISLocalToGlobalMappingDestroy(&l2gmap);

7796:   /* restore attributes -> type of incoming data and its size */
7797:   buf_size_idxs = 0;
7798:   for (i=0;i<n_recvs;i++) {
7799:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7800:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7801:     buf_size_idxs += (PetscInt)olengths_idxs[i];
7802:   }
7803:   PetscFree(recv_buffer_idxs);

7805:   /* set preallocation */
7806:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7807:   if (!newisdense) {
7808:     PetscInt *new_local_nnz=0;

7810:     ptr_idxs = recv_buffer_idxs_local;
7811:     if (n_recvs) {
7812:       PetscCalloc1(new_local_rows,&new_local_nnz);
7813:     }
7814:     for (i=0;i<n_recvs;i++) {
7815:       PetscInt j;
7816:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7817:         for (j=0;j<*(ptr_idxs+1);j++) {
7818:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7819:         }
7820:       } else {
7821:         /* TODO */
7822:       }
7823:       ptr_idxs += olengths_idxs[i];
7824:     }
7825:     if (new_local_nnz) {
7826:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7827:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7828:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7829:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7830:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7831:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7832:     } else {
7833:       MatSetUp(local_mat);
7834:     }
7835:     PetscFree(new_local_nnz);
7836:   } else {
7837:     MatSetUp(local_mat);
7838:   }

7840:   /* set values */
7841:   ptr_vals = recv_buffer_vals;
7842:   ptr_idxs = recv_buffer_idxs_local;
7843:   for (i=0;i<n_recvs;i++) {
7844:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7845:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7846:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7847:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7848:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7849:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7850:     } else {
7851:       /* TODO */
7852:     }
7853:     ptr_idxs += olengths_idxs[i];
7854:     ptr_vals += olengths_vals[i];
7855:   }
7856:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7857:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7858:   MatISRestoreLocalMat(*mat_n,&local_mat);
7859:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7860:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7861:   PetscFree(recv_buffer_vals);

7863: #if 0
7864:   if (!restrict_comm) { /* check */
7865:     Vec       lvec,rvec;
7866:     PetscReal infty_error;

7868:     MatCreateVecs(mat,&rvec,&lvec);
7869:     VecSetRandom(rvec,NULL);
7870:     MatMult(mat,rvec,lvec);
7871:     VecScale(lvec,-1.0);
7872:     MatMultAdd(*mat_n,rvec,lvec,lvec);
7873:     VecNorm(lvec,NORM_INFINITY,&infty_error);
7874:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7875:     VecDestroy(&rvec);
7876:     VecDestroy(&lvec);
7877:   }
7878: #endif

7880:   /* assemble new additional is (if any) */
7881:   if (nis) {
7882:     PetscInt **temp_idxs,*count_is,j,psum;

7884:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7885:     PetscCalloc1(nis,&count_is);
7886:     ptr_idxs = recv_buffer_idxs_is;
7887:     psum = 0;
7888:     for (i=0;i<n_recvs;i++) {
7889:       for (j=0;j<nis;j++) {
7890:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7891:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
7892:         psum += plen;
7893:         ptr_idxs += plen+1; /* shift pointer to received data */
7894:       }
7895:     }
7896:     PetscMalloc1(nis,&temp_idxs);
7897:     PetscMalloc1(psum,&temp_idxs[0]);
7898:     for (i=1;i<nis;i++) {
7899:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7900:     }
7901:     PetscMemzero(count_is,nis*sizeof(PetscInt));
7902:     ptr_idxs = recv_buffer_idxs_is;
7903:     for (i=0;i<n_recvs;i++) {
7904:       for (j=0;j<nis;j++) {
7905:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7906:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7907:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7908:         ptr_idxs += plen+1; /* shift pointer to received data */
7909:       }
7910:     }
7911:     for (i=0;i<nis;i++) {
7912:       ISDestroy(&isarray[i]);
7913:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7914:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7915:     }
7916:     PetscFree(count_is);
7917:     PetscFree(temp_idxs[0]);
7918:     PetscFree(temp_idxs);
7919:   }
7920:   /* free workspace */
7921:   PetscFree(recv_buffer_idxs_is);
7922:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7923:   PetscFree(send_buffer_idxs);
7924:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7925:   if (isdense) {
7926:     MatISGetLocalMat(mat,&local_mat);
7927:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
7928:     MatISRestoreLocalMat(mat,&local_mat);
7929:   } else {
7930:     /* PetscFree(send_buffer_vals); */
7931:   }
7932:   if (nis) {
7933:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7934:     PetscFree(send_buffer_idxs_is);
7935:   }

7937:   if (nvecs) {
7938:     MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7939:     MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7940:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7941:     VecDestroy(&nnsp_vec[0]);
7942:     VecCreate(comm_n,&nnsp_vec[0]);
7943:     VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7944:     VecSetType(nnsp_vec[0],VECSTANDARD);
7945:     /* set values */
7946:     ptr_vals = recv_buffer_vecs;
7947:     ptr_idxs = recv_buffer_idxs_local;
7948:     VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7949:     for (i=0;i<n_recvs;i++) {
7950:       PetscInt j;
7951:       for (j=0;j<*(ptr_idxs+1);j++) {
7952:         send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7953:       }
7954:       ptr_idxs += olengths_idxs[i];
7955:       ptr_vals += olengths_idxs[i]-2;
7956:     }
7957:     VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7958:     VecAssemblyBegin(nnsp_vec[0]);
7959:     VecAssemblyEnd(nnsp_vec[0]);
7960:   }

7962:   PetscFree(recv_buffer_vecs);
7963:   PetscFree(recv_buffer_idxs_local);
7964:   PetscFree(recv_req_idxs);
7965:   PetscFree(recv_req_vals);
7966:   PetscFree(recv_req_vecs);
7967:   PetscFree(recv_req_idxs_is);
7968:   PetscFree(send_req_idxs);
7969:   PetscFree(send_req_vals);
7970:   PetscFree(send_req_vecs);
7971:   PetscFree(send_req_idxs_is);
7972:   PetscFree(ilengths_vals);
7973:   PetscFree(ilengths_idxs);
7974:   PetscFree(olengths_vals);
7975:   PetscFree(olengths_idxs);
7976:   PetscFree(onodes);
7977:   if (nis) {
7978:     PetscFree(ilengths_idxs_is);
7979:     PetscFree(olengths_idxs_is);
7980:     PetscFree(onodes_is);
7981:   }
7982:   PetscSubcommDestroy(&subcomm);
7983:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
7984:     MatDestroy(mat_n);
7985:     for (i=0;i<nis;i++) {
7986:       ISDestroy(&isarray[i]);
7987:     }
7988:     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7989:       VecDestroy(&nnsp_vec[0]);
7990:     }
7991:     *mat_n = NULL;
7992:   }
7993:   return(0);
7994: }

7996: /* temporary hack into ksp private data structure */
7997:  #include <petsc/private/kspimpl.h>

7999: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8000: {
8001:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
8002:   PC_IS                  *pcis = (PC_IS*)pc->data;
8003:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
8004:   Mat                    coarsedivudotp = NULL;
8005:   Mat                    coarseG,t_coarse_mat_is;
8006:   MatNullSpace           CoarseNullSpace = NULL;
8007:   ISLocalToGlobalMapping coarse_islg;
8008:   IS                     coarse_is,*isarray,corners;
8009:   PetscInt               i,im_active=-1,active_procs=-1;
8010:   PetscInt               nis,nisdofs,nisneu,nisvert;
8011:   PetscInt               coarse_eqs_per_proc;
8012:   PC                     pc_temp;
8013:   PCType                 coarse_pc_type;
8014:   KSPType                coarse_ksp_type;
8015:   PetscBool              multilevel_requested,multilevel_allowed;
8016:   PetscBool              coarse_reuse;
8017:   PetscInt               ncoarse,nedcfield;
8018:   PetscBool              compute_vecs = PETSC_FALSE;
8019:   PetscScalar            *array;
8020:   MatReuse               coarse_mat_reuse;
8021:   PetscBool              restr, full_restr, have_void;
8022:   PetscMPIInt            size;
8023:   PetscErrorCode         ierr;

8026:   PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8027:   /* Assign global numbering to coarse dofs */
8028:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8029:     PetscInt ocoarse_size;
8030:     compute_vecs = PETSC_TRUE;

8032:     pcbddc->new_primal_space = PETSC_TRUE;
8033:     ocoarse_size = pcbddc->coarse_size;
8034:     PetscFree(pcbddc->global_primal_indices);
8035:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8036:     /* see if we can avoid some work */
8037:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8038:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8039:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8040:         KSPReset(pcbddc->coarse_ksp);
8041:         coarse_reuse = PETSC_FALSE;
8042:       } else { /* we can safely reuse already computed coarse matrix */
8043:         coarse_reuse = PETSC_TRUE;
8044:       }
8045:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8046:       coarse_reuse = PETSC_FALSE;
8047:     }
8048:     /* reset any subassembling information */
8049:     if (!coarse_reuse || pcbddc->recompute_topography) {
8050:       ISDestroy(&pcbddc->coarse_subassembling);
8051:     }
8052:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8053:     coarse_reuse = PETSC_TRUE;
8054:   }
8055:   if (coarse_reuse && pcbddc->coarse_ksp) {
8056:     KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8057:     PetscObjectReference((PetscObject)coarse_mat);
8058:     coarse_mat_reuse = MAT_REUSE_MATRIX;
8059:   } else {
8060:     coarse_mat = NULL;
8061:     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8062:   }

8064:   /* creates temporary l2gmap and IS for coarse indexes */
8065:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8066:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

8068:   /* creates temporary MATIS object for coarse matrix */
8069:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8070:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8071:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8072:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8073:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8074:   MatDestroy(&coarse_submat_dense);

8076:   /* count "active" (i.e. with positive local size) and "void" processes */
8077:   im_active = !!(pcis->n);
8078:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));

8080:   /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8081:   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8082:   /* full_restr : just use the receivers from the subassembling pattern */
8083:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8084:   coarse_mat_is        = NULL;
8085:   multilevel_allowed   = PETSC_FALSE;
8086:   multilevel_requested = PETSC_FALSE;
8087:   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8088:   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8089:   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8090:   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8091:   if (multilevel_requested) {
8092:     ncoarse    = active_procs/pcbddc->coarsening_ratio;
8093:     restr      = PETSC_FALSE;
8094:     full_restr = PETSC_FALSE;
8095:   } else {
8096:     ncoarse    = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8097:     restr      = PETSC_TRUE;
8098:     full_restr = PETSC_TRUE;
8099:   }
8100:   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8101:   ncoarse = PetscMax(1,ncoarse);
8102:   if (!pcbddc->coarse_subassembling) {
8103:     if (pcbddc->coarsening_ratio > 1) {
8104:       if (multilevel_requested) {
8105:         PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8106:       } else {
8107:         PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8108:       }
8109:     } else {
8110:       PetscMPIInt rank;
8111:       MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8112:       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8113:       ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8114:     }
8115:   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8116:     PetscInt    psum;
8117:     if (pcbddc->coarse_ksp) psum = 1;
8118:     else psum = 0;
8119:     MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8120:     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8121:   }
8122:   /* determine if we can go multilevel */
8123:   if (multilevel_requested) {
8124:     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8125:     else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8126:   }
8127:   if (multilevel_allowed && have_void) restr = PETSC_TRUE;

8129:   /* dump subassembling pattern */
8130:   if (pcbddc->dbg_flag && multilevel_allowed) {
8131:     ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8132:   }
8133:   /* compute dofs splitting and neumann boundaries for coarse dofs */
8134:   nedcfield = -1;
8135:   corners = NULL;
8136:   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneded computations */
8137:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
8138:     const PetscInt         *idxs;
8139:     ISLocalToGlobalMapping tmap;

8141:     /* create map between primal indices (in local representative ordering) and local primal numbering */
8142:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8143:     /* allocate space for temporary storage */
8144:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8145:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8146:     /* allocate for IS array */
8147:     nisdofs = pcbddc->n_ISForDofsLocal;
8148:     if (pcbddc->nedclocal) {
8149:       if (pcbddc->nedfield > -1) {
8150:         nedcfield = pcbddc->nedfield;
8151:       } else {
8152:         nedcfield = 0;
8153:         if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8154:         nisdofs = 1;
8155:       }
8156:     }
8157:     nisneu = !!pcbddc->NeumannBoundariesLocal;
8158:     nisvert = 0; /* nisvert is not used */
8159:     nis = nisdofs + nisneu + nisvert;
8160:     PetscMalloc1(nis,&isarray);
8161:     /* dofs splitting */
8162:     for (i=0;i<nisdofs;i++) {
8163:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
8164:       if (nedcfield != i) {
8165:         ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8166:         ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8167:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8168:         ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8169:       } else {
8170:         ISGetLocalSize(pcbddc->nedclocal,&tsize);
8171:         ISGetIndices(pcbddc->nedclocal,&idxs);
8172:         ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8173:         if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8174:         ISRestoreIndices(pcbddc->nedclocal,&idxs);
8175:       }
8176:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8177:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8178:       /* ISView(isarray[i],0); */
8179:     }
8180:     /* neumann boundaries */
8181:     if (pcbddc->NeumannBoundariesLocal) {
8182:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8183:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8184:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8185:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8186:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8187:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8188:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8189:       /* ISView(isarray[nisdofs],0); */
8190:     }
8191:     /* coordinates */
8192:     if (pcbddc->corner_selected) {
8193:       PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8194:       ISGetLocalSize(corners,&tsize);
8195:       ISGetIndices(corners,&idxs);
8196:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8197:       if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8198:       ISRestoreIndices(corners,&idxs);
8199:       PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8200:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8201:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8202:     }
8203:     PetscFree(tidxs);
8204:     PetscFree(tidxs2);
8205:     ISLocalToGlobalMappingDestroy(&tmap);
8206:   } else {
8207:     nis = 0;
8208:     nisdofs = 0;
8209:     nisneu = 0;
8210:     nisvert = 0;
8211:     isarray = NULL;
8212:   }
8213:   /* destroy no longer needed map */
8214:   ISLocalToGlobalMappingDestroy(&coarse_islg);

8216:   /* subassemble */
8217:   if (multilevel_allowed) {
8218:     Vec       vp[1];
8219:     PetscInt  nvecs = 0;
8220:     PetscBool reuse,reuser;

8222:     if (coarse_mat) reuse = PETSC_TRUE;
8223:     else reuse = PETSC_FALSE;
8224:     MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8225:     vp[0] = NULL;
8226:     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8227:       VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8228:       VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8229:       VecSetType(vp[0],VECSTANDARD);
8230:       nvecs = 1;

8232:       if (pcbddc->divudotp) {
8233:         Mat      B,loc_divudotp;
8234:         Vec      v,p;
8235:         IS       dummy;
8236:         PetscInt np;

8238:         MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8239:         MatGetSize(loc_divudotp,&np,NULL);
8240:         ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8241:         MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8242:         MatCreateVecs(B,&v,&p);
8243:         VecSet(p,1.);
8244:         MatMultTranspose(B,p,v);
8245:         VecDestroy(&p);
8246:         MatDestroy(&B);
8247:         VecGetArray(vp[0],&array);
8248:         VecPlaceArray(pcbddc->vec1_P,array);
8249:         VecRestoreArray(vp[0],&array);
8250:         MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8251:         VecResetArray(pcbddc->vec1_P);
8252:         ISDestroy(&dummy);
8253:         VecDestroy(&v);
8254:       }
8255:     }
8256:     if (reuser) {
8257:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8258:     } else {
8259:       PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8260:     }
8261:     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8262:       PetscScalar *arraym,*arrayv;
8263:       PetscInt    nl;
8264:       VecGetLocalSize(vp[0],&nl);
8265:       MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8266:       MatDenseGetArray(coarsedivudotp,&arraym);
8267:       VecGetArray(vp[0],&arrayv);
8268:       PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8269:       VecRestoreArray(vp[0],&arrayv);
8270:       MatDenseRestoreArray(coarsedivudotp,&arraym);
8271:       VecDestroy(&vp[0]);
8272:     } else {
8273:       MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8274:     }
8275:   } else {
8276:     PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8277:   }
8278:   if (coarse_mat_is || coarse_mat) {
8279:     if (!multilevel_allowed) {
8280:       MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8281:     } else {
8282:       Mat A;

8284:       /* if this matrix is present, it means we are not reusing the coarse matrix */
8285:       if (coarse_mat_is) {
8286:         if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8287:         PetscObjectReference((PetscObject)coarse_mat_is);
8288:         coarse_mat = coarse_mat_is;
8289:       }
8290:       /* be sure we don't have MatSeqDENSE as local mat */
8291:       MatISGetLocalMat(coarse_mat,&A);
8292:       MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8293:     }
8294:   }
8295:   MatDestroy(&t_coarse_mat_is);
8296:   MatDestroy(&coarse_mat_is);

8298:   /* create local to global scatters for coarse problem */
8299:   if (compute_vecs) {
8300:     PetscInt lrows;
8301:     VecDestroy(&pcbddc->coarse_vec);
8302:     if (coarse_mat) {
8303:       MatGetLocalSize(coarse_mat,&lrows,NULL);
8304:     } else {
8305:       lrows = 0;
8306:     }
8307:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8308:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8309:     VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8310:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8311:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8312:   }
8313:   ISDestroy(&coarse_is);

8315:   /* set defaults for coarse KSP and PC */
8316:   if (multilevel_allowed) {
8317:     coarse_ksp_type = KSPRICHARDSON;
8318:     coarse_pc_type  = PCBDDC;
8319:   } else {
8320:     coarse_ksp_type = KSPPREONLY;
8321:     coarse_pc_type  = PCREDUNDANT;
8322:   }

8324:   /* print some info if requested */
8325:   if (pcbddc->dbg_flag) {
8326:     if (!multilevel_allowed) {
8327:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8328:       if (multilevel_requested) {
8329:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8330:       } else if (pcbddc->max_levels) {
8331:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8332:       }
8333:       PetscViewerFlush(pcbddc->dbg_viewer);
8334:     }
8335:   }

8337:   /* communicate coarse discrete gradient */
8338:   coarseG = NULL;
8339:   if (pcbddc->nedcG && multilevel_allowed) {
8340:     MPI_Comm ccomm;
8341:     if (coarse_mat) {
8342:       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8343:     } else {
8344:       ccomm = MPI_COMM_NULL;
8345:     }
8346:     MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8347:   }

8349:   /* create the coarse KSP object only once with defaults */
8350:   if (coarse_mat) {
8351:     PetscBool   isredundant,isnn,isbddc;
8352:     PetscViewer dbg_viewer = NULL;

8354:     if (pcbddc->dbg_flag) {
8355:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8356:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8357:     }
8358:     if (!pcbddc->coarse_ksp) {
8359:       char   prefix[256],str_level[16];
8360:       size_t len;

8362:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8363:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8364:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8365:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8366:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8367:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8368:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8369:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8370:       /* TODO is this logic correct? should check for coarse_mat type */
8371:       PCSetType(pc_temp,coarse_pc_type);
8372:       /* prefix */
8373:       PetscStrcpy(prefix,"");
8374:       PetscStrcpy(str_level,"");
8375:       if (!pcbddc->current_level) {
8376:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8377:         PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8378:       } else {
8379:         PetscStrlen(((PetscObject)pc)->prefix,&len);
8380:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8381:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8382:         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8383:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8384:         PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8385:         PetscStrlcat(prefix,str_level,sizeof(prefix));
8386:       }
8387:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8388:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8389:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8390:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8391:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8392:       /* allow user customization */
8393:       KSPSetFromOptions(pcbddc->coarse_ksp);
8394:       /* get some info after set from options */
8395:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8396:       /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8397:       PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8398:       PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8399:       if (multilevel_allowed && !isbddc && !isnn) {
8400:         isbddc = PETSC_TRUE;
8401:         PCSetType(pc_temp,PCBDDC);
8402:         PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8403:         PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8404:         PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8405:         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8406:           PetscObjectOptionsBegin((PetscObject)pc_temp);
8407:           (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8408:           PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8409:           PetscOptionsEnd();
8410:           pc_temp->setfromoptionscalled++;
8411:         }
8412:       }
8413:     }
8414:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8415:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8416:     if (nisdofs) {
8417:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8418:       for (i=0;i<nisdofs;i++) {
8419:         ISDestroy(&isarray[i]);
8420:       }
8421:     }
8422:     if (nisneu) {
8423:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8424:       ISDestroy(&isarray[nisdofs]);
8425:     }
8426:     if (nisvert) {
8427:       PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8428:       ISDestroy(&isarray[nis-1]);
8429:     }
8430:     if (coarseG) {
8431:       PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8432:     }

8434:     /* get some info after set from options */
8435:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);

8437:     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8438:     if (isbddc && !multilevel_allowed) {
8439:       PCSetType(pc_temp,coarse_pc_type);
8440:       isbddc = PETSC_FALSE;
8441:     }
8442:     /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8443:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8444:     if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8445:       PCSetType(pc_temp,PCBDDC);
8446:       isbddc = PETSC_TRUE;
8447:     }
8448:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8449:     if (isredundant) {
8450:       KSP inner_ksp;
8451:       PC  inner_pc;

8453:       PCRedundantGetKSP(pc_temp,&inner_ksp);
8454:       KSPGetPC(inner_ksp,&inner_pc);
8455:     }

8457:     /* parameters which miss an API */
8458:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8459:     if (isbddc) {
8460:       PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;

8462:       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8463:       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8464:       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8465:       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8466:       if (pcbddc_coarse->benign_saddle_point) {
8467:         Mat                    coarsedivudotp_is;
8468:         ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8469:         IS                     row,col;
8470:         const PetscInt         *gidxs;
8471:         PetscInt               n,st,M,N;

8473:         MatGetSize(coarsedivudotp,&n,NULL);
8474:         MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8475:         st   = st-n;
8476:         ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8477:         MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8478:         ISLocalToGlobalMappingGetSize(l2gmap,&n);
8479:         ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8480:         ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8481:         ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8482:         ISLocalToGlobalMappingCreateIS(row,&rl2g);
8483:         ISLocalToGlobalMappingCreateIS(col,&cl2g);
8484:         ISGetSize(row,&M);
8485:         MatGetSize(coarse_mat,&N,NULL);
8486:         ISDestroy(&row);
8487:         ISDestroy(&col);
8488:         MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8489:         MatSetType(coarsedivudotp_is,MATIS);
8490:         MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8491:         MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8492:         ISLocalToGlobalMappingDestroy(&rl2g);
8493:         ISLocalToGlobalMappingDestroy(&cl2g);
8494:         MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8495:         MatDestroy(&coarsedivudotp);
8496:         PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8497:         MatDestroy(&coarsedivudotp_is);
8498:         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8499:         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8500:       }
8501:     }

8503:     /* propagate symmetry info of coarse matrix */
8504:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8505:     if (pc->pmat->symmetric_set) {
8506:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8507:     }
8508:     if (pc->pmat->hermitian_set) {
8509:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8510:     }
8511:     if (pc->pmat->spd_set) {
8512:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8513:     }
8514:     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8515:       MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8516:     }
8517:     /* set operators */
8518:     MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8519:     MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8520:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8521:     if (pcbddc->dbg_flag) {
8522:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8523:     }
8524:   }
8525:   MatDestroy(&coarseG);
8526:   PetscFree(isarray);
8527: #if 0
8528:   {
8529:     PetscViewer viewer;
8530:     char filename[256];
8531:     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8532:     PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8533:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8534:     MatView(coarse_mat,viewer);
8535:     PetscViewerPopFormat(viewer);
8536:     PetscViewerDestroy(&viewer);
8537:   }
8538: #endif

8540:   if (corners) {
8541:     Vec            gv;
8542:     IS             is;
8543:     const PetscInt *idxs;
8544:     PetscInt       i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8545:     PetscScalar    *coords;

8547:     if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8548:     VecGetSize(pcbddc->coarse_vec,&N);
8549:     VecGetLocalSize(pcbddc->coarse_vec,&n);
8550:     VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8551:     VecSetBlockSize(gv,cdim);
8552:     VecSetSizes(gv,n*cdim,N*cdim);
8553:     VecSetType(gv,VECSTANDARD);
8554:     VecSetFromOptions(gv);
8555:     VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */

8557:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8558:     ISGetLocalSize(is,&n);
8559:     ISGetIndices(is,&idxs);
8560:     PetscMalloc1(n*cdim,&coords);
8561:     for (i=0;i<n;i++) {
8562:       for (d=0;d<cdim;d++) {
8563:         coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8564:       }
8565:     }
8566:     ISRestoreIndices(is,&idxs);
8567:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);

8569:     ISGetLocalSize(corners,&n);
8570:     ISGetIndices(corners,&idxs);
8571:     VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8572:     ISRestoreIndices(corners,&idxs);
8573:     PetscFree(coords);
8574:     VecAssemblyBegin(gv);
8575:     VecAssemblyEnd(gv);
8576:     VecGetArray(gv,&coords);
8577:     if (pcbddc->coarse_ksp) {
8578:       PC        coarse_pc;
8579:       PetscBool isbddc;

8581:       KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8582:       PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8583:       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8584:         PetscReal *realcoords;

8586:         VecGetLocalSize(gv,&n);
8587: #if defined(PETSC_USE_COMPLEX)
8588:         PetscMalloc1(n,&realcoords);
8589:         for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8590: #else
8591:         realcoords = coords;
8592: #endif
8593:         PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8594: #if defined(PETSC_USE_COMPLEX)
8595:         PetscFree(realcoords);
8596: #endif
8597:       }
8598:     }
8599:     VecRestoreArray(gv,&coords);
8600:     VecDestroy(&gv);
8601:   }
8602:   ISDestroy(&corners);

8604:   if (pcbddc->coarse_ksp) {
8605:     Vec crhs,csol;

8607:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
8608:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8609:     if (!csol) {
8610:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8611:     }
8612:     if (!crhs) {
8613:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8614:     }
8615:   }
8616:   MatDestroy(&coarsedivudotp);

8618:   /* compute null space for coarse solver if the benign trick has been requested */
8619:   if (pcbddc->benign_null) {

8621:     VecSet(pcbddc->vec1_P,0.);
8622:     for (i=0;i<pcbddc->benign_n;i++) {
8623:       VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8624:     }
8625:     VecAssemblyBegin(pcbddc->vec1_P);
8626:     VecAssemblyEnd(pcbddc->vec1_P);
8627:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8628:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8629:     if (coarse_mat) {
8630:       Vec         nullv;
8631:       PetscScalar *array,*array2;
8632:       PetscInt    nl;

8634:       MatCreateVecs(coarse_mat,&nullv,NULL);
8635:       VecGetLocalSize(nullv,&nl);
8636:       VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8637:       VecGetArray(nullv,&array2);
8638:       PetscMemcpy(array2,array,nl*sizeof(*array));
8639:       VecRestoreArray(nullv,&array2);
8640:       VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8641:       VecNormalize(nullv,NULL);
8642:       MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8643:       VecDestroy(&nullv);
8644:     }
8645:   }
8646:   PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);

8648:   PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8649:   if (pcbddc->coarse_ksp) {
8650:     PetscBool ispreonly;

8652:     if (CoarseNullSpace) {
8653:       PetscBool isnull;
8654:       MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8655:       if (isnull) {
8656:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
8657:       }
8658:       /* TODO: add local nullspaces (if any) */
8659:     }
8660:     /* setup coarse ksp */
8661:     KSPSetUp(pcbddc->coarse_ksp);
8662:     /* Check coarse problem if in debug mode or if solving with an iterative method */
8663:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8664:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8665:       KSP       check_ksp;
8666:       KSPType   check_ksp_type;
8667:       PC        check_pc;
8668:       Vec       check_vec,coarse_vec;
8669:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8670:       PetscInt  its;
8671:       PetscBool compute_eigs;
8672:       PetscReal *eigs_r,*eigs_c;
8673:       PetscInt  neigs;
8674:       const char *prefix;

8676:       /* Create ksp object suitable for estimation of extreme eigenvalues */
8677:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8678:       PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8679:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8680:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8681:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8682:       /* prevent from setup unneeded object */
8683:       KSPGetPC(check_ksp,&check_pc);
8684:       PCSetType(check_pc,PCNONE);
8685:       if (ispreonly) {
8686:         check_ksp_type = KSPPREONLY;
8687:         compute_eigs = PETSC_FALSE;
8688:       } else {
8689:         check_ksp_type = KSPGMRES;
8690:         compute_eigs = PETSC_TRUE;
8691:       }
8692:       KSPSetType(check_ksp,check_ksp_type);
8693:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
8694:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8695:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8696:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8697:       KSPSetOptionsPrefix(check_ksp,prefix);
8698:       KSPAppendOptionsPrefix(check_ksp,"check_");
8699:       KSPSetFromOptions(check_ksp);
8700:       KSPSetUp(check_ksp);
8701:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8702:       KSPSetPC(check_ksp,check_pc);
8703:       /* create random vec */
8704:       MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8705:       VecSetRandom(check_vec,NULL);
8706:       MatMult(coarse_mat,check_vec,coarse_vec);
8707:       /* solve coarse problem */
8708:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
8709:       KSPCheckSolve(check_ksp,pc,coarse_vec);
8710:       /* set eigenvalue estimation if preonly has not been requested */
8711:       if (compute_eigs) {
8712:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8713:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8714:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8715:         if (neigs) {
8716:           lambda_max = eigs_r[neigs-1];
8717:           lambda_min = eigs_r[0];
8718:           if (pcbddc->use_coarse_estimates) {
8719:             if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8720:               KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8721:               KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8722:             }
8723:           }
8724:         }
8725:       }

8727:       /* check coarse problem residual error */
8728:       if (pcbddc->dbg_flag) {
8729:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8730:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8731:         VecAXPY(check_vec,-1.0,coarse_vec);
8732:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
8733:         MatMult(coarse_mat,check_vec,coarse_vec);
8734:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8735:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8736:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8737:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8738:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
8739:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8740:         if (CoarseNullSpace) {
8741:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8742:         }
8743:         if (compute_eigs) {
8744:           PetscReal          lambda_max_s,lambda_min_s;
8745:           KSPConvergedReason reason;
8746:           KSPGetType(check_ksp,&check_ksp_type);
8747:           KSPGetIterationNumber(check_ksp,&its);
8748:           KSPGetConvergedReason(check_ksp,&reason);
8749:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8750:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8751:           for (i=0;i<neigs;i++) {
8752:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8753:           }
8754:         }
8755:         PetscViewerFlush(dbg_viewer);
8756:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8757:       }
8758:       VecDestroy(&check_vec);
8759:       VecDestroy(&coarse_vec);
8760:       KSPDestroy(&check_ksp);
8761:       if (compute_eigs) {
8762:         PetscFree(eigs_r);
8763:         PetscFree(eigs_c);
8764:       }
8765:     }
8766:   }
8767:   MatNullSpaceDestroy(&CoarseNullSpace);
8768:   /* print additional info */
8769:   if (pcbddc->dbg_flag) {
8770:     /* wait until all processes reach this point */
8771:     PetscBarrier((PetscObject)pc);
8772:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8773:     PetscViewerFlush(pcbddc->dbg_viewer);
8774:   }

8776:   /* free memory */
8777:   MatDestroy(&coarse_mat);
8778:   PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8779:   return(0);
8780: }

/*
   PCBDDCComputePrimalNumbering - Computes the global (coarse) numbering of the local primal dofs.

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed both as PC_BDDC and as PC_IS,
        pc->pmat->data as Mat_IS

   Output Parameters:
+  coarse_size_n          - global number of coarse dofs, as returned by ISRenumber()
-  local_primal_indices_n - array with pcbddc->local_primal_size renumbered indices, allocated here
                            with PetscMalloc1(). NOTE(review): the caller presumably releases it
                            with PetscFree() - confirm at the call sites

   Notes:
   The reference nodes stored in pcbddc->local_primal_ref_node are mapped to the global ordering
   with pcis->mapping and then renumbered by ISRenumber(), using pcbddc->local_primal_ref_mult as
   multiplicities. An error is raised if the renumbered set does not have
   pcbddc->local_primal_size local entries.

   When pcbddc->dbg_flag is set, the numbering is cross-checked and reported on pcbddc->dbg_viewer;
   pcis->vec1_N, pcis->vec2_N and pcis->vec1_global are overwritten by the check. The check fails
   (on all processes, after a logical-or reduction) if some dof has been marked as primal by a
   number of subdomains different from the number of subdomains sharing it.
*/
8782: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8783: {
8784:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
8785:   PC_IS*         pcis = (PC_IS*)pc->data;
8786:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
8787:   IS             subset,subset_mult,subset_n;
8788:   PetscInt       local_size,coarse_size=0;
8789:   PetscInt       *local_primal_indices=NULL;
8790:   const PetscInt *t_local_primal_indices;

8794:   /* Compute global number of coarse dofs */
8795:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
        /* reference nodes, first in local and then in global numbering */
8796:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8797:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8798:   ISDestroy(&subset_n);
        /* multiplicity attached to each reference node */
8799:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8800:   ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8801:   ISDestroy(&subset);
8802:   ISDestroy(&subset_mult);
8803:   ISGetLocalSize(subset_n,&local_size);
8804:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
        /* copy the renumbered indices in the array handed back to the caller */
8805:   PetscMalloc1(local_size,&local_primal_indices);
8806:   ISGetIndices(subset_n,&t_local_primal_indices);
8807:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8808:   ISRestoreIndices(subset_n,&t_local_primal_indices);
8809:   ISDestroy(&subset_n);

8811:   /* check numbering */
8812:   if (pcbddc->dbg_flag) {
8813:     PetscScalar coarsesum,*array,*array2;
8814:     PetscInt    i;
8815:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

8817:     PetscViewerFlush(pcbddc->dbg_viewer);
8818:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8819:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8820:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8821:     /* counter */
          /* every subdomain adds 1.0 for each of its local dofs: after the round trip,
             vec2_N holds the number of contributions received by each local dof */
8822:     VecSet(pcis->vec1_global,0.0);
8823:     VecSet(pcis->vec1_N,1.0);
8824:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8825:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8826:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8827:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
          /* same round trip, but adding 1.0 only on the dofs listed in primal_indices_local_idxs:
             vec1_N ends up with the number of subdomains that marked each local dof */
8828:     VecSet(pcis->vec1_N,0.0);
8829:     for (i=0;i<pcbddc->local_primal_size;i++) {
8830:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8831:     }
8832:     VecAssemblyBegin(pcis->vec1_N);
8833:     VecAssemblyEnd(pcis->vec1_N);
8834:     VecSet(pcis->vec1_global,0.0);
8835:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8836:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8837:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8838:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8839:     VecGetArray(pcis->vec1_N,&array);
8840:     VecGetArray(pcis->vec2_N,&array2);
          /* a dof marked by at least one subdomain must have been marked by all the contributors */
8841:     for (i=0;i<pcis->n;i++) {
8842:       if (array[i] != 0.0 && array[i] != array2[i]) {
8843:         PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8844:         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8845:         set_error = PETSC_TRUE;
8846:         ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8847:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8848:       }
8849:     }
8850:     VecRestoreArray(pcis->vec2_N,&array2);
          /* the failure of a single process must be seen by everybody */
8851:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8852:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* weight each marked dof with 1/(number of marks): the global sum of the weights
             is printed next to coarse_size as an independent count */
8853:     for (i=0;i<pcis->n;i++) {
8854:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8855:     }
8856:     VecRestoreArray(pcis->vec1_N,&array);
8857:     VecSet(pcis->vec1_global,0.0);
8858:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8859:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8860:     VecSum(pcis->vec1_global,&coarsesum);
          /* PetscReal is not necessarily a double: cast explicitly for the %lf conversion */
8861:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,(double)PetscRealPart(coarsesum));
8862:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8863:       PetscInt *gidxs;

8865:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8866:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8867:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8868:       PetscViewerFlush(pcbddc->dbg_viewer);
8869:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8870:       for (i=0;i<pcbddc->local_primal_size;i++) {
8871:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8872:       }
8873:       PetscViewerFlush(pcbddc->dbg_viewer);
8874:       PetscFree(gidxs);
8875:     }
8876:     PetscViewerFlush(pcbddc->dbg_viewer);
          /* balance the PetscViewerASCIIPushSynchronized() issued at the beginning of the check */
8877:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8878:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8879:   }

8881:   /* get back data */
8882:   *coarse_size_n = coarse_size;
8883:   *local_primal_indices_n = local_primal_indices;
8884:   return(0);
8885: }

/*
   PCBDDCGlobalToLocal - Builds the IS of the positions of a local vector reached by a set of global indices.

   Input Parameters:
+  g2l_ctx  - scatter context, applied forward from gwork to lwork
.  gwork    - work vector addressed by the entries of globalis; overwritten
.  lwork    - work vector receiving the scattered values; overwritten
-  globalis - indices into gwork

   Output Parameter:
.  localis  - new IS, created on the communicator of gwork, listing in increasing order the
              positions of the local array of lwork whose scattered value has real part > 0.5

   Notes:
   The entries of gwork listed in globalis are set to 1.0, all the others to 0.0, and the result is
   scattered to lwork. VEC_IGNORE_NEGATIVE_INDICES is switched on for gwork (whenever the index
   array of globalis is not NULL) and it is not restored afterwards.
   The index array is handed over to the IS with PETSC_OWN_POINTER, so it must not be freed here.
*/
8887: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8888: {
8889:   IS             localis_t;
8890:   PetscInt       i,lsize,*idxs,n;
8891:   PetscScalar    *vals;

8895:   /* get indices in local ordering exploiting local to global map */
8896:   ISGetLocalSize(globalis,&lsize);
        /* vals is first a buffer of ones for VecSetValues(); it is reused later as a read-only view of lwork */
8897:   PetscMalloc1(lsize,&vals);
8898:   for (i=0;i<lsize;i++) vals[i] = 1.0;
8899:   ISGetIndices(globalis,(const PetscInt**)&idxs);
8900:   VecSet(gwork,0.0);
8901:   VecSet(lwork,0.0);
8902:   if (idxs) { /* multilevel guard */
8903:     VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8904:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8905:   }
        /* the assembly is always started and ended, also when no value has been set */
8906:   VecAssemblyBegin(gwork);
8907:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8908:   PetscFree(vals);
8909:   VecAssemblyEnd(gwork);
8910:   /* now compute set in local ordering */
8911:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8912:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8913:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
        /* the array obtained above spans the local part of lwork only: bound the loops with the local size */
8914:   VecGetLocalSize(lwork,&n);
        /* first pass: count the marked entries */
8915:   for (i=0,lsize=0;i<n;i++) {
8916:     if (PetscRealPart(vals[i]) > 0.5) {
8917:       lsize++;
8918:     }
8919:   }
        /* second pass: store their positions */
8920:   PetscMalloc1(lsize,&idxs);
8921:   for (i=0,lsize=0;i<n;i++) {
8922:     if (PetscRealPart(vals[i]) > 0.5) {
8923:       idxs[lsize++] = i;
8924:     }
8925:   }
8926:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
        /* idxs now belongs to the IS */
8927:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8928:   *localis = localis_t;
8929:   return(0);
8930: }

/*
   PCBDDCSetUpSubSchurs - Sets up the data of pcbddc->sub_schurs by calling PCBDDCSubSchursSetUp().

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed both as PC_IS and as PC_BDDC

   Notes:
   The routine proceeds as follows
   1. it selects the adjacency (used_xadj,used_adjncy) passed to PCBDDCSubSchursSetUp(): none if
      pcbddc->sub_schurs_layers is -1, the one stored in pcbddc->mat_graph, or a private copy of
      the row structure of pcbddc->local_mat. If the latter cannot be obtained,
      pcbddc->sub_schurs_layers is reset to -1;
   2. it creates the Schur complement object S_j from the blocks stored in pcis;
   3. if sub_schurs->schur_explicit is false, pcbddc->ksp_D is attached to S_j and the setup is
      called without a local matrix; otherwise pcbddc->local_mat is converted to MATSEQAIJ (if
      needed), an optional change of basis is computed through a temporary PCBDDC object, and
      the setup is called with the local matrix (or with a modified copy of it when the option
      -sub_schurs_discrete_harmonic is set).

   The whole routine is enclosed in the PC_BDDC_Schurs log event of the current level.
   It contains MPIU_Allreduce() calls on the communicator of pc when the change of basis test
   is active.
*/
8932: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8933: {
8934:   PC_IS               *pcis=(PC_IS*)pc->data;
8935:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
8936:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
8937:   Mat                 S_j;
8938:   PetscInt            *used_xadj,*used_adjncy;
8939:   PetscBool           free_used_adj;
8940:   PetscErrorCode      ierr;

8943:   PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8944:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
        /* free_used_adj is set only when the adjacency arrays are allocated in this routine */
8945:   free_used_adj = PETSC_FALSE;
8946:   if (pcbddc->sub_schurs_layers == -1) {
          /* -1 layers: no adjacency is passed to the setup */
8947:     used_xadj = NULL;
8948:     used_adjncy = NULL;
8949:   } else {
          /* the first two branches both borrow the adjacency stored in the graph */
8950:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8951:       used_xadj = pcbddc->mat_graph->xadj;
8952:       used_adjncy = pcbddc->mat_graph->adjncy;
8953:     } else if (pcbddc->computed_rowadj) {
8954:       used_xadj = pcbddc->mat_graph->xadj;
8955:       used_adjncy = pcbddc->mat_graph->adjncy;
8956:     } else {
8957:       PetscBool      flg_row=PETSC_FALSE;
8958:       const PetscInt *xadj,*adjncy;
8959:       PetscInt       nvtxs;

            /* take a private copy of the row structure of the local matrix, since the arrays are given back by MatRestoreRowIJ() below */
8961:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8962:       if (flg_row) {
8963:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8964:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8965:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8966:         free_used_adj = PETSC_TRUE;
8967:       } else {
              /* row structure not available: fall back to the setting without adjacency */
8968:         pcbddc->sub_schurs_layers = -1;
8969:         used_xadj = NULL;
8970:         used_adjncy = NULL;
8971:       }
8972:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8973:     }
8974:   }

8976:   /* setup sub_schurs data */
8977:   MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8978:   if (!sub_schurs->schur_explicit) {
8979:     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8980:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8981:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8982:   } else {
8983:     Mat       change = NULL;
8984:     Vec       scaling = NULL;
8985:     IS        change_primal = NULL, iP;
8986:     PetscInt  benign_n;
8987:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8988:     PetscBool isseqaij,need_change = PETSC_FALSE;
8989:     PetscBool discrete_harmonic = PETSC_FALSE;

          /* when vertices are not used, solvers are reused only if sub_schurs->is_vertices is locally empty */
8991:     if (!pcbddc->use_vertices && reuse_solvers) {
8992:       PetscInt n_vertices;

8994:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8995:       reuse_solvers = (PetscBool)!n_vertices;
8996:     }
          /* the local matrix handed to the setup must be of type MATSEQAIJ */
8997:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8998:     if (!isseqaij) {
8999:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
9000:       if (matis->A == pcbddc->local_mat) {
              /* local_mat is the same object as matis->A: release it and store a newly created
                 MATSEQAIJ copy of matis->A instead of converting in place */
9001:         MatDestroy(&pcbddc->local_mat);
9002:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
9003:       } else {
9004:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
9005:       }
9006:     }
          /* the benign subspace data is passed to the setup only if the benign change is not explicit */
9007:     if (!pcbddc->benign_change_explicit) {
9008:       benign_n = pcbddc->benign_n;
9009:     } else {
9010:       benign_n = 0;
9011:     }
9012:     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9013:        We need a global reduction to avoid possible deadlocks.
9014:        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9015:     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
            /* need_change becomes true only if no process already holds sub_schurs->change */
9016:       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9017:       MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9018:       need_change = (PetscBool)(!need_change);
9019:     }
9020:     /* If the user defines additional constraints, we import them here.
9021:        We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9022:     if (need_change) {
9023:       PC_IS   *pcisf;
9024:       PC_BDDC *pcbddcf;
9025:       PC      pcf;

9027:       if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
            /* temporary PCBDDC object sharing the operators of pc */
9028:       PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9029:       PCSetOperators(pcf,pc->mat,pc->pmat);
9030:       PCSetType(pcf,PCBDDC);

9032:       /* hacks */
            /* pcf borrows the objects below from pc: they are shared pointers, not copies */
9033:       pcisf                        = (PC_IS*)pcf->data;
9034:       pcisf->is_B_local            = pcis->is_B_local;
9035:       pcisf->vec1_N                = pcis->vec1_N;
9036:       pcisf->BtoNmap               = pcis->BtoNmap;
9037:       pcisf->n                     = pcis->n;
9038:       pcisf->n_B                   = pcis->n_B;
9039:       pcbddcf                      = (PC_BDDC*)pcf->data;
            /* NOTE(review): the graph freed here is presumably the one allocated by PCSetType(pcf,PCBDDC) - confirm */
9040:       PetscFree(pcbddcf->mat_graph);
9041:       pcbddcf->mat_graph           = pcbddc->mat_graph;
9042:       pcbddcf->use_faces           = PETSC_TRUE;
9043:       pcbddcf->use_change_of_basis = PETSC_TRUE;
9044:       pcbddcf->use_change_on_faces = PETSC_TRUE;
9045:       pcbddcf->use_qr_single       = PETSC_TRUE;
9046:       pcbddcf->fake_change         = PETSC_TRUE;

9048:       /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9049:       PCBDDCConstraintsSetUp(pcf);
9050:       sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9051:       ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
            /* steal the constraint matrix from pcf: it is destroyed at the end of this branch through change */
9052:       change = pcbddcf->ConstraintMatrix;
9053:       pcbddcf->ConstraintMatrix = NULL;

9055:       /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9056:       PetscFree(pcbddcf->sub_schurs);
9057:       MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9058:       PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9059:       PetscFree(pcbddcf->primal_indices_local_idxs);
9060:       PetscFree(pcbddcf->onearnullvecs_state);
9061:       PetscFree(pcf->data);
            /* the type-specific destroy and reset callbacks are removed before PCDestroy(),
               presumably to keep them away from the data borrowed from pc (and already freed above) */
9062:       pcf->ops->destroy = NULL;
9063:       pcf->ops->reset   = NULL;
9064:       PCDestroy(&pcf);
9065:     }
          /* pcis->D is passed as scaling only when deluxe scaling is not used */
9066:     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;

          /* the discrete harmonic option is read only if an object named __KSPFETIDP_iP is attached to pc */
9068:     PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9069:     if (iP) {
9070:       PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9071:       PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9072:       PetscOptionsEnd();
9073:     }
9074:     if (discrete_harmonic) {
            /* work on a copy of the local matrix where the rows and columns listed in iP are zeroed
               (with 1.0 on the diagonal); iP is attached to the copy before the setup */
9075:       Mat A;
9076:       MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9077:       MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9078:       PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9079:       PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9080:       MatDestroy(&A);
9081:     } else {
9082:       PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9083:     }
          /* both are NULL if the change of basis has not been computed above */
9084:     MatDestroy(&change);
9085:     ISDestroy(&change_primal);
9086:   }
9087:   MatDestroy(&S_j);

9089:   /* free adjacency */
9090:   if (free_used_adj) {
9091:     PetscFree2(used_xadj,used_adjncy);
9092:   }
9093:   PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9094:   return(0);
9095: }

/*
   PCBDDCInitSubSchurs - Creates (if needed) and initializes pcbddc->sub_schurs.

   Input Parameter:
.  pc - the preconditioner context; pc->data is accessed both as PC_IS and as PC_BDDC

   Notes:
   PCBDDCSubSchursInit() is called with the options prefix of pc, pcis->is_I_local,
   pcis->is_B_local, pcis->BtoNmap and an interface graph, which is
   - pcbddc->mat_graph, if pcbddc->sub_schurs_rebuild is false;
   - a temporary graph otherwise, initialized from the local to global map of pcbddc->mat_graph
     and set up with the Dirichlet boundaries and the candidate vertices of pcbddc->mat_graph.
     The temporary graph is destroyed before returning.

   When pcbddc->dbg_flag is set and no rebuild has been requested, the graph and the local
   number of candidate vertices, edges and faces are printed on pcbddc->dbg_viewer.
*/
9097: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9098: {
9099:   PC_IS               *pcis=(PC_IS*)pc->data;
9100:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9101:   PCBDDCGraph         graph;
9102:   PetscErrorCode      ierr;

9105:   /* attach interface graph for determining subsets */
9106:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9107:     IS       verticesIS,verticescomm;
9108:     PetscInt vsize,*idxs;

9110:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
          /* vsize is the length of the array obtained with ISGetIndices(): use the local size */
9111:     ISGetLocalSize(verticesIS,&vsize);
9112:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
          /* same indices (copied), but in an IS living on the communicator of pc */
9113:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9114:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9115:     PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9116:     PCBDDCGraphCreate(&graph);
9117:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9118:     PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9119:     ISDestroy(&verticescomm);
9120:     PCBDDCGraphComputeConnectedComponents(graph);
9121:   } else {
9122:     graph = pcbddc->mat_graph;
9123:   }
9124:   /* print some info */
9125:   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9126:     IS       vertices;
9127:     PetscInt nv,nedges,nfaces;
9128:     PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9129:     PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
          /* the message reports the local number of candidate vertices */
9130:     ISGetLocalSize(vertices,&nv);
9131:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9132:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
          /* the use_* flags are PetscBool: cast them to PetscInt to match the %D conversion */
9133:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,(PetscInt)pcbddc->use_vertices);
9134:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%D)\n",PetscGlobalRank,(int)nedges,(PetscInt)pcbddc->use_edges);
9135:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%D)\n",PetscGlobalRank,(int)nfaces,(PetscInt)pcbddc->use_faces);
9136:     PetscViewerFlush(pcbddc->dbg_viewer);
9137:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9138:     PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9139:   }

9141:   /* sub_schurs init */
9142:   if (!pcbddc->sub_schurs) {
9143:     PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9144:   }
9145:   PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);

9147:   /* free graph struct */
        /* only the temporary graph created above is destroyed; pcbddc->mat_graph is left untouched */
9148:   if (pcbddc->sub_schurs_rebuild) {
9149:     PCBDDCGraphDestroy(&graph);
9150:   }
9151:   return(0);
9152: }

9154: PetscErrorCode PCBDDCCheckOperator(PC pc)
9155: {
9156:   PC_IS               *pcis=(PC_IS*)pc->data;
9157:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
9158:   PetscErrorCode      ierr;

9161:   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9162:     IS             zerodiag = NULL;
9163:     Mat            S_j,B0_B=NULL;
9164:     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
9165:     PetscScalar    *p0_check,*array,*array2;
9166:     PetscReal      norm;
9167:     PetscInt       i;

9169:     /* B0 and B0_B */
9170:     if (zerodiag) {
9171:       IS       dummy;

9173:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9174:       MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9175:       MatCreateVecs(B0_B,NULL,&dummy_vec);
9176:       ISDestroy(&dummy);
9177:     }
9178:     /* I need a primal vector to scale primal nodes since BDDC sums contributions */
9179:     VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9180:     VecSet(pcbddc->vec1_P,1.0);
9181:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9182:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9183:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9184:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9185:     VecReciprocal(vec_scale_P);
9186:     /* S_j */
9187:     MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9188:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);

9190:     /* mimic vector in \widetilde{W}_\Gamma */
9191:     VecSetRandom(pcis->vec1_N,NULL);
9192:     /* continuous in primal space */
9193:     VecSetRandom(pcbddc->coarse_vec,NULL);
9194:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9195:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9196:     VecGetArray(pcbddc->vec1_P,&array);
9197:     PetscCalloc1(pcbddc->benign_n,&p0_check);
9198:     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9199:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9200:     VecRestoreArray(pcbddc->vec1_P,&array);
9201:     VecAssemblyBegin(pcis->vec1_N);
9202:     VecAssemblyEnd(pcis->vec1_N);
9203:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9204:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9205:     VecDuplicate(pcis->vec2_B,&vec_check_B);
9206:     VecCopy(pcis->vec2_B,vec_check_B);

9208:     /* assemble rhs for coarse problem */
9209:     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9210:     /* local with Schur */
9211:     MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9212:     if (zerodiag) {
9213:       VecGetArray(dummy_vec,&array);
9214:       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9215:       VecRestoreArray(dummy_vec,&array);
9216:       MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9217:     }
9218:     /* sum on primal nodes the local contributions */
9219:     VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9220:     VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9221:     VecGetArray(pcis->vec1_N,&array);
9222:     VecGetArray(pcbddc->vec1_P,&array2);
9223:     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9224:     VecRestoreArray(pcbddc->vec1_P,&array2);
9225:     VecRestoreArray(pcis->vec1_N,&array);
9226:     VecSet(pcbddc->coarse_vec,0.);
9227:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9228:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9229:     VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9230:     VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9231:     VecGetArray(pcbddc->vec1_P,&array);
9232:     /* scale primal nodes (BDDC sums contributions) */
9233:     VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9234:     VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9235:     VecRestoreArray(pcbddc->vec1_P,&array);
9236:     VecAssemblyBegin(pcis->vec1_N);
9237:     VecAssemblyEnd(pcis->vec1_N);
9238:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9239:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9240:     /* global: \widetilde{B0}_B w_\Gamma */
9241:     if (zerodiag) {
9242:       MatMult(B0_B,pcis->vec2_B,dummy_vec);
9243:       VecGetArray(dummy_vec,&array);
9244:       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9245:       VecRestoreArray(dummy_vec,&array);
9246:     }
9247:     /* BDDC */
9248:     VecSet(pcis->vec1_D,0.);
9249:     PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);

9251:     VecCopy(pcis->vec1_B,pcis->vec2_B);
9252:     VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9253:     VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9254:     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9255:     for (i=0;i<pcbddc->benign_n;i++) {
9256:       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9257:     }
9258:     PetscFree(p0_check);
9259:     VecDestroy(&vec_scale_P);
9260:     VecDestroy(&vec_check_B);
9261:     VecDestroy(&dummy_vec);
9262:     MatDestroy(&S_j);
9263:     MatDestroy(&B0_B);
9264:   }
9265:   return(0);
9266: }

9268:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
/*
   MatMPIAIJRestrict - builds, on the communicator ccomm, a matrix *B that
   reuses the local blocks of a row-redistributed copy of A.

   Input:
     A     - source matrix; its global row count (A->rmap->N) sizes the new
             row layout and its communicator is used for the row index set.
             NOTE(review): the data of the extracted submatrix is cast to
             Mat_MPIAIJ without a type check, so A is presumably MPIAIJ -
             confirm against callers.
     ccomm - target communicator, or MPI_COMM_NULL on ranks that take no rows.

   Output:
     B     - written only when ccomm != MPI_COMM_NULL; on the other ranks *B is
             left untouched by this routine.

   Steps:
     1. On ranks with a valid ccomm, a PetscLayout over ccomm (global size
        A->rmap->N, block size 1) yields the row range [rst,ren). The other
        ranks keep rst = ren = 0, i.e. an empty range.
     2. Those rows are extracted from A into the temporary At with
        MatCreateSubMatrix; this runs on every rank.
     3. On ranks with a valid ccomm, *B is created on ccomm and filled by hand
        from At: the a->A and a->B blocks and lvec are shared by taking a
        reference, colmap and garray are copied, and a new scatter Mvctx is
        created.
     4. At is destroyed; the objects shared with *B were referenced in step 3.

   Always ends with return(0); the return codes of the PETSc calls are not
   inspected in this listing.
*/
9269: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9270: {
9271:   Mat            At;
9272:   IS             rows;
9273:   PetscInt       rst,ren;
9275:   PetscLayout    rmap;

        /* empty row range by default: used as-is when ccomm is MPI_COMM_NULL */
9278:   rst = ren = 0;
9279:   if (ccomm != MPI_COMM_NULL) {
          /* row layout of the restricted matrix: all A->rmap->N global rows split over ccomm */
9280:     PetscLayoutCreate(ccomm,&rmap);
9281:     PetscLayoutSetSize(rmap,A->rmap->N);
9282:     PetscLayoutSetBlockSize(rmap,1);
9283:     PetscLayoutSetUp(rmap);
9284:     PetscLayoutGetRange(rmap,&rst,&ren);
9285:   }
        /* contiguous rows rst..ren-1, on the communicator of A; a NULL column
           index set is passed (all columns, per the MatCreateSubMatrix manual page) */
9286:   ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9287:   MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9288:   ISDestroy(&rows);

9290:   if (ccomm != MPI_COMM_NULL) {
9291:     Mat_MPIAIJ *a,*b;
9292:     IS         from,to;
9293:     Vec        gvec;
9294:     PetscInt   lsize;

          /* shell of the output matrix on ccomm: ren-rst local rows, At->cmap->N global columns */
9296:     MatCreate(ccomm,B);
9297:     MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9298:     MatSetType(*B,MATAIJ);
          /* the row layout created by the calls above is dropped here; rmap is installed further below */
9299:     PetscLayoutDestroy(&((*B)->rmap));
9300:     PetscLayoutSetUp((*B)->cmap);
          /* NOTE(review): both casts presume the type-specific data is Mat_MPIAIJ - confirm
             that MATAIJ cannot resolve to a sequential type for the ccomm used by callers */
9301:     a    = (Mat_MPIAIJ*)At->data;
9302:     b    = (Mat_MPIAIJ*)(*B)->data;
9303:     MPI_Comm_size(ccomm,&b->size);
9304:     MPI_Comm_rank(ccomm,&b->rank);
          /* share (not copy) the two local blocks of At: take a reference, then alias the pointers */
9305:     PetscObjectReference((PetscObject)a->A);
9306:     PetscObjectReference((PetscObject)a->B);
9307:     b->A = a->A;
9308:     b->B = a->B;

          /* carry over the insertion flags of At and reset the row-access state */
9310:     b->donotstash      = a->donotstash;
9311:     b->roworiented     = a->roworiented;
9312:     b->rowindices      = 0;
9313:     b->rowvalues       = 0;
9314:     b->getrowactive    = PETSC_FALSE;

          /* rmap is handed over to *B here; this routine does not destroy it afterwards.
             *B is flagged as assembled and preallocated directly, without an assembly call */
9316:     (*B)->rmap         = rmap;
9317:     (*B)->factortype   = A->factortype;
9318:     (*B)->assembled    = PETSC_TRUE;
9319:     (*B)->insertmode   = NOT_SET_VALUES;
9320:     (*B)->preallocated = PETSC_TRUE;

          /* private copy of the column map of At, if it has one (table or plain array of
             At->cmap->N entries, depending on PETSC_USE_CTABLE) */
9322:     if (a->colmap) {
9323: #if defined(PETSC_USE_CTABLE)
9324:       PetscTableCreateCopy(a->colmap,&b->colmap);
9325: #else
9326:       PetscMalloc1(At->cmap->N,&b->colmap);
9327:       PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9328:       PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9329: #endif
9330:     } else b->colmap = 0;
          /* private copy of garray: len = number of local columns of a->B; len+1 entries
             are allocated, len entries are copied (and logged) */
9331:     if (a->garray) {
9332:       PetscInt len;
9333:       len  = a->B->cmap->n;
9334:       PetscMalloc1(len+1,&b->garray);
9335:       PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9336:       if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9337:     } else b->garray = 0;

          /* the local work vector is shared with At by reference */
9339:     PetscObjectReference((PetscObject)a->lvec);
9340:     b->lvec = a->lvec;
9341:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);

          /* a new scatter is created for *B: entries of a vector obtained from
             MatCreateVecs(*B,...) at the indices in b->garray go to positions
             0..lsize-1 of b->lvec. The "from" index set wraps b->garray with
             PETSC_USE_POINTER */
9343:     /* cannot use VecScatterCopy */
9344:     VecGetLocalSize(b->lvec,&lsize);
9345:     ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9346:     ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9347:     MatCreateVecs(*B,&gvec,NULL);
9348:     VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9349:     PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9350:     ISDestroy(&from);
9351:     ISDestroy(&to);
9352:     VecDestroy(&gvec);
9353:   }
        /* At is released on every rank; *B holds its own references to the shared blocks and lvec */
9354:   MatDestroy(&At);
9355:   return(0);
9356: }