Actual source code: bddcprivate.c
petsc-3.10.5 2019-03-28
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* Extracts, from the left singular vectors of the sequential dense matrix A, a basis
   for range(A) or for its orthogonal complement.

   The nr x nr matrix U of left singular vectors is computed with LAPACK gesvd
   (job options "A","N") and split at index i, the position of the first singular
   value smaller than PETSC_SMALL:
     - if range is true, it returns B s.t. span{B} = range(A)
       (B is nr x i and holds the first i columns of U)
     - if range is false, it returns B s.t. range(B) _|_ range(A)
       (B is nr x (nr-i) and holds the remaining columns of U)

   Input:
     A     - matrix to analyze; its array is obtained with MatDenseGetArray() and handed
             directly to gesvd.
             NOTE(review): LAPACK documents that gesvd destroys the input array for these
             job options, so the values of A are presumably overwritten - confirm with callers
     range - PETSC_TRUE for the range, PETSC_FALSE for the orthogonal complement
     lw    - length of work (used only when work is not NULL)
     work  - optional LAPACK workspace; when NULL, a buffer of size
             max(1, 5*min(nr,nc), 3*min(nr,nc)+max(nr,nc)) is allocated and freed here
     rwork - optional storage for the singular values; should hold at least min(nr,nc)
             entries, which is the size allocated (and freed) here when it is NULL
   Output:
     B     - newly created sequential dense matrix. When A has zero rows or zero columns
             the routine returns before creating it and *B is left untouched.

   Only implemented for real scalars: complex builds, and builds where LAPACK gesvd is
   missing, raise PETSC_ERR_SUP. */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: #if !defined(PETSC_USE_COMPLEX)
18: PetscScalar *uwork,*data,*U, ds = 0.; /* ds: placeholder passed where gesvd expects VT (not requested) */
19: PetscReal *sing;
20: PetscBLASInt bM,bN,lwork,lierr,di = 1; /* di: placeholder leading dimension for the unused VT */
21: PetscInt ulw,i,nr,nc,n;
25: #if defined(PETSC_MISSING_LAPACK_GESVD)
26: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
27: #else
28: MatGetSize(A,&nr,&nc);
29: if (!nr || !nc) return(0); /* empty matrix: nothing to do, *B is not created */
31: /* workspace: use the caller's buffer when given, otherwise allocate one */
32: if (!work) {
33: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
34: PetscMalloc1(ulw,&uwork);
35: } else {
36: ulw = lw;
37: uwork = work;
38: }
39: n = PetscMin(nr,nc); /* number of singular values */
40: if (!rwork) {
41: PetscMalloc1(n,&sing);
42: } else {
43: sing = rwork;
44: }
46: /* SVD: all nr left singular vectors go into U, right singular vectors are not computed */
47: PetscMalloc1(nr*nr,&U);
48: PetscBLASIntCast(nr,&bM);
49: PetscBLASIntCast(nc,&bN);
50: PetscBLASIntCast(ulw,&lwork);
51: MatDenseGetArray(A,&data);
52: PetscFPTrapPush(PETSC_FP_TRAP_OFF); /* floating point trapping is switched off around the LAPACK call */
53: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); /* bM is also passed as the leading dimension of A's array - assumes lda == nr, TODO confirm */
54: PetscFPTrapPop();
55: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); /* NOTE(review): returns without restoring A's array or freeing U and the locally allocated buffers */
56: MatDenseRestoreArray(A,&data);
57: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; /* on exit i is the index of the first singular value below PETSC_SMALL (i == n if none); presumably the numerical rank, relying on gesvd returning the values in decreasing order */
58: if (!rwork) {
59: PetscFree(sing);
60: }
61: if (!work) {
62: PetscFree(uwork);
63: }
64: /* create B by copying the selected columns of U (column-major storage) */
65: if (!range) {
66: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
67: MatDenseGetArray(*B,&data);
68: PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar)); /* columns i..nr-1 of U */
69: } else {
70: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
71: MatDenseGetArray(*B,&data);
72: PetscMemcpy(data,U,i*nr*sizeof(PetscScalar)); /* columns 0..i-1 of U */
73: }
74: MatDenseRestoreArray(*B,&data);
75: PetscFree(U);
76: #endif
77: #else /* PETSC_USE_COMPLEX */
79: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
80: #endif
81: return(0);
82: }
84: /* TODO REMOVE: debug-only state, compiled only when PRINT_GDET is defined.
   Both values are used to build the name of the MATLAB dump file written by
   PCBDDCComputeNedelecChangeEdge: inc is incremented at every dump, lev is only read there. */
85: #if defined(PRINT_GDET)
86: static int inc = 0;
87: static int lev = 0;
88: #endif
/* Computes, for one edge, the dense blocks of the local discrete gradient lG that are
   used to build the Nedelec change of basis.

   Input:
     lG      - local discrete gradient (rows: edge dofs, columns: nodal dofs)
     edge    - rows of lG belonging to the edge (esize entries); if empty the routine
               returns immediately and no output is touched
     extrow  - rows used to extract the gradient block (rsize entries)
     extcol  - columns used for both extractions (csize entries)
     corners - optional columns of lG; when not NULL, cvals is filled
     work    - scalar workspace, used in three consecutive segments:
                 [0, 5*esize)                     LAPACK workspace for the SVD
                 next rsize*csize entries         storage of *Gins
                 next esize*csize entries         storage of the temporary lG(edge,extcol)
               hence it should hold at least 5*esize + rsize*csize + esize*csize scalars
     rwork   - real workspace forwarded to MatDenseOrthogonalRangeOrComplement for the
               singular values
   Output:
     Gins    - dense rsize x csize copy of lG(extrow,extcol). It is created on top of the
               caller-provided work array, so work must stay valid as long as *Gins is used
     GKins   - dense matrix whose columns are orthogonal to the range of lG(edge,extcol),
               as returned by MatDenseOrthogonalRangeOrComplement with range = PETSC_FALSE
     cvals   - first two entries of lG(edge,corners)^T * GKins (only set when corners is given) */
90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
91: {
93: Mat GE,GEd;
94: PetscInt rsize,csize,esize;
95: PetscScalar *ptr;
98: ISGetSize(edge,&esize);
99: if (!esize) return(0); /* empty edge: outputs are left untouched */
100: ISGetSize(extrow,&rsize);
101: ISGetSize(extcol,&csize);
103: /* gradients: dense copy of lG(extrow,extcol), stored in work after the SVD workspace */
104: ptr = work + 5*esize;
105: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108: MatDestroy(&GE);
110: /* constants: basis orthogonal to the range of lG(edge,extcol) */
111: ptr += rsize*csize; /* skip the storage just assigned to *Gins */
112: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115: MatDestroy(&GE);
116: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins); /* the first 5*esize scalars of work serve as LAPACK workspace */
117: MatDestroy(&GEd);
119: if (corners) {
120: Mat GEc;
121: PetscScalar *vals,v;
123: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd); /* GEd is reused for the product GEc^T * GKins */
125: MatDenseGetArray(GEd,&vals);
126: /* normalization by the first entry is disabled: v = PetscAbsScalar(vals[0]) */;
127: v = 1.;
128: cvals[0] = vals[0]/v;
129: cvals[1] = vals[1]/v; /* assumes the product has at least two entries - TODO confirm against callers */
130: MatDenseRestoreArray(GEd,&vals);
131: MatScale(*GKins,1./v); /* scaling factor is 1 as long as v is fixed to 1 above */
132: #if defined(PRINT_GDET)
133: {
134: PetscViewer viewer;
135: char filename[256];
136: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); /* one file per dump: inc is incremented at every call */
137: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139: PetscObjectSetName((PetscObject)GEc,"GEc");
140: MatView(GEc,viewer);
141: PetscObjectSetName((PetscObject)(*GKins),"GK");
142: MatView(*GKins,viewer);
143: PetscObjectSetName((PetscObject)GEd,"Gproj");
144: MatView(GEd,viewer);
145: PetscViewerDestroy(&viewer);
146: }
147: #endif
148: MatDestroy(&GEd);
149: MatDestroy(&GEc);
150: }
152: return(0);
153: }
155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
158: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
159: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160: Vec tvec;
161: PetscSF sfv;
162: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163: MPI_Comm comm;
164: IS lned,primals,allprimals,nedfieldlocal;
165: IS *eedges,*extrows,*extcols,*alleedges;
166: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167: PetscScalar *vals,*work;
168: PetscReal *rwork;
169: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
170: PetscInt ne,nv,Lv,order,n,field;
171: PetscInt n_neigh,*neigh,*n_shared,**shared;
172: PetscInt i,j,extmem,cum,maxsize,nee;
173: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174: PetscInt *sfvleaves,*sfvroots;
175: PetscInt *corners,*cedges;
176: PetscInt *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178: PetscInt *emarks;
179: #endif
180: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181: PetscErrorCode ierr;
184: /* If the discrete gradient is defined for a subset of dofs and global is true,
185: it assumes G is given in global ordering for all the dofs.
186: Otherwise, the ordering is global for the Nedelec field */
187: order = pcbddc->nedorder;
188: conforming = pcbddc->conforming;
189: field = pcbddc->nedfield;
190: global = pcbddc->nedglobal;
191: setprimal = PETSC_FALSE;
192: print = PETSC_FALSE;
193: singular = PETSC_FALSE;
195: /* Command line customization */
196: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200: /* print debug info TODO: to be removed */
201: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202: PetscOptionsEnd();
204: /* Return if there are no edges in the decomposition and the problem is not singular */
205: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206: ISLocalToGlobalMappingGetSize(al2g,&n);
207: PetscObjectGetComm((PetscObject)pc,&comm);
208: if (!singular) {
209: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210: lrc[0] = PETSC_FALSE;
211: for (i=0;i<n;i++) {
212: if (PetscRealPart(vals[i]) > 2.) {
213: lrc[0] = PETSC_TRUE;
214: break;
215: }
216: }
217: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219: if (!lrc[1]) return(0);
220: }
222: /* Get Nedelec field */
223: MatISSetUpSF(pc->pmat);
224: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
225: if (pcbddc->n_ISForDofsLocal && field >= 0) {
226: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
227: nedfieldlocal = pcbddc->ISForDofsLocal[field];
228: ISGetLocalSize(nedfieldlocal,&ne);
229: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
230: ne = n;
231: nedfieldlocal = NULL;
232: global = PETSC_TRUE;
233: } else if (field == PETSC_DECIDE) {
234: PetscInt rst,ren,*idx;
236: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
237: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
238: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
239: for (i=rst;i<ren;i++) {
240: PetscInt nc;
242: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
244: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
245: }
246: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
248: PetscMalloc1(n,&idx);
249: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
250: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
251: } else {
252: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
253: }
255: /* Sanity checks */
256: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
257: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
258: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
260: /* Just set primal dofs and return */
261: if (setprimal) {
262: IS enedfieldlocal;
263: PetscInt *eidxs;
265: PetscMalloc1(ne,&eidxs);
266: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
267: if (nedfieldlocal) {
268: ISGetIndices(nedfieldlocal,&idxs);
269: for (i=0,cum=0;i<ne;i++) {
270: if (PetscRealPart(vals[idxs[i]]) > 2.) {
271: eidxs[cum++] = idxs[i];
272: }
273: }
274: ISRestoreIndices(nedfieldlocal,&idxs);
275: } else {
276: for (i=0,cum=0;i<ne;i++) {
277: if (PetscRealPart(vals[i]) > 2.) {
278: eidxs[cum++] = i;
279: }
280: }
281: }
282: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
283: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
284: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
285: PetscFree(eidxs);
286: ISDestroy(&nedfieldlocal);
287: ISDestroy(&enedfieldlocal);
288: return(0);
289: }
291: /* Compute some l2g maps */
292: if (nedfieldlocal) {
293: IS is;
295: /* need to map from the local Nedelec field to local numbering */
296: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
297: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
298: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
299: ISLocalToGlobalMappingCreateIS(is,&al2g);
300: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
301: if (global) {
302: PetscObjectReference((PetscObject)al2g);
303: el2g = al2g;
304: } else {
305: IS gis;
307: ISRenumber(is,NULL,NULL,&gis);
308: ISLocalToGlobalMappingCreateIS(gis,&el2g);
309: ISDestroy(&gis);
310: }
311: ISDestroy(&is);
312: } else {
313: /* restore default */
314: pcbddc->nedfield = -1;
315: /* one ref for the destruction of al2g, one for el2g */
316: PetscObjectReference((PetscObject)al2g);
317: PetscObjectReference((PetscObject)al2g);
318: el2g = al2g;
319: fl2g = NULL;
320: }
322: /* Start communication to drop connections for interior edges (for cc analysis only) */
323: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
324: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
325: if (nedfieldlocal) {
326: ISGetIndices(nedfieldlocal,&idxs);
327: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
328: ISRestoreIndices(nedfieldlocal,&idxs);
329: } else {
330: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
331: }
332: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
335: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
336: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
337: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
338: if (global) {
339: PetscInt rst;
341: MatGetOwnershipRange(G,&rst,NULL);
342: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
343: if (matis->sf_rootdata[i] < 2) {
344: matis->sf_rootdata[cum++] = i + rst;
345: }
346: }
347: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
348: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
349: } else {
350: PetscInt *tbz;
352: PetscMalloc1(ne,&tbz);
353: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
355: ISGetIndices(nedfieldlocal,&idxs);
356: for (i=0,cum=0;i<ne;i++)
357: if (matis->sf_leafdata[idxs[i]] == 1)
358: tbz[cum++] = i;
359: ISRestoreIndices(nedfieldlocal,&idxs);
360: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
361: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
362: PetscFree(tbz);
363: }
364: } else { /* we need the entire G to infer the nullspace */
365: PetscObjectReference((PetscObject)pcbddc->discretegradient);
366: G = pcbddc->discretegradient;
367: }
369: /* Extract subdomain relevant rows of G */
370: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
371: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
372: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
373: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
374: ISDestroy(&lned);
375: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
376: MatDestroy(&lGall);
377: MatISGetLocalMat(lGis,&lG);
379: /* SF for nodal dofs communications */
380: MatGetLocalSize(G,NULL,&Lv);
381: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
382: PetscObjectReference((PetscObject)vl2g);
383: ISLocalToGlobalMappingGetSize(vl2g,&nv);
384: PetscSFCreate(comm,&sfv);
385: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
386: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
387: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
388: i = singular ? 2 : 1;
389: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
391: /* Destroy temporary G created in MATIS format and modified G */
392: PetscObjectReference((PetscObject)lG);
393: MatDestroy(&lGis);
394: MatDestroy(&G);
396: if (print) {
397: PetscObjectSetName((PetscObject)lG,"initial_lG");
398: MatView(lG,NULL);
399: }
401: /* Save lG for values insertion in change of basis */
402: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
404: /* Analyze the edge-nodes connections (duplicate lG) */
405: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
406: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
407: PetscBTCreate(nv,&btv);
408: PetscBTCreate(ne,&bte);
409: PetscBTCreate(ne,&btb);
410: PetscBTCreate(ne,&btbd);
411: PetscBTCreate(nv,&btvcand);
412: /* need to import the boundary specification to ensure the
413: proper detection of coarse edges' endpoints */
414: if (pcbddc->DirichletBoundariesLocal) {
415: IS is;
417: if (fl2g) {
418: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
419: } else {
420: is = pcbddc->DirichletBoundariesLocal;
421: }
422: ISGetLocalSize(is,&cum);
423: ISGetIndices(is,&idxs);
424: for (i=0;i<cum;i++) {
425: if (idxs[i] >= 0) {
426: PetscBTSet(btb,idxs[i]);
427: PetscBTSet(btbd,idxs[i]);
428: }
429: }
430: ISRestoreIndices(is,&idxs);
431: if (fl2g) {
432: ISDestroy(&is);
433: }
434: }
435: if (pcbddc->NeumannBoundariesLocal) {
436: IS is;
438: if (fl2g) {
439: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
440: } else {
441: is = pcbddc->NeumannBoundariesLocal;
442: }
443: ISGetLocalSize(is,&cum);
444: ISGetIndices(is,&idxs);
445: for (i=0;i<cum;i++) {
446: if (idxs[i] >= 0) {
447: PetscBTSet(btb,idxs[i]);
448: }
449: }
450: ISRestoreIndices(is,&idxs);
451: if (fl2g) {
452: ISDestroy(&is);
453: }
454: }
456: /* Count neighs per dof */
457: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
458: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
460: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
461: for proper detection of coarse edges' endpoints */
462: PetscBTCreate(ne,&btee);
463: for (i=0;i<ne;i++) {
464: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
465: PetscBTSet(btee,i);
466: }
467: }
468: PetscMalloc1(ne,&marks);
469: if (!conforming) {
470: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
471: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
472: }
473: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
474: MatSeqAIJGetArray(lGe,&vals);
475: cum = 0;
476: for (i=0;i<ne;i++) {
477: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
478: if (!PetscBTLookup(btee,i)) {
479: marks[cum++] = i;
480: continue;
481: }
482: /* set badly connected edge dofs as primal */
483: if (!conforming) {
484: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
485: marks[cum++] = i;
486: PetscBTSet(bte,i);
487: for (j=ii[i];j<ii[i+1];j++) {
488: PetscBTSet(btv,jj[j]);
489: }
490: } else {
491: /* every edge dofs should be connected trough a certain number of nodal dofs
492: to other edge dofs belonging to coarse edges
493: - at most 2 endpoints
494: - order-1 interior nodal dofs
495: - no undefined nodal dofs (nconn < order)
496: */
497: PetscInt ends = 0,ints = 0, undef = 0;
498: for (j=ii[i];j<ii[i+1];j++) {
499: PetscInt v = jj[j],k;
500: PetscInt nconn = iit[v+1]-iit[v];
501: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
502: if (nconn > order) ends++;
503: else if (nconn == order) ints++;
504: else undef++;
505: }
506: if (undef || ends > 2 || ints != order -1) {
507: marks[cum++] = i;
508: PetscBTSet(bte,i);
509: for (j=ii[i];j<ii[i+1];j++) {
510: PetscBTSet(btv,jj[j]);
511: }
512: }
513: }
514: }
515: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
516: if (!order && ii[i+1] != ii[i]) {
517: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
518: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
519: }
520: }
521: PetscBTDestroy(&btee);
522: MatSeqAIJRestoreArray(lGe,&vals);
523: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
524: if (!conforming) {
525: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
526: MatDestroy(&lGt);
527: }
528: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
530: /* identify splitpoints and corner candidates */
531: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
532: if (print) {
533: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
534: MatView(lGe,NULL);
535: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
536: MatView(lGt,NULL);
537: }
538: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
539: MatSeqAIJGetArray(lGt,&vals);
540: for (i=0;i<nv;i++) {
541: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
542: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
543: if (!order) { /* variable order */
544: PetscReal vorder = 0.;
546: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
547: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
548: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
549: ord = 1;
550: }
551: #if defined(PETSC_USE_DEBUG)
552: if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
553: #endif
554: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
555: if (PetscBTLookup(btbd,jj[j])) {
556: bdir = PETSC_TRUE;
557: break;
558: }
559: if (vc != ecount[jj[j]]) {
560: sneighs = PETSC_FALSE;
561: } else {
562: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
563: for (k=0;k<vc;k++) {
564: if (vn[k] != en[k]) {
565: sneighs = PETSC_FALSE;
566: break;
567: }
568: }
569: }
570: }
571: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
572: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
573: PetscBTSet(btv,i);
574: } else if (test == ord) {
575: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
576: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
577: PetscBTSet(btv,i);
578: } else {
579: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
580: PetscBTSet(btvcand,i);
581: }
582: }
583: }
584: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
585: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
586: PetscBTDestroy(&btbd);
588: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
589: if (order != 1) {
590: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
591: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
592: for (i=0;i<nv;i++) {
593: if (PetscBTLookup(btvcand,i)) {
594: PetscBool found = PETSC_FALSE;
595: for (j=ii[i];j<ii[i+1] && !found;j++) {
596: PetscInt k,e = jj[j];
597: if (PetscBTLookup(bte,e)) continue;
598: for (k=iit[e];k<iit[e+1];k++) {
599: PetscInt v = jjt[k];
600: if (v != i && PetscBTLookup(btvcand,v)) {
601: found = PETSC_TRUE;
602: break;
603: }
604: }
605: }
606: if (!found) {
607: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
608: PetscBTClear(btvcand,i);
609: } else {
610: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
611: }
612: }
613: }
614: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
615: }
616: MatSeqAIJRestoreArray(lGt,&vals);
617: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
618: MatDestroy(&lGe);
620: /* Get the local G^T explicitly */
621: MatDestroy(&lGt);
622: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
623: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
625: /* Mark interior nodal dofs */
626: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
627: PetscBTCreate(nv,&btvi);
628: for (i=1;i<n_neigh;i++) {
629: for (j=0;j<n_shared[i];j++) {
630: PetscBTSet(btvi,shared[i][j]);
631: }
632: }
633: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
635: /* communicate corners and splitpoints */
636: PetscMalloc1(nv,&vmarks);
637: PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
638: PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
639: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
641: if (print) {
642: IS tbz;
644: cum = 0;
645: for (i=0;i<nv;i++)
646: if (sfvleaves[i])
647: vmarks[cum++] = i;
649: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
650: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
651: ISView(tbz,NULL);
652: ISDestroy(&tbz);
653: }
655: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
656: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
657: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
658: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
660: /* Zero rows of lGt corresponding to identified corners
661: and interior nodal dofs */
662: cum = 0;
663: for (i=0;i<nv;i++) {
664: if (sfvleaves[i]) {
665: vmarks[cum++] = i;
666: PetscBTSet(btv,i);
667: }
668: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
669: }
670: PetscBTDestroy(&btvi);
671: if (print) {
672: IS tbz;
674: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
675: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
676: ISView(tbz,NULL);
677: ISDestroy(&tbz);
678: }
679: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
680: PetscFree(vmarks);
681: PetscSFDestroy(&sfv);
682: PetscFree2(sfvleaves,sfvroots);
684: /* Recompute G */
685: MatDestroy(&lG);
686: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
687: if (print) {
688: PetscObjectSetName((PetscObject)lG,"used_lG");
689: MatView(lG,NULL);
690: PetscObjectSetName((PetscObject)lGt,"used_lGt");
691: MatView(lGt,NULL);
692: }
694: /* Get primal dofs (if any) */
695: cum = 0;
696: for (i=0;i<ne;i++) {
697: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
698: }
699: if (fl2g) {
700: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
701: }
702: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
703: if (print) {
704: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
705: ISView(primals,NULL);
706: }
707: PetscBTDestroy(&bte);
708: /* TODO: what if the user passed in some of them ? */
709: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
710: ISDestroy(&primals);
712: /* Compute edge connectivity */
713: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
714: MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
715: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
716: if (fl2g) {
717: PetscBT btf;
718: PetscInt *iia,*jja,*iiu,*jju;
719: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
721: /* create CSR for all local dofs */
722: PetscMalloc1(n+1,&iia);
723: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
724: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
725: iiu = pcbddc->mat_graph->xadj;
726: jju = pcbddc->mat_graph->adjncy;
727: } else if (pcbddc->use_local_adj) {
728: rest = PETSC_TRUE;
729: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
730: } else {
731: free = PETSC_TRUE;
732: PetscMalloc2(n+1,&iiu,n,&jju);
733: iiu[0] = 0;
734: for (i=0;i<n;i++) {
735: iiu[i+1] = i+1;
736: jju[i] = -1;
737: }
738: }
740: /* import sizes of CSR */
741: iia[0] = 0;
742: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
744: /* overwrite entries corresponding to the Nedelec field */
745: PetscBTCreate(n,&btf);
746: ISGetIndices(nedfieldlocal,&idxs);
747: for (i=0;i<ne;i++) {
748: PetscBTSet(btf,idxs[i]);
749: iia[idxs[i]+1] = ii[i+1]-ii[i];
750: }
752: /* iia in CSR */
753: for (i=0;i<n;i++) iia[i+1] += iia[i];
755: /* jja in CSR */
756: PetscMalloc1(iia[n],&jja);
757: for (i=0;i<n;i++)
758: if (!PetscBTLookup(btf,i))
759: for (j=0;j<iiu[i+1]-iiu[i];j++)
760: jja[iia[i]+j] = jju[iiu[i]+j];
762: /* map edge dofs connectivity */
763: if (jj) {
764: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
765: for (i=0;i<ne;i++) {
766: PetscInt e = idxs[i];
767: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
768: }
769: }
770: ISRestoreIndices(nedfieldlocal,&idxs);
771: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
772: if (rest) {
773: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
774: }
775: if (free) {
776: PetscFree2(iiu,jju);
777: }
778: PetscBTDestroy(&btf);
779: } else {
780: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
781: }
783: /* Analyze interface for edge dofs */
784: PCBDDCAnalyzeInterface(pc);
785: pcbddc->mat_graph->twodim = PETSC_FALSE;
787: /* Get coarse edges in the edge space */
788: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
789: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
791: if (fl2g) {
792: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
793: PetscMalloc1(nee,&eedges);
794: for (i=0;i<nee;i++) {
795: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
796: }
797: } else {
798: eedges = alleedges;
799: primals = allprimals;
800: }
802: /* Mark fine edge dofs with their coarse edge id */
803: PetscMemzero(marks,ne*sizeof(PetscInt));
804: ISGetLocalSize(primals,&cum);
805: ISGetIndices(primals,&idxs);
806: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
807: ISRestoreIndices(primals,&idxs);
808: if (print) {
809: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
810: ISView(primals,NULL);
811: }
813: maxsize = 0;
814: for (i=0;i<nee;i++) {
815: PetscInt size,mark = i+1;
817: ISGetLocalSize(eedges[i],&size);
818: ISGetIndices(eedges[i],&idxs);
819: for (j=0;j<size;j++) marks[idxs[j]] = mark;
820: ISRestoreIndices(eedges[i],&idxs);
821: maxsize = PetscMax(maxsize,size);
822: }
824: /* Find coarse edge endpoints */
825: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
826: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
827: for (i=0;i<nee;i++) {
828: PetscInt mark = i+1,size;
830: ISGetLocalSize(eedges[i],&size);
831: if (!size && nedfieldlocal) continue;
832: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
833: ISGetIndices(eedges[i],&idxs);
834: if (print) {
835: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
836: ISView(eedges[i],NULL);
837: }
838: for (j=0;j<size;j++) {
839: PetscInt k, ee = idxs[j];
840: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
841: for (k=ii[ee];k<ii[ee+1];k++) {
842: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
843: if (PetscBTLookup(btv,jj[k])) {
844: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
845: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
846: PetscInt k2;
847: PetscBool corner = PETSC_FALSE;
848: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
849: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
850: /* it's a corner if either is connected with an edge dof belonging to a different cc or
851: if the edge dof lie on the natural part of the boundary */
852: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
853: corner = PETSC_TRUE;
854: break;
855: }
856: }
857: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
858: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
859: PetscBTSet(btv,jj[k]);
860: } else {
861: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
862: }
863: }
864: }
865: }
866: ISRestoreIndices(eedges[i],&idxs);
867: }
868: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
869: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
870: PetscBTDestroy(&btb);
872: /* Reset marked primal dofs */
873: ISGetLocalSize(primals,&cum);
874: ISGetIndices(primals,&idxs);
875: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
876: ISRestoreIndices(primals,&idxs);
878: /* Now use the initial lG */
879: MatDestroy(&lG);
880: MatDestroy(&lGt);
881: lG = lGinit;
882: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
884: /* Compute extended cols indices */
885: PetscBTCreate(nv,&btvc);
886: PetscBTCreate(nee,&bter);
887: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
888: MatSeqAIJGetMaxRowNonzeros(lG,&i);
889: i *= maxsize;
890: PetscCalloc1(nee,&extcols);
891: PetscMalloc2(i,&extrow,i,&gidxs);
892: eerr = PETSC_FALSE;
893: for (i=0;i<nee;i++) {
894: PetscInt size,found = 0;
896: cum = 0;
897: ISGetLocalSize(eedges[i],&size);
898: if (!size && nedfieldlocal) continue;
899: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
900: ISGetIndices(eedges[i],&idxs);
901: PetscBTMemzero(nv,btvc);
902: for (j=0;j<size;j++) {
903: PetscInt k,ee = idxs[j];
904: for (k=ii[ee];k<ii[ee+1];k++) {
905: PetscInt vv = jj[k];
906: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
907: else if (!PetscBTLookupSet(btvc,vv)) found++;
908: }
909: }
910: ISRestoreIndices(eedges[i],&idxs);
911: PetscSortRemoveDupsInt(&cum,extrow);
912: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
913: PetscSortIntWithArray(cum,gidxs,extrow);
914: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
915: /* it may happen that endpoints are not defined at this point
916: if it is the case, mark this edge for a second pass */
917: if (cum != size -1 || found != 2) {
918: PetscBTSet(bter,i);
919: if (print) {
920: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
921: ISView(eedges[i],NULL);
922: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
923: ISView(extcols[i],NULL);
924: }
925: eerr = PETSC_TRUE;
926: }
927: }
928: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
929: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
930: if (done) {
931: PetscInt *newprimals;
933: PetscMalloc1(ne,&newprimals);
934: ISGetLocalSize(primals,&cum);
935: ISGetIndices(primals,&idxs);
936: PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
937: ISRestoreIndices(primals,&idxs);
938: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
939: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
940: for (i=0;i<nee;i++) {
941: PetscBool has_candidates = PETSC_FALSE;
942: if (PetscBTLookup(bter,i)) {
943: PetscInt size,mark = i+1;
945: ISGetLocalSize(eedges[i],&size);
946: ISGetIndices(eedges[i],&idxs);
947: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
948: for (j=0;j<size;j++) {
949: PetscInt k,ee = idxs[j];
950: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
951: for (k=ii[ee];k<ii[ee+1];k++) {
952: /* set all candidates located on the edge as corners */
953: if (PetscBTLookup(btvcand,jj[k])) {
954: PetscInt k2,vv = jj[k];
955: has_candidates = PETSC_TRUE;
956: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
957: PetscBTSet(btv,vv);
958: /* set all edge dofs connected to candidate as primals */
959: for (k2=iit[vv];k2<iit[vv+1];k2++) {
960: if (marks[jjt[k2]] == mark) {
961: PetscInt k3,ee2 = jjt[k2];
962: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
963: newprimals[cum++] = ee2;
964: /* finally set the new corners */
965: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
966: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
967: PetscBTSet(btv,jj[k3]);
968: }
969: }
970: }
971: } else {
972: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
973: }
974: }
975: }
976: if (!has_candidates) { /* circular edge */
977: PetscInt k, ee = idxs[0],*tmarks;
979: PetscCalloc1(ne,&tmarks);
980: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
981: for (k=ii[ee];k<ii[ee+1];k++) {
982: PetscInt k2;
983: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
984: PetscBTSet(btv,jj[k]);
985: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
986: }
987: for (j=0;j<size;j++) {
988: if (tmarks[idxs[j]] > 1) {
989: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
990: newprimals[cum++] = idxs[j];
991: }
992: }
993: PetscFree(tmarks);
994: }
995: ISRestoreIndices(eedges[i],&idxs);
996: }
997: ISDestroy(&extcols[i]);
998: }
999: PetscFree(extcols);
1000: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1001: PetscSortRemoveDupsInt(&cum,newprimals);
1002: if (fl2g) {
1003: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1004: ISDestroy(&primals);
1005: for (i=0;i<nee;i++) {
1006: ISDestroy(&eedges[i]);
1007: }
1008: PetscFree(eedges);
1009: }
1010: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1011: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1012: PetscFree(newprimals);
1013: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1014: ISDestroy(&primals);
1015: PCBDDCAnalyzeInterface(pc);
1016: pcbddc->mat_graph->twodim = PETSC_FALSE;
1017: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1018: if (fl2g) {
1019: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1020: PetscMalloc1(nee,&eedges);
1021: for (i=0;i<nee;i++) {
1022: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1023: }
1024: } else {
1025: eedges = alleedges;
1026: primals = allprimals;
1027: }
1028: PetscCalloc1(nee,&extcols);
1030: /* Mark again */
1031: PetscMemzero(marks,ne*sizeof(PetscInt));
1032: for (i=0;i<nee;i++) {
1033: PetscInt size,mark = i+1;
1035: ISGetLocalSize(eedges[i],&size);
1036: ISGetIndices(eedges[i],&idxs);
1037: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1038: ISRestoreIndices(eedges[i],&idxs);
1039: }
1040: if (print) {
1041: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1042: ISView(primals,NULL);
1043: }
1045: /* Recompute extended cols */
1046: eerr = PETSC_FALSE;
1047: for (i=0;i<nee;i++) {
1048: PetscInt size;
1050: cum = 0;
1051: ISGetLocalSize(eedges[i],&size);
1052: if (!size && nedfieldlocal) continue;
1053: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1054: ISGetIndices(eedges[i],&idxs);
1055: for (j=0;j<size;j++) {
1056: PetscInt k,ee = idxs[j];
1057: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1058: }
1059: ISRestoreIndices(eedges[i],&idxs);
1060: PetscSortRemoveDupsInt(&cum,extrow);
1061: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1062: PetscSortIntWithArray(cum,gidxs,extrow);
1063: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1064: if (cum != size -1) {
1065: if (print) {
1066: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1067: ISView(eedges[i],NULL);
1068: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1069: ISView(extcols[i],NULL);
1070: }
1071: eerr = PETSC_TRUE;
1072: }
1073: }
1074: }
1075: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1076: PetscFree2(extrow,gidxs);
1077: PetscBTDestroy(&bter);
1078: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1079: /* an error should not occur at this point */
1080: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1082: /* Check the number of endpoints */
1083: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1084: PetscMalloc1(2*nee,&corners);
1085: PetscMalloc1(nee,&cedges);
1086: for (i=0;i<nee;i++) {
1087: PetscInt size, found = 0, gc[2];
1089: /* init with defaults */
1090: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1091: ISGetLocalSize(eedges[i],&size);
1092: if (!size && nedfieldlocal) continue;
1093: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1094: ISGetIndices(eedges[i],&idxs);
1095: PetscBTMemzero(nv,btvc);
1096: for (j=0;j<size;j++) {
1097: PetscInt k,ee = idxs[j];
1098: for (k=ii[ee];k<ii[ee+1];k++) {
1099: PetscInt vv = jj[k];
1100: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1101: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1102: corners[i*2+found++] = vv;
1103: }
1104: }
1105: }
1106: if (found != 2) {
1107: PetscInt e;
1108: if (fl2g) {
1109: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1110: } else {
1111: e = idxs[0];
1112: }
1113: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1114: }
1116: /* get primal dof index on this coarse edge */
1117: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1118: if (gc[0] > gc[1]) {
1119: PetscInt swap = corners[2*i];
1120: corners[2*i] = corners[2*i+1];
1121: corners[2*i+1] = swap;
1122: }
1123: cedges[i] = idxs[size-1];
1124: ISRestoreIndices(eedges[i],&idxs);
1125: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1126: }
1127: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1128: PetscBTDestroy(&btvc);
1130: #if defined(PETSC_USE_DEBUG)
1131: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1132: not interfere with neighbouring coarse edges */
1133: PetscMalloc1(nee+1,&emarks);
1134: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1135: for (i=0;i<nv;i++) {
1136: PetscInt emax = 0,eemax = 0;
1138: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1139: PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1140: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1141: for (j=1;j<nee+1;j++) {
1142: if (emax < emarks[j]) {
1143: emax = emarks[j];
1144: eemax = j;
1145: }
1146: }
1147: /* not relevant for edges */
1148: if (!eemax) continue;
1150: for (j=ii[i];j<ii[i+1];j++) {
1151: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1152: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1153: }
1154: }
1155: }
1156: PetscFree(emarks);
1157: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1158: #endif
1160: /* Compute extended rows indices for edge blocks of the change of basis */
1161: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1162: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1163: extmem *= maxsize;
1164: PetscMalloc1(extmem*nee,&extrow);
1165: PetscMalloc1(nee,&extrows);
1166: PetscCalloc1(nee,&extrowcum);
1167: for (i=0;i<nv;i++) {
1168: PetscInt mark = 0,size,start;
1170: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1171: for (j=ii[i];j<ii[i+1];j++)
1172: if (marks[jj[j]] && !mark)
1173: mark = marks[jj[j]];
1175: /* not relevant */
1176: if (!mark) continue;
1178: /* import extended row */
1179: mark--;
1180: start = mark*extmem+extrowcum[mark];
1181: size = ii[i+1]-ii[i];
1182: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1183: PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1184: extrowcum[mark] += size;
1185: }
1186: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1187: MatDestroy(&lGt);
1188: PetscFree(marks);
1190: /* Compress extrows */
1191: cum = 0;
1192: for (i=0;i<nee;i++) {
1193: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1194: PetscSortRemoveDupsInt(&size,start);
1195: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1196: cum = PetscMax(cum,size);
1197: }
1198: PetscFree(extrowcum);
1199: PetscBTDestroy(&btv);
1200: PetscBTDestroy(&btvcand);
1202: /* Workspace for lapack inner calls and VecSetValues */
1203: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1205: /* Create change of basis matrix (preallocation can be improved) */
1206: MatCreate(comm,&T);
1207: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1208: pc->pmat->rmap->N,pc->pmat->rmap->N);
1209: MatSetType(T,MATAIJ);
1210: MatSeqAIJSetPreallocation(T,10,NULL);
1211: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1212: MatSetLocalToGlobalMapping(T,al2g,al2g);
1213: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1214: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1215: ISLocalToGlobalMappingDestroy(&al2g);
1217: /* Defaults to identity */
1218: MatCreateVecs(pc->pmat,&tvec,NULL);
1219: VecSet(tvec,1.0);
1220: MatDiagonalSet(T,tvec,INSERT_VALUES);
1221: VecDestroy(&tvec);
1223: /* Create discrete gradient for the coarser level if needed */
1224: MatDestroy(&pcbddc->nedcG);
1225: ISDestroy(&pcbddc->nedclocal);
1226: if (pcbddc->current_level < pcbddc->max_levels) {
1227: ISLocalToGlobalMapping cel2g,cvl2g;
1228: IS wis,gwis;
1229: PetscInt cnv,cne;
1231: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1232: if (fl2g) {
1233: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1234: } else {
1235: PetscObjectReference((PetscObject)wis);
1236: pcbddc->nedclocal = wis;
1237: }
1238: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1239: ISDestroy(&wis);
1240: ISRenumber(gwis,NULL,&cne,&wis);
1241: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1242: ISDestroy(&wis);
1243: ISDestroy(&gwis);
1245: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1246: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1247: ISDestroy(&wis);
1248: ISRenumber(gwis,NULL,&cnv,&wis);
1249: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1250: ISDestroy(&wis);
1251: ISDestroy(&gwis);
1253: MatCreate(comm,&pcbddc->nedcG);
1254: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1255: MatSetType(pcbddc->nedcG,MATAIJ);
1256: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1257: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1258: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1259: ISLocalToGlobalMappingDestroy(&cel2g);
1260: ISLocalToGlobalMappingDestroy(&cvl2g);
1261: }
1262: ISLocalToGlobalMappingDestroy(&vl2g);
1264: #if defined(PRINT_GDET)
1265: inc = 0;
1266: lev = pcbddc->current_level;
1267: #endif
1269: /* Insert values in the change of basis matrix */
1270: for (i=0;i<nee;i++) {
1271: Mat Gins = NULL, GKins = NULL;
1272: IS cornersis = NULL;
1273: PetscScalar cvals[2];
1275: if (pcbddc->nedcG) {
1276: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1277: }
1278: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1279: if (Gins && GKins) {
1280: PetscScalar *data;
1281: const PetscInt *rows,*cols;
1282: PetscInt nrh,nch,nrc,ncc;
1284: ISGetIndices(eedges[i],&cols);
1285: /* H1 */
1286: ISGetIndices(extrows[i],&rows);
1287: MatGetSize(Gins,&nrh,&nch);
1288: MatDenseGetArray(Gins,&data);
1289: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1290: MatDenseRestoreArray(Gins,&data);
1291: ISRestoreIndices(extrows[i],&rows);
1292: /* complement */
1293: MatGetSize(GKins,&nrc,&ncc);
1294: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1295: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1296: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1297: MatDenseGetArray(GKins,&data);
1298: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1299: MatDenseRestoreArray(GKins,&data);
1301: /* coarse discrete gradient */
1302: if (pcbddc->nedcG) {
1303: PetscInt cols[2];
1305: cols[0] = 2*i;
1306: cols[1] = 2*i+1;
1307: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1308: }
1309: ISRestoreIndices(eedges[i],&cols);
1310: }
1311: ISDestroy(&extrows[i]);
1312: ISDestroy(&extcols[i]);
1313: ISDestroy(&cornersis);
1314: MatDestroy(&Gins);
1315: MatDestroy(&GKins);
1316: }
1317: ISLocalToGlobalMappingDestroy(&el2g);
1319: /* Start assembling */
1320: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1321: if (pcbddc->nedcG) {
1322: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1323: }
1325: /* Free */
1326: if (fl2g) {
1327: ISDestroy(&primals);
1328: for (i=0;i<nee;i++) {
1329: ISDestroy(&eedges[i]);
1330: }
1331: PetscFree(eedges);
1332: }
1334: /* hack mat_graph with primal dofs on the coarse edges */
1335: {
1336: PCBDDCGraph graph = pcbddc->mat_graph;
1337: PetscInt *oqueue = graph->queue;
1338: PetscInt *ocptr = graph->cptr;
1339: PetscInt ncc,*idxs;
1341: /* find first primal edge */
1342: if (pcbddc->nedclocal) {
1343: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1344: } else {
1345: if (fl2g) {
1346: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1347: }
1348: idxs = cedges;
1349: }
1350: cum = 0;
1351: while (cum < nee && cedges[cum] < 0) cum++;
1353: /* adapt connected components */
1354: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1355: graph->cptr[0] = 0;
1356: for (i=0,ncc=0;i<graph->ncc;i++) {
1357: PetscInt lc = ocptr[i+1]-ocptr[i];
1358: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1359: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1360: graph->queue[graph->cptr[ncc]] = cedges[cum];
1361: ncc++;
1362: lc--;
1363: cum++;
1364: while (cum < nee && cedges[cum] < 0) cum++;
1365: }
1366: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1367: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1368: ncc++;
1369: }
1370: graph->ncc = ncc;
1371: if (pcbddc->nedclocal) {
1372: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1373: }
1374: PetscFree2(ocptr,oqueue);
1375: }
1376: ISLocalToGlobalMappingDestroy(&fl2g);
1377: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1378: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1379: MatDestroy(&conn);
1381: ISDestroy(&nedfieldlocal);
1382: PetscFree(extrow);
1383: PetscFree2(work,rwork);
1384: PetscFree(corners);
1385: PetscFree(cedges);
1386: PetscFree(extrows);
1387: PetscFree(extcols);
1388: MatDestroy(&lG);
1390: /* Complete assembling */
1391: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1392: if (pcbddc->nedcG) {
1393: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1394: #if 0
1395: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1396: MatView(pcbddc->nedcG,NULL);
1397: #endif
1398: }
1400: /* set change of basis */
1401: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1402: MatDestroy(&T);
1404: return(0);
1405: }
1407: /* the near-null space of BDDC carries information on quadrature weights,
1408: and these can be collinear -> so cheat with MatNullSpaceCreate
1409: and create a suitable set of basis vectors first */
1410: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1411: {
1413: PetscInt i;
1416: for (i=0;i<nvecs;i++) {
1417: PetscInt first,last;
1419: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1420: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1421: if (i>=first && i < last) {
1422: PetscScalar *data;
1423: VecGetArray(quad_vecs[i],&data);
1424: if (!has_const) {
1425: data[i-first] = 1.;
1426: } else {
1427: data[2*i-first] = 1./PetscSqrtReal(2.);
1428: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1429: }
1430: VecRestoreArray(quad_vecs[i],&data);
1431: }
1432: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1433: }
1434: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1435: for (i=0;i<nvecs;i++) { /* reset vectors */
1436: PetscInt first,last;
1437: VecLockPop(quad_vecs[i]);
1438: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1439: if (i>=first && i < last) {
1440: PetscScalar *data;
1441: VecGetArray(quad_vecs[i],&data);
1442: if (!has_const) {
1443: data[i-first] = 0.;
1444: } else {
1445: data[2*i-first] = 0.;
1446: data[2*i-first+1] = 0.;
1447: }
1448: VecRestoreArray(quad_vecs[i],&data);
1449: }
1450: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1451: VecLockPush(quad_vecs[i]);
1452: }
1453: return(0);
1454: }
1456: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1457: {
1458: Mat loc_divudotp;
1459: Vec p,v,vins,quad_vec,*quad_vecs;
1460: ISLocalToGlobalMapping map;
1461: PetscScalar *vals;
1462: const PetscScalar *array;
1463: PetscInt i,maxneighs,maxsize;
1464: PetscInt n_neigh,*neigh,*n_shared,**shared;
1465: PetscMPIInt rank;
1466: PetscErrorCode ierr;
1469: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1470: MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1471: if (!maxneighs) {
1472: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1473: *nnsp = NULL;
1474: return(0);
1475: }
1476: maxsize = 0;
1477: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1478: PetscMalloc1(maxsize,&vals);
1479: /* create vectors to hold quadrature weights */
1480: MatCreateVecs(A,&quad_vec,NULL);
1481: if (!transpose) {
1482: MatGetLocalToGlobalMapping(A,&map,NULL);
1483: } else {
1484: MatGetLocalToGlobalMapping(A,NULL,&map);
1485: }
1486: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1487: VecDestroy(&quad_vec);
1488: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1489: for (i=0;i<maxneighs;i++) {
1490: VecLockPop(quad_vecs[i]);
1491: VecSetLocalToGlobalMapping(quad_vecs[i],map);
1492: }
1494: /* compute local quad vec */
1495: MatISGetLocalMat(divudotp,&loc_divudotp);
1496: if (!transpose) {
1497: MatCreateVecs(loc_divudotp,&v,&p);
1498: } else {
1499: MatCreateVecs(loc_divudotp,&p,&v);
1500: }
1501: VecSet(p,1.);
1502: if (!transpose) {
1503: MatMultTranspose(loc_divudotp,p,v);
1504: } else {
1505: MatMult(loc_divudotp,p,v);
1506: }
1507: if (vl2l) {
1508: Mat lA;
1509: VecScatter sc;
1511: MatISGetLocalMat(A,&lA);
1512: MatCreateVecs(lA,&vins,NULL);
1513: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1514: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1515: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1516: VecScatterDestroy(&sc);
1517: } else {
1518: vins = v;
1519: }
1520: VecGetArrayRead(vins,&array);
1521: VecDestroy(&p);
1523: /* insert in global quadrature vecs */
1524: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1525: for (i=0;i<n_neigh;i++) {
1526: const PetscInt *idxs;
1527: PetscInt idx,nn,j;
1529: idxs = shared[i];
1530: nn = n_shared[i];
1531: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1532: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1533: idx = -(idx+1);
1534: VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);
1535: }
1536: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1537: VecRestoreArrayRead(vins,&array);
1538: if (vl2l) {
1539: VecDestroy(&vins);
1540: }
1541: VecDestroy(&v);
1542: PetscFree(vals);
1544: /* assemble near null space */
1545: for (i=0;i<maxneighs;i++) {
1546: VecAssemblyBegin(quad_vecs[i]);
1547: }
1548: for (i=0;i<maxneighs;i++) {
1549: VecAssemblyEnd(quad_vecs[i]);
1550: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1551: VecLockPush(quad_vecs[i]);
1552: }
1553: VecDestroyVecs(maxneighs,&quad_vecs);
1554: return(0);
1555: }
1557: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1558: {
1559: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1563: if (primalv) {
1564: if (pcbddc->user_primal_vertices_local) {
1565: IS list[2], newp;
1567: list[0] = primalv;
1568: list[1] = pcbddc->user_primal_vertices_local;
1569: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1570: ISSortRemoveDups(newp);
1571: ISDestroy(&list[1]);
1572: pcbddc->user_primal_vertices_local = newp;
1573: } else {
1574: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1575: }
1576: }
1577: return(0);
1578: }
1580: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1581: {
1582: PetscInt f, *comp = (PetscInt *)ctx;
1585: for (f=0;f<Nf;f++) out[f] = X[*comp];
1586: return(0);
1587: }
1589: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1590: {
1592: Vec local,global;
1593: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1594: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1595: PetscBool monolithic = PETSC_FALSE;
1598: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1599: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1600: PetscOptionsEnd();
1601: /* need to convert from global to local topology information and remove references to information in global ordering */
1602: MatCreateVecs(pc->pmat,&global,NULL);
1603: MatCreateVecs(matis->A,&local,NULL);
1604: if (monolithic) { /* just get block size to properly compute vertices */
1605: if (pcbddc->vertex_size == 1) {
1606: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1607: }
1608: goto boundary;
1609: }
1611: if (pcbddc->user_provided_isfordofs) {
1612: if (pcbddc->n_ISForDofs) {
1613: PetscInt i;
1614: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1615: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1616: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1617: ISDestroy(&pcbddc->ISForDofs[i]);
1618: }
1619: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1620: pcbddc->n_ISForDofs = 0;
1621: PetscFree(pcbddc->ISForDofs);
1622: }
1623: } else {
1624: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1625: DM dm;
1627: PCGetDM(pc, &dm);
1628: if (!dm) {
1629: MatGetDM(pc->pmat, &dm);
1630: }
1631: if (dm) {
1632: IS *fields;
1633: PetscInt nf,i;
1634: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1635: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1636: for (i=0;i<nf;i++) {
1637: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1638: ISDestroy(&fields[i]);
1639: }
1640: PetscFree(fields);
1641: pcbddc->n_ISForDofsLocal = nf;
1642: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1643: PetscContainer c;
1645: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1646: if (c) {
1647: MatISLocalFields lf;
1648: PetscContainerGetPointer(c,(void**)&lf);
1649: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1650: } else { /* fallback, create the default fields if bs > 1 */
1651: PetscInt i, n = matis->A->rmap->n;
1652: MatGetBlockSize(pc->pmat,&i);
1653: if (i > 1) {
1654: pcbddc->n_ISForDofsLocal = i;
1655: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1656: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1657: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1658: }
1659: }
1660: }
1661: }
1662: } else {
1663: PetscInt i;
1664: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1665: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1666: }
1667: }
1668: }
1670: boundary:
1671: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1672: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1673: } else if (pcbddc->DirichletBoundariesLocal) {
1674: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1675: }
1676: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1677: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1678: } else if (pcbddc->NeumannBoundariesLocal) {
1679: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1680: }
1681: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1682: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1683: }
1684: VecDestroy(&global);
1685: VecDestroy(&local);
1686: /* detect local disconnected subdomains if requested (use matis->A) */
1687: if (pcbddc->detect_disconnected) {
1688: IS primalv = NULL;
1689: PetscInt i;
1690: PetscBool filter = pcbddc->detect_disconnected_filter;
1692: for (i=0;i<pcbddc->n_local_subs;i++) {
1693: ISDestroy(&pcbddc->local_subs[i]);
1694: }
1695: PetscFree(pcbddc->local_subs);
1696: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1697: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1698: ISDestroy(&primalv);
1699: }
1700: /* early stage corner detection */
1701: {
1702: DM dm;
1704: MatGetDM(pc->pmat,&dm);
1705: if (dm) {
1706: PetscBool isda;
1708: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1709: if (isda) {
1710: ISLocalToGlobalMapping l2l;
1711: IS corners;
1712: Mat lA;
1714: DMDAGetSubdomainCornersIS(dm,&corners);
1715: MatISGetLocalMat(pc->pmat,&lA);
1716: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1717: MatISRestoreLocalMat(pc->pmat,&lA);
1718: if (l2l && corners) {
1719: const PetscInt *idx;
1720: PetscInt dof,bs,*idxout,n;
1722: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1723: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1724: ISGetLocalSize(corners,&n);
1725: ISGetIndices(corners,&idx);
1726: if (bs == dof) {
1727: PetscMalloc1(n,&idxout);
1728: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1729: } else { /* the original DMDA local-to-local map have been modified */
1730: PetscInt i,d;
1732: PetscMalloc1(dof*n,&idxout);
1733: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1734: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1736: bs = 1;
1737: n *= dof;
1738: }
1739: ISRestoreIndices(corners,&idx);
1740: DMDARestoreSubdomainCornersIS(dm,&corners);
1741: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1742: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1743: ISDestroy(&corners);
1744: pcbddc->corner_selected = PETSC_TRUE;
1745: } else if (corners) { /* not from DMDA */
1746: DMDARestoreSubdomainCornersIS(dm,&corners);
1747: }
1748: }
1749: }
1750: }
1751: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1752: DM dm;
1754: PCGetDM(pc,&dm);
1755: if (!dm) {
1756: MatGetDM(pc->pmat,&dm);
1757: }
1758: if (dm) {
1759: Vec vcoords;
1760: PetscSection section;
1761: PetscReal *coords;
1762: PetscInt d,cdim,nl,nf,**ctxs;
1763: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1765: DMGetCoordinateDim(dm,&cdim);
1766: DMGetSection(dm,§ion);
1767: PetscSectionGetNumFields(section,&nf);
1768: DMCreateGlobalVector(dm,&vcoords);
1769: VecGetLocalSize(vcoords,&nl);
1770: PetscMalloc1(nl*cdim,&coords);
1771: PetscMalloc2(nf,&funcs,nf,&ctxs);
1772: PetscMalloc1(nf,&ctxs[0]);
1773: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1774: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1775: for (d=0;d<cdim;d++) {
1776: PetscInt i;
1777: const PetscScalar *v;
1779: for (i=0;i<nf;i++) ctxs[i][0] = d;
1780: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1781: VecGetArrayRead(vcoords,&v);
1782: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1783: VecRestoreArrayRead(vcoords,&v);
1784: }
1785: VecDestroy(&vcoords);
1786: PCSetCoordinates(pc,cdim,nl,coords);
1787: PetscFree(coords);
1788: PetscFree(ctxs[0]);
1789: PetscFree2(funcs,ctxs);
1790: }
1791: }
1792: return(0);
1793: }
1795: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1796: {
1797: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1798: PetscErrorCode ierr;
1799: IS nis;
1800: const PetscInt *idxs;
1801: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1802: PetscBool *ld;
1805: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1806: MatISSetUpSF(pc->pmat);
1807: if (mop == MPI_LAND) {
1808: /* init rootdata with true */
1809: ld = (PetscBool*) matis->sf_rootdata;
1810: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1811: } else {
1812: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1813: }
1814: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1815: ISGetLocalSize(*is,&nd);
1816: ISGetIndices(*is,&idxs);
1817: ld = (PetscBool*) matis->sf_leafdata;
1818: for (i=0;i<nd;i++)
1819: if (-1 < idxs[i] && idxs[i] < n)
1820: ld[idxs[i]] = PETSC_TRUE;
1821: ISRestoreIndices(*is,&idxs);
1822: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1823: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1824: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1825: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1826: if (mop == MPI_LAND) {
1827: PetscMalloc1(nd,&nidxs);
1828: } else {
1829: PetscMalloc1(n,&nidxs);
1830: }
1831: for (i=0,nnd=0;i<n;i++)
1832: if (ld[i])
1833: nidxs[nnd++] = i;
1834: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1835: ISDestroy(is);
1836: *is = nis;
1837: return(0);
1838: }
/*
   PCBDDCBenignRemoveInterior - Scatters r to the "D" work vector, solves with
   pcbddc->ksp_D and scatters the solution back into z (zero elsewhere).

   Input Parameters:
+  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC
-  r  - input vector

   Output Parameter:
.  z  - result vector; left untouched when pcbddc->benign_have_null is false

   Notes:
   When pcbddc->ChangeOfBasisMatrix is set, r is first multiplied by its transpose
   and the result is multiplied by the matrix itself before being returned in z.
   pcbddc->work_change is used as scratch space in that case.
*/
1840: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1841: {
1842: PC_IS *pcis = (PC_IS*)(pc->data);
1843: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1844: PetscErrorCode ierr;
1847: if (!pcbddc->benign_have_null) {
1848: return(0);
1849: }
1850: if (pcbddc->ChangeOfBasisMatrix) {
1851: Vec swap;
/* work_change = ChangeOfBasisMatrix^T * r; afterwards the two handles are exchanged:
   the local name r refers to the transformed vector and pcbddc->work_change
   temporarily holds the caller's r (undone further below) */
1853: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1854: swap = pcbddc->work_change;
1855: pcbddc->work_change = r;
1856: r = swap;
1857: }
/* restrict to vec1_D, solve with ksp_D into vec2_D */
1858: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1859: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1860: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
/* z is zeroed first, so only the entries reached by the reverse scatter are nonzero */
1861: VecSet(z,0.);
1862: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1863: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1864: if (pcbddc->ChangeOfBasisMatrix) {
/* r is the original work vector here: put it back in place, then z = ChangeOfBasisMatrix * z
   using work_change as the temporary copy of z */
1865: pcbddc->work_change = r;
1866: VecCopy(z,pcbddc->work_change);
1867: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1868: }
1869: return(0);
1870: }
/*
   PCBDDCBenignMatMult_Private_Private - Shared kernel of the shell MatMult and
   MatMultTranspose: applies ctx->A (or its transpose) to x, optionally modifying
   the input before and the output after the product.

   Input Parameters:
+  A         - shell matrix whose context is a PCBDDCBenignMatMult_ctx
.  x         - input vector; its values are not modified, but its array is
               temporarily replaced through VecPlaceArray/VecResetArray when the
               right-side modification is applied
-  transpose - if true, ctx->A^T is applied and the roles of ctx->apply_left and
               ctx->apply_right are exchanged

   Output Parameter:
.  y         - result vector

   Notes:
   Both modifications loop over the ctx->benign_n index sets in
   ctx->benign_zerodiag_subs and treat the last index of each set specially.
   Each set is accessed at position nz-1, so sets are assumed to be non-empty
   (NOTE(review): not checked here - confirm with the callers).
*/
1872: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1873: {
1874: PCBDDCBenignMatMult_ctx ctx;
1875: PetscErrorCode ierr;
1876: PetscBool apply_right,apply_left,reset_x;
1879: MatShellGetContext(A,&ctx);
/* for the transposed product the left and right modifications trade places */
1880: if (transpose) {
1881: apply_right = ctx->apply_left;
1882: apply_left = ctx->apply_right;
1883: } else {
1884: apply_right = ctx->apply_right;
1885: apply_left = ctx->apply_left;
1886: }
1887: reset_x = PETSC_FALSE;
1888: if (apply_right) {
1889: const PetscScalar *ax;
1890: PetscInt nl,i;
/* work on a copy of x stored in ctx->work so that the caller's values are preserved */
1892: VecGetLocalSize(x,&nl);
1893: VecGetArrayRead(x,&ax);
1894: PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1895: VecRestoreArrayRead(x,&ax);
1896: for (i=0;i<ctx->benign_n;i++) {
1897: PetscScalar sum,val;
1898: const PetscInt *idxs;
1899: PetscInt nz,j;
1900: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1901: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
/* sum collects the entries of all but the last index, read before they are updated;
   with apply_p0 the value of the last entry is also added to each of them */
1902: sum = 0.;
1903: if (ctx->apply_p0) {
1904: val = ctx->work[idxs[nz-1]];
1905: for (j=0;j<nz-1;j++) {
1906: sum += ctx->work[idxs[j]];
1907: ctx->work[idxs[j]] += val;
1908: }
1909: } else {
1910: for (j=0;j<nz-1;j++) {
1911: sum += ctx->work[idxs[j]];
1912: }
1913: }
/* the last entry of the set is decreased by the sum of the others */
1914: ctx->work[idxs[nz-1]] -= sum;
1915: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1916: }
/* let x temporarily expose the modified copy; undone by VecResetArray after the product */
1917: VecPlaceArray(x,ctx->work);
1918: reset_x = PETSC_TRUE;
1919: }
1920: if (transpose) {
1921: MatMultTranspose(ctx->A,x,y);
1922: } else {
1923: MatMult(ctx->A,x,y);
1924: }
1925: if (reset_x) {
1926: VecResetArray(x);
1927: }
1928: if (apply_left) {
1929: PetscScalar *ay;
1930: PetscInt i;
1932: VecGetArray(y,&ay);
1933: for (i=0;i<ctx->benign_n;i++) {
1934: PetscScalar sum,val;
1935: const PetscInt *idxs;
1936: PetscInt nz,j;
1937: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1938: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
/* val is minus the last entry of the set; it is added to every other entry.
   With apply_p0 the last entry additionally receives the sum of the other entries
   (taken before their update); otherwise the last entry is set to zero */
1939: val = -ay[idxs[nz-1]];
1940: if (ctx->apply_p0) {
1941: sum = 0.;
1942: for (j=0;j<nz-1;j++) {
1943: sum += ay[idxs[j]];
1944: ay[idxs[j]] += val;
1945: }
1946: ay[idxs[nz-1]] += sum;
1947: } else {
1948: for (j=0;j<nz-1;j++) {
1949: ay[idxs[j]] += val;
1950: }
1951: ay[idxs[nz-1]] = 0.;
1952: }
1953: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1954: }
1955: VecRestoreArray(y,&ay);
1956: }
1957: return(0);
1958: }
/* Transposed product for the benign shell matrix (installed as MATOP_MULT_TRANSPOSE by
   PCBDDCBenignShellMat): forwards to the shared kernel with transpose = PETSC_TRUE. */
1960: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1961: {
1965: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
1966: return(0);
1967: }
/* Product for the benign shell matrix (installed as MATOP_MULT by PCBDDCBenignShellMat):
   forwards to the shared kernel with transpose = PETSC_FALSE. */
1969: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1970: {
1974: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
1975: return(0);
1976: }
/*
   PCBDDCBenignShellMat - Installs (restore = PETSC_FALSE) or removes
   (restore = PETSC_TRUE) shell replacements for pcis->A_IB and pcis->A_BI.

   Input Parameters:
+  pc      - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC
-  restore - PETSC_FALSE to wrap the matrices, PETSC_TRUE to put the originals back

   Notes:
   Install: nothing is done when pcbddc->benign_change is not set, pcbddc->benign_n
   is zero or pcbddc->benign_change_explicit is true. Otherwise pcis->A_IB becomes a
   MATSHELL using PCBDDCBenignMatMult_Private / PCBDDCBenignMatMultTranspose_Private,
   whose context keeps the original A_IB in ctx->A, and pcis->A_BI becomes the
   transpose of that shell. The original A_BI is parked in pcbddc->benign_original_mat;
   calling install again before a restore raises PETSC_ERR_PLIB.

   Restore: nothing is done when pcbddc->benign_original_mat is NULL. Otherwise both
   shells are destroyed, the original matrices are put back and the context
   (work array, and the index sets when ctx->free is set) is released.
*/
1978: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1979: {
1980: PC_IS *pcis = (PC_IS*)pc->data;
1981: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1982: PCBDDCBenignMatMult_ctx ctx;
1983: PetscErrorCode ierr;
1986: if (!restore) {
1987: Mat A_IB,A_BI;
1988: PetscScalar *work;
1989: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
1991: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
1992: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
/* scratch array handed to the shell context (ctx->work); freed in the restore branch */
1993: PetscMalloc1(pcis->n,&work);
/* sequential shell with local sizes (n - n_B) x n_B */
1994: MatCreate(PETSC_COMM_SELF,&A_IB);
1995: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
1996: MatSetType(A_IB,MATSHELL);
1997: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
1998: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
1999: PetscNew(&ctx);
2000: MatShellSetContext(A_IB,ctx);
/* only the output-side (left) modification is enabled for A_IB, without the p0 contribution */
2001: ctx->apply_left = PETSC_TRUE;
2002: ctx->apply_right = PETSC_FALSE;
2003: ctx->apply_p0 = PETSC_FALSE;
2004: ctx->benign_n = pcbddc->benign_n;
2005: if (reuse) {
/* borrow the index sets held by the reuse-solver data; they are not freed on restore */
2006: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2007: ctx->free = PETSC_FALSE;
2008: } else { /* TODO: could be optimized for successive solves */
2009: ISLocalToGlobalMapping N_to_D;
2010: PetscInt i;
/* renumber each index set through the mapping built from pcis->is_I_local, dropping the
   entries that are not in the mapping; these copies are owned by the context */
2012: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2013: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2014: for (i=0;i<pcbddc->benign_n;i++) {
2015: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2016: }
2017: ISLocalToGlobalMappingDestroy(&N_to_D);
2018: ctx->free = PETSC_TRUE;
2019: }
/* the context keeps the original A_IB (no extra reference is taken); the shell replaces it in pcis */
2020: ctx->A = pcis->A_IB;
2021: ctx->work = work;
2022: MatSetUp(A_IB);
2023: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2024: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2025: pcis->A_IB = A_IB;
2027: /* A_BI as A_IB^T */
2028: MatCreateTranspose(A_IB,&A_BI);
2029: pcbddc->benign_original_mat = pcis->A_BI;
2030: pcis->A_BI = A_BI;
2031: } else {
2032: if (!pcbddc->benign_original_mat) {
2033: return(0);
2034: }
/* fetch the context before the shell is destroyed, then put the original A_IB back */
2035: MatShellGetContext(pcis->A_IB,&ctx);
2036: MatDestroy(&pcis->A_IB);
2037: pcis->A_IB = ctx->A;
2038: ctx->A = NULL;
/* drop the transpose wrapper and reinstate the original A_BI */
2039: MatDestroy(&pcis->A_BI);
2040: pcis->A_BI = pcbddc->benign_original_mat;
2041: pcbddc->benign_original_mat = NULL;
2042: if (ctx->free) {
2043: PetscInt i;
2044: for (i=0;i<ctx->benign_n;i++) {
2045: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2046: }
2047: PetscFree(ctx->benign_zerodiag_subs);
2048: }
2049: PetscFree(ctx->work);
2050: PetscFree(ctx);
2051: }
2052: return(0);
2053: }
2055: /* used just in bddc debug mode */
/*
   PCBDDCBenignProject - Builds P^T A P, with A the local matrix of pc->pmat and
   P = pcbddc->benign_change, then zeroes the rows and columns listed in
   pcbddc->benign_p0_lidx (pcbddc->benign_n entries), putting 1.0 on their diagonal.

   Input Parameters:
+  pc  - the preconditioner
.  is1 - row index set of the block to extract, or NULL to get the whole matrix
-  is2 - column index set of the block to extract (used only when is1 is given)

   Output Parameter:
.  B   - new matrix: the (is1,is2) submatrix when is1 is given, otherwise the full
         projected matrix
*/
2056: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2057: {
2058: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2059: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2060: Mat An;
/* 2.0 is the fill estimate passed to MatPtAP */
2064: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2065: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2066: if (is1) {
/* the intermediate matrix is no longer needed once the block has been extracted */
2067: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2068: MatDestroy(&An);
2069: } else {
2070: *B = An;
2071: }
2072: return(0);
2073: }
2075: /* TODO: add reuse flag */
/*
   MatSeqAIJCompress - Creates a new SeqAIJ matrix holding only the entries of A whose
   magnitude is larger than PETSC_SMALL; for square matrices the diagonal entries that
   are stored in A are always kept.

   Input Parameter:
.  A - the matrix to compress (its CSR data are accessed through MatGetRowIJ and
       MatSeqAIJGetArray)

   Output Parameter:
.  B - the compressed matrix

   Notes:
   *B is read on entry: if it equals A, A is destroyed before *B is overwritten
   (in-place usage). Callers must therefore pass an initialized *B.
   The flag returned by MatGetRowIJ (flg_row) is not checked.
*/
2076: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2077: {
2078: Mat Bt;
2079: PetscScalar *a,*bdata;
2080: const PetscInt *ii,*ij;
2081: PetscInt m,n,i,nnz,*bii,*bij;
2082: PetscBool flg_row;
/* n = rows, m = columns; n is written again by MatGetRowIJ */
2086: MatGetSize(A,&n,&m);
2087: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2088: MatSeqAIJGetArray(A,&a);
/* upper bound on the entries to keep: one slot per row (room for retained diagonal
   entries) plus every entry above the threshold */
2089: nnz = n;
2090: for (i=0;i<ii[n];i++) {
2091: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2092: }
2093: PetscMalloc1(n+1,&bii);
2094: PetscMalloc1(nnz,&bij);
2095: PetscMalloc1(nnz,&bdata);
/* second pass: copy the retained entries row by row, building the new row pointers */
2096: nnz = 0;
2097: bii[0] = 0;
2098: for (i=0;i<n;i++) {
2099: PetscInt j;
2100: for (j=ii[i];j<ii[i+1];j++) {
2101: PetscScalar entry = a[j];
2102: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2103: bij[nnz] = ij[j];
2104: bdata[nnz] = entry;
2105: nnz++;
2106: }
2107: }
2108: bii[i+1] = nnz;
2109: }
2110: MatSeqAIJRestoreArray(A,&a);
2111: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2112: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2113: {
/* hand ownership of bii, bij and bdata to Bt so that they are released with the matrix */
2114: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2115: b->free_a = PETSC_TRUE;
2116: b->free_ij = PETSC_TRUE;
2117: }
2118: if (*B == A) {
2119: MatDestroy(&A);
2120: }
2121: *B = Bt;
2122: return(0);
2123: }
/*
   PCBDDCDetectDisconnectedComponents - Computes the connected components of the local
   problem using a PCBDDCGraph.

   Input Parameters:
+  pc     - the preconditioner
-  filter - if true, the adjacency is taken from the local matrix after dropping the
            entries smaller than PETSC_SMALL in magnitude, and the DMPlex-based path
            is never used

   Output Parameters (each may be NULL):
+  ncc     - number of connected components found (0 on early exit)
.  cc      - array of ncc index sets on PETSC_COMM_SELF, one per component; the array is
             allocated here with PetscMalloc1 (NULL on early exit)
-  primalv - filled only on the DMPlex path: dofs met while visiting a component that were
             already assigned to a previous component; NULL otherwise

   Notes:
   The graph comes either from the cell adjacency of a DMPLEX attached to pc (or to
   pc->pmat), or from the row structure of the local matrix of pc->pmat. In the latter
   case the routine returns immediately, with the outputs reset, when the local matrix
   has zero global rows or columns.
   On the DMPlex path the components are components of cells; they are translated into
   dofs through the section returned by DMPlexGetSubdomainSection.
*/
2125: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2126: {
2127: Mat B = NULL;
2128: DM dm;
2129: IS is_dummy,*cc_n;
2130: ISLocalToGlobalMapping l2gmap_dummy;
2131: PCBDDCGraph graph;
2132: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2133: PetscInt i,n;
2134: PetscInt *xadj,*adjncy;
2135: PetscBool isplex = PETSC_FALSE;
2136: PetscErrorCode ierr;
2139: if (ncc) *ncc = 0;
2140: if (cc) *cc = NULL;
2141: if (primalv) *primalv = NULL;
2142: PCBDDCGraphCreate(&graph);
/* look for a DM on the PC first, then on the preconditioning matrix */
2143: PCGetDM(pc,&dm);
2144: if (!dm) {
2145: MatGetDM(pc->pmat,&dm);
2146: }
2147: if (dm) {
2148: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2149: }
/* filtering needs matrix values, so it always goes through the matrix-based path */
2150: if (filter) isplex = PETSC_FALSE;
2152: if (isplex) { /* this code has been modified from plexpartition.c */
2153: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2154: PetscInt *adj = NULL;
2155: IS cellNumbering;
2156: const PetscInt *cellNum;
2157: PetscBool useCone, useClosure;
2158: PetscSection section;
2159: PetscSegBuffer adjBuffer;
2160: PetscSF sfPoint;
2164: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2165: DMGetPointSF(dm, &sfPoint);
2166: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2167: /* Build adjacency graph via a section/segbuffer */
2168: PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);
2169: PetscSectionSetChart(section, pStart, pEnd);
2170: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2171: /* Always use FVM adjacency to create partitioner graph */
2172: DMPlexGetAdjacencyUseCone(dm, &useCone);
2173: DMPlexGetAdjacencyUseClosure(dm, &useClosure);
2174: DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);
2175: DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);
2176: DMPlexGetCellNumbering(dm, &cellNumbering);
2177: ISGetIndices(cellNumbering, &cellNum);
/* n counts the cells that are kept; it is the number of graph vertices used below */
2178: for (n = 0, p = pStart; p < pEnd; p++) {
2179: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2180: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2181: adjSize = PETSC_DETERMINE;
2182: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2183: for (a = 0; a < adjSize; ++a) {
2184: const PetscInt point = adj[a];
/* keep only the adjacent points that are cells themselves */
2185: if (pStart <= point && point < pEnd) {
2186: PetscInt *PETSC_RESTRICT pBuf;
2187: PetscSectionAddDof(section, p, 1);
2188: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2189: *pBuf = point;
2190: }
2191: }
2192: n++;
2193: }
/* put back the adjacency settings the DM had on entry */
2194: DMPlexSetAdjacencyUseCone(dm, useCone);
2195: DMPlexSetAdjacencyUseClosure(dm, useClosure);
2196: /* Derive CSR graph from section/segbuffer */
2197: PetscSectionSetUp(section);
2198: PetscSectionGetStorageSize(section, &size);
2199: PetscMalloc1(n+1, &xadj);
2200: for (idx = 0, p = pStart; p < pEnd; p++) {
2201: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2202: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2203: }
/* cellNum is not used past this point: pair the ISGetIndices above with its restore */
ISRestoreIndices(cellNumbering, &cellNum);
2204: xadj[n] = size;
2205: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2206: /* Clean up */
2207: PetscSegBufferDestroy(&adjBuffer);
2208: PetscSectionDestroy(&section);
2209: PetscFree(adj);
2210: graph->xadj = xadj;
2211: graph->adjncy = adjncy;
2212: } else {
2213: Mat A;
2214: PetscBool isseqaij, flg_row;
2216: MatISGetLocalMat(pc->pmat,&A);
/* nothing to analyze: release the graph and leave the outputs at their reset values */
2217: if (!A->rmap->N || !A->cmap->N) {
2218: PCBDDCGraphDestroy(&graph);
2219: return(0);
2220: }
2221: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
/* B is the matrix whose row structure is used: a converted copy when filtering a
   non-SeqAIJ matrix, otherwise A itself with an extra reference */
2222: if (!isseqaij && filter) {
2223: PetscBool isseqdense;
2225: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2226: if (!isseqdense) {
2227: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2228: } else { /* TODO: rectangular case and LDA */
2229: PetscScalar *array;
2230: PetscReal chop=1.e-6;
/* dense case: zero the off-diagonal entries that are small relative to the two
   corresponding diagonal entries (array[k*(n+1)] is the k-th diagonal entry) before
   converting to SeqAIJ */
2232: MatDuplicate(A,MAT_COPY_VALUES,&B);
2233: MatDenseGetArray(B,&array);
2234: MatGetSize(B,&n,NULL);
2235: for (i=0;i<n;i++) {
2236: PetscInt j;
2237: for (j=i+1;j<n;j++) {
2238: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2239: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2240: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2241: }
2242: }
2243: MatDenseRestoreArray(B,&array);
2244: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2245: }
2246: } else {
2247: PetscObjectReference((PetscObject)A);
2248: B = A;
2249: }
/* symmetrized row structure of B; it is given back with MatRestoreRowIJ in the clean up below */
2250: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2252: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2253: if (filter) {
2254: PetscScalar *data;
2255: PetscInt j,cum;
2257: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2258: MatSeqAIJGetArray(B,&data);
2259: cum = 0;
2260: for (i=0;i<n;i++) {
2261: PetscInt t;
/* xadj_filtered[i] is first used as the counter of the entries kept in row i (it starts
   from zero thanks to the calloc), then overwritten with the row offset */
2263: for (j=xadj[i];j<xadj[i+1];j++) {
2264: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2265: continue;
2266: }
2267: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2268: }
2269: t = xadj_filtered[i];
2270: xadj_filtered[i] = cum;
2271: cum += t;
2272: }
2273: MatSeqAIJRestoreArray(B,&data);
2274: graph->xadj = xadj_filtered;
2275: graph->adjncy = adjncy_filtered;
2276: } else {
2277: graph->xadj = xadj;
2278: graph->adjncy = adjncy;
2279: }
2280: }
2281: /* compute local connected components using PCBDDCGraph */
2282: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2283: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2284: ISDestroy(&is_dummy);
2285: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2286: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2287: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2288: PCBDDCGraphComputeConnectedComponents(graph);
2290: /* partial clean up */
2291: PetscFree2(xadj_filtered,adjncy_filtered);
2292: if (B) {
2293: PetscBool flg_row;
2294: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2295: MatDestroy(&B);
2296: }
/* on the DMPlex path the CSR arrays were allocated in this routine */
2297: if (isplex) {
2298: PetscFree(xadj);
2299: PetscFree(adjncy);
2300: }
2302: /* get back data */
2303: if (isplex) {
2304: if (ncc) *ncc = graph->ncc;
2305: if (cc || primalv) {
2306: Mat A;
2307: PetscBT btv,btvt;
2308: PetscSection subSection;
2309: PetscInt *ids,cum,cump,*cids,*pids;
2311: DMPlexGetSubdomainSection(dm,&subSection);
2312: MatISGetLocalMat(pc->pmat,&A);
/* ids: dofs grouped by component, cids: offsets of each component into ids,
   pids: dofs reached from a component after being assigned to an earlier one.
   btv marks the dofs assigned so far, btvt the dofs seen while visiting the current component */
2313: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2314: PetscBTCreate(A->rmap->n,&btv);
2315: PetscBTCreate(A->rmap->n,&btvt);
2317: cids[0] = 0;
2318: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2319: PetscInt j;
2321: PetscBTMemzero(A->rmap->n,btvt);
2322: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2323: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2325: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
/* closure is read with stride 2: only the even entries are used as mesh points */
2326: for (k = 0; k < 2*size; k += 2) {
2327: PetscInt s, p = closure[k], off, dof, cdof;
2329: PetscSectionGetConstraintDof(subSection, p, &cdof);
2330: PetscSectionGetOffset(subSection,p,&off);
2331: PetscSectionGetDof(subSection,p,&dof);
2332: for (s = 0; s < dof-cdof; s++) {
2333: if (PetscBTLookupSet(btvt,off+s)) continue;
2334: if (!PetscBTLookup(btv,off+s)) {
2335: ids[cum++] = off+s;
2336: } else { /* cross-vertex */
2337: pids[cump++] = off+s;
2338: }
2339: }
2340: }
2341: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2342: }
2343: cids[i+1] = cum;
2344: /* mark dofs as already assigned */
2345: for (j = cids[i]; j < cids[i+1]; j++) {
2346: PetscBTSet(btv,ids[j]);
2347: }
2348: }
2349: if (cc) {
2350: PetscMalloc1(graph->ncc,&cc_n);
2351: for (i = 0; i < graph->ncc; i++) {
2352: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2353: }
2354: *cc = cc_n;
2355: }
2356: if (primalv) {
2357: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2358: }
2359: PetscFree3(ids,cids,pids);
2360: PetscBTDestroy(&btv);
2361: PetscBTDestroy(&btvt);
2362: }
2363: } else {
2364: if (ncc) *ncc = graph->ncc;
2365: if (cc) {
2366: PetscMalloc1(graph->ncc,&cc_n);
2367: for (i=0;i<graph->ncc;i++) {
2368: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2369: }
2370: *cc = cc_n;
2371: }
2372: }
2373: /* clean up graph */
/* the CSR arrays have already been released above: detach them before destroying the graph */
2374: graph->xadj = 0;
2375: graph->adjncy = 0;
2376: PCBDDCGraphDestroy(&graph);
2377: return(0);
2378: }
/*
   PCBDDCBenignCheck - Consistency checks for the benign trick.

   Input Parameters:
+  pc       - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
-  zerodiag - local indices of the zero-diagonal (pressure) dofs, or NULL to skip the
              first two checks

   Notes:
   Checks performed (each failure raises an error):
   1. with p0 the indicator vector of zerodiag and v_I a random vector zeroed on zerodiag,
      on pcis->is_B_local and on the Dirichlet dofs of the graph, |(A p0, v_I)| must not
      exceed 1.e-1, A being the local matrix of pc->pmat;
   2. no index of zerodiag may belong to pcis->is_B_local;
   3. a round trip through PCBDDCBenignGetOrSetP0 must give back the values stored in
      pcbddc->benign_p0 (presumably the PETSC_FALSE call stores them in the vector and
      the PETSC_TRUE call reads them back - confirm against that routine).
   pcis->vec1_N, pcis->vec2_N, pcis->vec1_global and pcbddc->benign_p0 are overwritten.
*/
2380: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2381: {
2382: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2383: PC_IS* pcis = (PC_IS*)(pc->data);
2384: IS dirIS = NULL;
2385: PetscInt i;
2389: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2390: if (zerodiag) {
2391: Mat A;
2392: Vec vec3_N;
2393: PetscScalar *vals;
2394: const PetscInt *idxs;
2395: PetscInt nz,*count;
2397: /* p0 */
2398: VecSet(pcis->vec1_N,0.);
/* vals is a scratch buffer of pcis->n scalars, reused for every VecSetValues call below */
2399: PetscMalloc1(pcis->n,&vals);
2400: ISGetLocalSize(zerodiag,&nz);
2401: ISGetIndices(zerodiag,&idxs);
2402: for (i=0;i<nz;i++) vals[i] = 1.;
2403: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2404: VecAssemblyBegin(pcis->vec1_N);
2405: VecAssemblyEnd(pcis->vec1_N);
2406: /* v_I */
/* random vector, zeroed on the pressure dofs, on the interface dofs and on the Dirichlet dofs */
2407: VecSetRandom(pcis->vec2_N,NULL);
2408: for (i=0;i<nz;i++) vals[i] = 0.;
2409: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2410: ISRestoreIndices(zerodiag,&idxs);
2411: ISGetIndices(pcis->is_B_local,&idxs);
2412: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2413: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2414: ISRestoreIndices(pcis->is_B_local,&idxs);
2415: if (dirIS) {
2416: PetscInt n;
2418: ISGetLocalSize(dirIS,&n);
2419: ISGetIndices(dirIS,&idxs);
2420: for (i=0;i<n;i++) vals[i] = 0.;
2421: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2422: ISRestoreIndices(dirIS,&idxs);
2423: }
2424: VecAssemblyBegin(pcis->vec2_N);
2425: VecAssemblyEnd(pcis->vec2_N);
2426: VecDuplicate(pcis->vec1_N,&vec3_N);
2427: VecSet(vec3_N,0.);
2428: MatISGetLocalMat(pc->pmat,&A);
/* vals[0] receives the dot product between A*p0 and v_I */
2429: MatMult(A,pcis->vec1_N,vec3_N);
2430: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
/* the value is cast to double so that it matches the %e conversion whatever the PetscReal precision */
2431: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",(double)PetscAbsScalar(vals[0]));
2432: PetscFree(vals);
2433: VecDestroy(&vec3_N);
2435: /* there should not be any pressure dofs lying on the interface */
2436: PetscCalloc1(pcis->n,&count);
2437: ISGetIndices(pcis->is_B_local,&idxs);
2438: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2439: ISRestoreIndices(pcis->is_B_local,&idxs);
2440: ISGetIndices(zerodiag,&idxs);
2441: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2442: ISRestoreIndices(zerodiag,&idxs);
2443: PetscFree(count);
2444: }
2445: ISDestroy(&dirIS);
2447: /* check PCBDDCBenignGetOrSetP0 */
/* fill p0 with the recognizable values -rank-i, call the routine with PETSC_FALSE, clobber p0
   with ones, call it with PETSC_TRUE and verify that the original values are back */
2448: VecSetRandom(pcis->vec1_global,NULL);
2449: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2450: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2451: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2452: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2453: for (i=0;i<pcbddc->benign_n;i++) {
2454: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
/* both %g conversions need double arguments: the expected value is an integer expression
   and must be converted explicitly */
2455: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",(double)PetscRealPart(pcbddc->benign_p0[i]),i,(double)(-PetscGlobalRank-i));
2456: }
2457: return(0);
2458: }
2460: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
2461: {
2462: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2463: IS pressures,zerodiag,zerodiag_save,*zerodiag_subs;
2464: PetscInt nz,n;
2465: PetscInt *interior_dofs,n_interior_dofs,nneu;
2466: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2470: PetscSFDestroy(&pcbddc->benign_sf);
2471: MatDestroy(&pcbddc->benign_B0);
2472: for (n=0;n<pcbddc->benign_n;n++) {
2473: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2474: }
2475: PetscFree(pcbddc->benign_zerodiag_subs);
2476: pcbddc->benign_n = 0;
2478: /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line)
2479: otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
2480: Checks if all the pressure dofs in each subdomain have a zero diagonal
2481: If not, a change of basis on pressures is not needed
2482: since the local Schur complements are already SPD
2483: */
2484: has_null_pressures = PETSC_TRUE;
2485: have_null = PETSC_TRUE;
2486: if (pcbddc->n_ISForDofsLocal) {
2487: IS iP = NULL;
2488: PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
2490: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2491: PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);
2492: PetscOptionsEnd();
2493: if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p);
2494: /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
2495: ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);
2496: ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2497: ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);
2498: ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);
2499: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2500: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2501: if (iP) {
2502: IS newpressures;
2504: ISDifference(pressures,iP,&newpressures);
2505: ISDestroy(&pressures);
2506: pressures = newpressures;
2507: }
2508: ISSorted(pressures,&sorted);
2509: if (!sorted) {
2510: ISSort(pressures);
2511: }
2512: } else {
2513: pressures = NULL;
2514: }
2515: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2516: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2517: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2518: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2519: ISSorted(zerodiag,&sorted);
2520: if (!sorted) {
2521: ISSort(zerodiag);
2522: }
2523: PetscObjectReference((PetscObject)zerodiag);
2524: zerodiag_save = zerodiag;
2525: ISGetLocalSize(zerodiag,&nz);
2526: if (!nz) {
2527: if (n) have_null = PETSC_FALSE;
2528: has_null_pressures = PETSC_FALSE;
2529: ISDestroy(&zerodiag);
2530: }
2531: recompute_zerodiag = PETSC_FALSE;
2532: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2533: zerodiag_subs = NULL;
2534: pcbddc->benign_n = 0;
2535: n_interior_dofs = 0;
2536: interior_dofs = NULL;
2537: nneu = 0;
2538: if (pcbddc->NeumannBoundariesLocal) {
2539: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2540: }
2541: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2542: if (checkb) { /* need to compute interior nodes */
2543: PetscInt n,i,j;
2544: PetscInt n_neigh,*neigh,*n_shared,**shared;
2545: PetscInt *iwork;
2547: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2548: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2549: PetscCalloc1(n,&iwork);
2550: PetscMalloc1(n,&interior_dofs);
2551: for (i=1;i<n_neigh;i++)
2552: for (j=0;j<n_shared[i];j++)
2553: iwork[shared[i][j]] += 1;
2554: for (i=0;i<n;i++)
2555: if (!iwork[i])
2556: interior_dofs[n_interior_dofs++] = i;
2557: PetscFree(iwork);
2558: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2559: }
2560: if (has_null_pressures) {
2561: IS *subs;
2562: PetscInt nsubs,i,j,nl;
2563: const PetscInt *idxs;
2564: PetscScalar *array;
2565: Vec *work;
2566: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2568: subs = pcbddc->local_subs;
2569: nsubs = pcbddc->n_local_subs;
2570: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2571: if (checkb) {
2572: VecDuplicateVecs(matis->y,2,&work);
2573: ISGetLocalSize(zerodiag,&nl);
2574: ISGetIndices(zerodiag,&idxs);
2575: /* work[0] = 1_p */
2576: VecSet(work[0],0.);
2577: VecGetArray(work[0],&array);
2578: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2579: VecRestoreArray(work[0],&array);
2580: /* work[1] = 1_v */
2581: VecSet(work[1],1.);
2582: VecGetArray(work[1],&array);
2583: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2584: VecRestoreArray(work[1],&array);
2585: ISRestoreIndices(zerodiag,&idxs);
2586: }
2587: if (nsubs > 1) {
2588: PetscCalloc1(nsubs,&zerodiag_subs);
2589: for (i=0;i<nsubs;i++) {
2590: ISLocalToGlobalMapping l2g;
2591: IS t_zerodiag_subs;
2592: PetscInt nl;
2594: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2595: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);
2596: ISGetLocalSize(t_zerodiag_subs,&nl);
2597: if (nl) {
2598: PetscBool valid = PETSC_TRUE;
2600: if (checkb) {
2601: VecSet(matis->x,0);
2602: ISGetLocalSize(subs[i],&nl);
2603: ISGetIndices(subs[i],&idxs);
2604: VecGetArray(matis->x,&array);
2605: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2606: VecRestoreArray(matis->x,&array);
2607: ISRestoreIndices(subs[i],&idxs);
2608: VecPointwiseMult(matis->x,work[0],matis->x);
2609: MatMult(matis->A,matis->x,matis->y);
2610: VecPointwiseMult(matis->y,work[1],matis->y);
2611: VecGetArray(matis->y,&array);
2612: for (j=0;j<n_interior_dofs;j++) {
2613: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2614: valid = PETSC_FALSE;
2615: break;
2616: }
2617: }
2618: VecRestoreArray(matis->y,&array);
2619: }
2620: if (valid && nneu) {
2621: const PetscInt *idxs;
2622: PetscInt nzb;
2624: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2625: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2626: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2627: if (nzb) valid = PETSC_FALSE;
2628: }
2629: if (valid && pressures) {
2630: IS t_pressure_subs;
2631: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2632: ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);
2633: ISDestroy(&t_pressure_subs);
2634: }
2635: if (valid) {
2636: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);
2637: pcbddc->benign_n++;
2638: } else {
2639: recompute_zerodiag = PETSC_TRUE;
2640: }
2641: }
2642: ISDestroy(&t_zerodiag_subs);
2643: ISLocalToGlobalMappingDestroy(&l2g);
2644: }
2645: } else { /* there's just one subdomain (or zero if they have not been detected */
2646: PetscBool valid = PETSC_TRUE;
2648: if (nneu) valid = PETSC_FALSE;
2649: if (valid && pressures) {
2650: ISEqual(pressures,zerodiag,&valid);
2651: }
2652: if (valid && checkb) {
2653: MatMult(matis->A,work[0],matis->x);
2654: VecPointwiseMult(matis->x,work[1],matis->x);
2655: VecGetArray(matis->x,&array);
2656: for (j=0;j<n_interior_dofs;j++) {
2657: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2658: valid = PETSC_FALSE;
2659: break;
2660: }
2661: }
2662: VecRestoreArray(matis->x,&array);
2663: }
2664: if (valid) {
2665: pcbddc->benign_n = 1;
2666: PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);
2667: PetscObjectReference((PetscObject)zerodiag);
2668: zerodiag_subs[0] = zerodiag;
2669: }
2670: }
2671: if (checkb) {
2672: VecDestroyVecs(2,&work);
2673: }
2674: }
2675: PetscFree(interior_dofs);
2677: if (!pcbddc->benign_n) {
2678: PetscInt n;
2680: ISDestroy(&zerodiag);
2681: recompute_zerodiag = PETSC_FALSE;
2682: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2683: if (n) {
2684: has_null_pressures = PETSC_FALSE;
2685: have_null = PETSC_FALSE;
2686: }
2687: }
2689: /* final check for null pressures */
2690: if (zerodiag && pressures) {
2691: PetscInt nz,np;
2692: ISGetLocalSize(zerodiag,&nz);
2693: ISGetLocalSize(pressures,&np);
2694: if (nz != np) have_null = PETSC_FALSE;
2695: }
2697: if (recompute_zerodiag) {
2698: ISDestroy(&zerodiag);
2699: if (pcbddc->benign_n == 1) {
2700: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2701: zerodiag = zerodiag_subs[0];
2702: } else {
2703: PetscInt i,nzn,*new_idxs;
2705: nzn = 0;
2706: for (i=0;i<pcbddc->benign_n;i++) {
2707: PetscInt ns;
2708: ISGetLocalSize(zerodiag_subs[i],&ns);
2709: nzn += ns;
2710: }
2711: PetscMalloc1(nzn,&new_idxs);
2712: nzn = 0;
2713: for (i=0;i<pcbddc->benign_n;i++) {
2714: PetscInt ns,*idxs;
2715: ISGetLocalSize(zerodiag_subs[i],&ns);
2716: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2717: PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2718: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2719: nzn += ns;
2720: }
2721: PetscSortInt(nzn,new_idxs);
2722: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2723: }
2724: have_null = PETSC_FALSE;
2725: }
2727: /* Prepare matrix to compute no-net-flux */
2728: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2729: Mat A,loc_divudotp;
2730: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2731: IS row,col,isused = NULL;
2732: PetscInt M,N,n,st,n_isused;
2734: if (pressures) {
2735: isused = pressures;
2736: } else {
2737: isused = zerodiag_save;
2738: }
2739: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2740: MatISGetLocalMat(pc->pmat,&A);
2741: MatGetLocalSize(A,&n,NULL);
2742: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2743: n_isused = 0;
2744: if (isused) {
2745: ISGetLocalSize(isused,&n_isused);
2746: }
2747: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2748: st = st-n_isused;
2749: if (n) {
2750: const PetscInt *gidxs;
2752: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2753: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2754: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2755: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2756: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2757: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2758: } else {
2759: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2760: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2761: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2762: }
2763: MatGetSize(pc->pmat,NULL,&N);
2764: ISGetSize(row,&M);
2765: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2766: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2767: ISDestroy(&row);
2768: ISDestroy(&col);
2769: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2770: MatSetType(pcbddc->divudotp,MATIS);
2771: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2772: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2773: ISLocalToGlobalMappingDestroy(&rl2g);
2774: ISLocalToGlobalMappingDestroy(&cl2g);
2775: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2776: MatDestroy(&loc_divudotp);
2777: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2778: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2779: }
2780: ISDestroy(&zerodiag_save);
2782: /* change of basis and p0 dofs */
2783: if (has_null_pressures) {
2784: IS zerodiagc;
2785: const PetscInt *idxs,*idxsc;
2786: PetscInt i,s,*nnz;
2788: ISGetLocalSize(zerodiag,&nz);
2789: ISComplement(zerodiag,0,n,&zerodiagc);
2790: ISGetIndices(zerodiagc,&idxsc);
2791: /* local change of basis for pressures */
2792: MatDestroy(&pcbddc->benign_change);
2793: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2794: MatSetType(pcbddc->benign_change,MATAIJ);
2795: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2796: PetscMalloc1(n,&nnz);
2797: for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
2798: for (i=0;i<pcbddc->benign_n;i++) {
2799: PetscInt nzs,j;
2801: ISGetLocalSize(zerodiag_subs[i],&nzs);
2802: ISGetIndices(zerodiag_subs[i],&idxs);
2803: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2804: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2805: ISRestoreIndices(zerodiag_subs[i],&idxs);
2806: }
2807: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2808: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2809: PetscFree(nnz);
2810: /* set identity on velocities */
2811: for (i=0;i<n-nz;i++) {
2812: MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);
2813: }
2814: ISRestoreIndices(zerodiagc,&idxsc);
2815: ISDestroy(&zerodiagc);
2816: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2817: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2818: /* set change on pressures */
2819: for (s=0;s<pcbddc->benign_n;s++) {
2820: PetscScalar *array;
2821: PetscInt nzs;
2823: ISGetLocalSize(zerodiag_subs[s],&nzs);
2824: ISGetIndices(zerodiag_subs[s],&idxs);
2825: for (i=0;i<nzs-1;i++) {
2826: PetscScalar vals[2];
2827: PetscInt cols[2];
2829: cols[0] = idxs[i];
2830: cols[1] = idxs[nzs-1];
2831: vals[0] = 1.;
2832: vals[1] = 1.;
2833: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2834: }
2835: PetscMalloc1(nzs,&array);
2836: for (i=0;i<nzs-1;i++) array[i] = -1.;
2837: array[nzs-1] = 1.;
2838: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2839: /* store local idxs for p0 */
2840: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2841: ISRestoreIndices(zerodiag_subs[s],&idxs);
2842: PetscFree(array);
2843: }
2844: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2845: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2846: /* project if needed */
2847: if (pcbddc->benign_change_explicit) {
2848: Mat M;
2850: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2851: MatDestroy(&pcbddc->local_mat);
2852: MatSeqAIJCompress(M,&pcbddc->local_mat);
2853: MatDestroy(&M);
2854: }
2855: /* store global idxs for p0 */
2856: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2857: }
2858: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2859: ISDestroy(&pressures);
2861: /* determines if the coarse solver will be singular or not */
2862: MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2863: /* determines if the problem has subdomains with 0 pressure block */
2864: have_null = (PetscBool)(!!pcbddc->benign_n);
2865: MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2866: *zerodiaglocal = zerodiag;
2867: return(0);
2868: }
/*
  PCBDDCBenignGetOrSetP0 - Moves values between the vector v and the array pcbddc->benign_p0.

  Input Parameters:
+ pc  - the preconditioner; pc->data is accessed as a PC_BDDC
. v   - the vector whose array is read (get) or written (set)
- get - if true, values are broadcast from the array of v into pcbddc->benign_p0;
        if false, pcbddc->benign_p0 is reduced into the array of v using MPIU_REPLACE

  Notes:
  The star forest pcbddc->benign_sf is created on the first call and reused by later calls.
  NOTE(review): v is presumably laid out like pc->pmat->rmap, since that layout is the one
  handed to the star forest - confirm against the callers.
*/
2870: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2871: {
2872: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2873: PetscScalar *array;
/* build the star forest only once: pcbddc->benign_n leaves, no explicit local leaf indices (NULL),
   remote entries given by the global indices pcbddc->benign_p0_gidx over the layout pc->pmat->rmap */
2877: if (!pcbddc->benign_sf) {
2878: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
2879: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
2880: }
2881: if (get) {
/* get: v is accessed read-only; data flows from the array of v into pcbddc->benign_p0 */
2882: VecGetArrayRead(v,(const PetscScalar**)&array);
2883: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2884: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
2885: VecRestoreArrayRead(v,(const PetscScalar**)&array);
2886: } else {
/* set: v is accessed read-write; data flows from pcbddc->benign_p0 into the array of v,
   with MPIU_REPLACE passed as the reduction operation */
2887: VecGetArray(v,&array);
2888: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2889: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
2890: VecRestoreArray(v,&array);
2891: }
2892: return(0);
2893: }
/*
  PCBDDCBenignPopOrPushB0 - Extracts ("pop") the B0 block into pcbddc->benign_B0, or writes it
  back ("push") into pcbddc->local_mat.

  Input Parameters:
+ pc  - the preconditioner; pc->data is accessed as a PC_BDDC
- pop - if true, pop; if false, push

  Notes:
  Returns immediately when pcbddc->benign_n is zero.

  Pop with pcbddc->benign_change_explicit set: the rows of pcbddc->local_mat listed in
  pcbddc->benign_p0_lidx are extracted into pcbddc->benign_B0 (reusing it if it already exists),
  then those rows and columns of pcbddc->local_mat are zeroed with 1.0 passed as diagonal value.

  Pop without pcbddc->benign_change_explicit: pcbddc->benign_B0 is created if needed and filled
  row by row; row i receives the entries of matis->A times the indicator vector of
  pcbddc->benign_zerodiag_subs[i] whose magnitude exceeds PETSC_SMALL. pcbddc->local_mat is not
  modified in this branch.

  Push: each row of pcbddc->benign_B0 is inserted into pcbddc->local_mat both as a row and as a
  column at index pcbddc->benign_p0_lidx[i], and the corresponding diagonal entry is set to 0.0.
  Push is only available with pcbddc->benign_change_explicit; otherwise an error with code
  PETSC_ERR_PLIB is raised.
*/
2895: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2896: {
2897: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2901: /* TODO: add error checking
2902: - avoid nested pop (or push) calls.
2903: - cannot push before pop.
2904: - cannot call this if pcbddc->local_mat is NULL
2905: */
/* nothing to do when there are no p0 dofs on this process */
2906: if (!pcbddc->benign_n) {
2907: return(0);
2908: }
2909: if (pop) {
2910: if (pcbddc->benign_change_explicit) {
2911: IS is_p0;
2912: MatReuse reuse;
2914: /* extract B_0 */
/* reuse the existing benign_B0 if a previous pop already created it */
2915: reuse = MAT_INITIAL_MATRIX;
2916: if (pcbddc->benign_B0) {
2917: reuse = MAT_REUSE_MATRIX;
2918: }
2919: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
2920: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
2921: /* remove rows and cols from local problem */
/* keep the nonzero pattern while zeroing and disable the new-nonzero-location error;
   1.0 is the value passed for the diagonal of the zeroed rows/columns */
2922: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
2923: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2924: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
2925: ISDestroy(&is_p0);
2926: } else {
2927: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2928: PetscScalar *vals;
2929: PetscInt i,n,*idxs_ins;
/* idxs_ins and vals are scratch buffers of length n (the local size of matis->y) */
2931: VecGetLocalSize(matis->y,&n);
2932: PetscMalloc2(n,&idxs_ins,n,&vals);
2933: if (!pcbddc->benign_B0) {
2934: PetscInt *nnz;
2935: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
2936: MatSetType(pcbddc->benign_B0,MATAIJ);
2937: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
2938: PetscMalloc1(pcbddc->benign_n,&nnz);
/* preallocate row i with n minus the size of its zero-diagonal subset */
2939: for (i=0;i<pcbddc->benign_n;i++) {
2940: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
2941: nnz[i] = n - nnz[i];
2942: }
2943: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
2944: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2945: PetscFree(nnz);
2946: }
/* one row of benign_B0 per zero-diagonal subset */
2948: for (i=0;i<pcbddc->benign_n;i++) {
2949: PetscScalar *array;
2950: PetscInt *idxs,j,nz,cum;
/* matis->x becomes the indicator vector of the i-th subset: zero everywhere, 1 on its indices;
   vals is first used to hold the ones */
2952: VecSet(matis->x,0.);
2953: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
2954: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2955: for (j=0;j<nz;j++) vals[j] = 1.;
2956: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
2957: VecAssemblyBegin(matis->x);
2958: VecAssemblyEnd(matis->x);
2959: VecSet(matis->y,0.);
2960: MatMult(matis->A,matis->x,matis->y);
2961: VecGetArray(matis->y,&array);
/* keep only the entries of the product larger than PETSC_SMALL in magnitude;
   vals is now reused for the filtered values and idxs_ins for their column indices */
2962: cum = 0;
2963: for (j=0;j<n;j++) {
2964: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2965: vals[cum] = array[j];
2966: idxs_ins[cum] = j;
2967: cum++;
2968: }
2969: }
2970: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
2971: VecRestoreArray(matis->y,&array);
2972: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
2973: }
2974: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2975: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
2976: PetscFree2(idxs_ins,vals);
2977: }
2978: } else { /* push */
2979: if (pcbddc->benign_change_explicit) {
2980: PetscInt i;
/* write row i of benign_B0 back into local_mat, once as row benign_p0_lidx[i] and once as
   column benign_p0_lidx[i] (same values), then set the diagonal entry to 0.0 */
2982: for (i=0;i<pcbddc->benign_n;i++) {
2983: PetscScalar *B0_vals;
2984: PetscInt *B0_cols,B0_ncol;
2986: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2987: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
2988: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
2989: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
2990: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
2991: }
2992: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
2993: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
/* pushing is not supported when the change of basis is not explicit */
2994: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
2995: }
2996: return(0);
2997: }
2999: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3000: {
3001: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3002: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3003: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3004: PetscBLASInt *B_iwork,*B_ifail;
3005: PetscScalar *work,lwork;
3006: PetscScalar *St,*S,*eigv;
3007: PetscScalar *Sarray,*Starray;
3008: PetscReal *eigs,thresh,lthresh,uthresh;
3009: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3010: PetscBool allocated_S_St;
3011: #if defined(PETSC_USE_COMPLEX)
3012: PetscReal *rwork;
3013: #endif
3014: PetscErrorCode ierr;
3017: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3018: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3019: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3020: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3022: if (pcbddc->dbg_flag) {
3023: PetscViewerFlush(pcbddc->dbg_viewer);
3024: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3025: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3026: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3027: }
3029: if (pcbddc->dbg_flag) {
3030: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3031: }
3033: /* max size of subsets */
3034: mss = 0;
3035: for (i=0;i<sub_schurs->n_subs;i++) {
3036: PetscInt subset_size;
3038: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3039: mss = PetscMax(mss,subset_size);
3040: }
3042: /* min/max and threshold */
3043: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3044: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3045: nmax = PetscMax(nmin,nmax);
3046: allocated_S_St = PETSC_FALSE;
3047: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3048: allocated_S_St = PETSC_TRUE;
3049: }
3051: /* allocate lapack workspace */
3052: cum = cum2 = 0;
3053: maxneigs = 0;
3054: for (i=0;i<sub_schurs->n_subs;i++) {
3055: PetscInt n,subset_size;
3057: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3058: n = PetscMin(subset_size,nmax);
3059: cum += subset_size;
3060: cum2 += subset_size*n;
3061: maxneigs = PetscMax(maxneigs,n);
3062: }
3063: if (mss) {
3064: if (sub_schurs->is_symmetric) {
3065: PetscBLASInt B_itype = 1;
3066: PetscBLASInt B_N = mss;
3067: PetscReal zero = 0.0;
3068: PetscReal eps = 0.0; /* dlamch? */
3070: B_lwork = -1;
3071: S = NULL;
3072: St = NULL;
3073: eigs = NULL;
3074: eigv = NULL;
3075: B_iwork = NULL;
3076: B_ifail = NULL;
3077: #if defined(PETSC_USE_COMPLEX)
3078: rwork = NULL;
3079: #endif
3080: thresh = 1.0;
3081: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3082: #if defined(PETSC_USE_COMPLEX)
3083: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3084: #else
3085: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3086: #endif
3087: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3088: PetscFPTrapPop();
3089: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3090: } else {
3091: lwork = 0;
3092: }
3094: nv = 0;
3095: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3096: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3097: }
3098: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3099: if (allocated_S_St) {
3100: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3101: }
3102: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3103: #if defined(PETSC_USE_COMPLEX)
3104: PetscMalloc1(7*mss,&rwork);
3105: #endif
3106: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3107: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3108: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3109: nv+cum,&pcbddc->adaptive_constraints_idxs,
3110: nv+cum2,&pcbddc->adaptive_constraints_data);
3111: PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));
3113: maxneigs = 0;
3114: cum = cumarray = 0;
3115: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3116: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3117: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3118: const PetscInt *idxs;
3120: ISGetIndices(sub_schurs->is_vertices,&idxs);
3121: for (cum=0;cum<nv;cum++) {
3122: pcbddc->adaptive_constraints_n[cum] = 1;
3123: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3124: pcbddc->adaptive_constraints_data[cum] = 1.0;
3125: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3126: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3127: }
3128: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3129: }
3131: if (mss) { /* multilevel */
3132: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3133: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3134: }
3136: lthresh = pcbddc->adaptive_threshold[0];
3137: uthresh = pcbddc->adaptive_threshold[1];
3138: for (i=0;i<sub_schurs->n_subs;i++) {
3139: const PetscInt *idxs;
3140: PetscReal upper,lower;
3141: PetscInt j,subset_size,eigs_start = 0;
3142: PetscBLASInt B_N;
3143: PetscBool same_data = PETSC_FALSE;
3144: PetscBool scal = PETSC_FALSE;
3146: if (pcbddc->use_deluxe_scaling) {
3147: upper = PETSC_MAX_REAL;
3148: lower = uthresh;
3149: } else {
3150: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3151: upper = 1./uthresh;
3152: lower = 0.;
3153: }
3154: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3155: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3156: PetscBLASIntCast(subset_size,&B_N);
3157: /* this is experimental: we assume the dofs have been properly grouped to have
3158: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3159: if (!sub_schurs->is_posdef) {
3160: Mat T;
3162: for (j=0;j<subset_size;j++) {
3163: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3164: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3165: MatScale(T,-1.0);
3166: MatDestroy(&T);
3167: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3168: MatScale(T,-1.0);
3169: MatDestroy(&T);
3170: if (sub_schurs->change_primal_sub) {
3171: PetscInt nz,k;
3172: const PetscInt *idxs;
3174: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3175: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3176: for (k=0;k<nz;k++) {
3177: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3178: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3179: }
3180: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3181: }
3182: scal = PETSC_TRUE;
3183: break;
3184: }
3185: }
3186: }
3188: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3189: if (sub_schurs->is_symmetric) {
3190: PetscInt j,k;
3191: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3192: PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3193: PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3194: }
3195: for (j=0;j<subset_size;j++) {
3196: for (k=j;k<subset_size;k++) {
3197: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3198: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3199: }
3200: }
3201: } else {
3202: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3203: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3204: }
3205: } else {
3206: S = Sarray + cumarray;
3207: St = Starray + cumarray;
3208: }
3209: /* see if we can save some work */
3210: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3211: PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3212: }
3214: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3215: B_neigs = 0;
3216: } else {
3217: if (sub_schurs->is_symmetric) {
3218: PetscBLASInt B_itype = 1;
3219: PetscBLASInt B_IL, B_IU;
3220: PetscReal eps = -1.0; /* dlamch? */
3221: PetscInt nmin_s;
3222: PetscBool compute_range;
3224: B_neigs = 0;
3225: compute_range = (PetscBool)!same_data;
3226: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3228: if (pcbddc->dbg_flag) {
3229: PetscInt nc = 0;
3231: if (sub_schurs->change_primal_sub) {
3232: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3233: }
3234: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3235: }
3237: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3238: if (compute_range) {
3240: /* ask for eigenvalues larger than thresh */
3241: if (sub_schurs->is_posdef) {
3242: #if defined(PETSC_USE_COMPLEX)
3243: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3244: #else
3245: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3246: #endif
3247: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3248: } else { /* no theory so far, but it works nicely */
3249: PetscInt recipe = 0,recipe_m = 1;
3250: PetscReal bb[2];
3252: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3253: switch (recipe) {
3254: case 0:
3255: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3256: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3257: #if defined(PETSC_USE_COMPLEX)
3258: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3259: #else
3260: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3261: #endif
3262: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3263: break;
3264: case 1:
3265: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3266: #if defined(PETSC_USE_COMPLEX)
3267: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3268: #else
3269: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3270: #endif
3271: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3272: if (!scal) {
3273: PetscBLASInt B_neigs2 = 0;
3275: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3276: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3277: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3278: #if defined(PETSC_USE_COMPLEX)
3279: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3280: #else
3281: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3282: #endif
3283: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3284: B_neigs += B_neigs2;
3285: }
3286: break;
3287: case 2:
3288: if (scal) {
3289: bb[0] = PETSC_MIN_REAL;
3290: bb[1] = 0;
3291: #if defined(PETSC_USE_COMPLEX)
3292: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3293: #else
3294: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3295: #endif
3296: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3297: } else {
3298: PetscBLASInt B_neigs2 = 0;
3299: PetscBool import = PETSC_FALSE;
3301: lthresh = PetscMax(lthresh,0.0);
3302: if (lthresh > 0.0) {
3303: bb[0] = PETSC_MIN_REAL;
3304: bb[1] = lthresh*lthresh;
3306: import = PETSC_TRUE;
3307: #if defined(PETSC_USE_COMPLEX)
3308: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3309: #else
3310: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3311: #endif
3312: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3313: }
3314: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3315: bb[1] = PETSC_MAX_REAL;
3316: if (import) {
3317: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3318: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3319: }
3320: #if defined(PETSC_USE_COMPLEX)
3321: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3322: #else
3323: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3324: #endif
3325: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3326: B_neigs += B_neigs2;
3327: }
3328: break;
3329: case 3:
3330: if (scal) {
3331: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3332: } else {
3333: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3334: }
3335: if (!scal) {
3336: bb[0] = uthresh;
3337: bb[1] = PETSC_MAX_REAL;
3338: #if defined(PETSC_USE_COMPLEX)
3339: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3340: #else
3341: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3342: #endif
3343: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3344: }
3345: if (recipe_m > 0 && B_N - B_neigs > 0) {
3346: PetscBLASInt B_neigs2 = 0;
3348: B_IL = 1;
3349: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3350: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3351: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3352: #if defined(PETSC_USE_COMPLEX)
3353: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3354: #else
3355: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3356: #endif
3357: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3358: B_neigs += B_neigs2;
3359: }
3360: break;
3361: case 4:
3362: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3363: #if defined(PETSC_USE_COMPLEX)
3364: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3365: #else
3366: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3367: #endif
3368: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3369: {
3370: PetscBLASInt B_neigs2 = 0;
3372: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3373: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3374: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3375: #if defined(PETSC_USE_COMPLEX)
3376: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3377: #else
3378: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3379: #endif
3380: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3381: B_neigs += B_neigs2;
3382: }
3383: break;
3384: case 5: /* same as before: first compute all eigenvalues, then filter */
3385: #if defined(PETSC_USE_COMPLEX)
3386: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3387: #else
3388: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3389: #endif
3390: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3391: {
3392: PetscInt e,k,ne;
3393: for (e=0,ne=0;e<B_neigs;e++) {
3394: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3395: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3396: eigs[ne] = eigs[e];
3397: ne++;
3398: }
3399: }
3400: PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3401: B_neigs = ne;
3402: }
3403: break;
3404: default:
3405: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3406: break;
3407: }
3408: }
3409: } else if (!same_data) { /* this is just to see all the eigenvalues */
3410: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3411: B_IL = 1;
3412: #if defined(PETSC_USE_COMPLEX)
3413: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3414: #else
3415: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3416: #endif
3417: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3418: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3419: PetscInt k;
3420: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3421: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3422: PetscBLASIntCast(nmax,&B_neigs);
3423: nmin = nmax;
3424: PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3425: for (k=0;k<nmax;k++) {
3426: eigs[k] = 1./PETSC_SMALL;
3427: eigv[k*(subset_size+1)] = 1.0;
3428: }
3429: }
3430: PetscFPTrapPop();
3431: if (B_ierr) {
3432: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3433: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3434: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3435: }
3437: if (B_neigs > nmax) {
3438: if (pcbddc->dbg_flag) {
3439: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3440: }
3441: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3442: B_neigs = nmax;
3443: }
3445: nmin_s = PetscMin(nmin,B_N);
3446: if (B_neigs < nmin_s) {
3447: PetscBLASInt B_neigs2 = 0;
3449: if (pcbddc->use_deluxe_scaling) {
3450: if (scal) {
3451: B_IU = nmin_s;
3452: B_IL = B_neigs + 1;
3453: } else {
3454: B_IL = B_N - nmin_s + 1;
3455: B_IU = B_N - B_neigs;
3456: }
3457: } else {
3458: B_IL = B_neigs + 1;
3459: B_IU = nmin_s;
3460: }
3461: if (pcbddc->dbg_flag) {
3462: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3463: }
3464: if (sub_schurs->is_symmetric) {
3465: PetscInt j,k;
3466: for (j=0;j<subset_size;j++) {
3467: for (k=j;k<subset_size;k++) {
3468: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3469: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3470: }
3471: }
3472: } else {
3473: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3474: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3475: }
3476: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3477: #if defined(PETSC_USE_COMPLEX)
3478: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3479: #else
3480: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3481: #endif
3482: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3483: PetscFPTrapPop();
3484: B_neigs += B_neigs2;
3485: }
3486: if (B_ierr) {
3487: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3488: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3489: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3490: }
3491: if (pcbddc->dbg_flag) {
3492: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3493: for (j=0;j<B_neigs;j++) {
3494: if (eigs[j] == 0.0) {
3495: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3496: } else {
3497: if (pcbddc->use_deluxe_scaling) {
3498: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3499: } else {
3500: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3501: }
3502: }
3503: }
3504: }
3505: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3506: }
3507: /* change the basis back to the original one */
3508: if (sub_schurs->change) {
3509: Mat change,phi,phit;
3511: if (pcbddc->dbg_flag > 2) {
3512: PetscInt ii;
3513: for (ii=0;ii<B_neigs;ii++) {
3514: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3515: for (j=0;j<B_N;j++) {
3516: #if defined(PETSC_USE_COMPLEX)
3517: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3518: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3519: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3520: #else
3521: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3522: #endif
3523: }
3524: }
3525: }
3526: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3527: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3528: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3529: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3530: MatDestroy(&phit);
3531: MatDestroy(&phi);
3532: }
3533: maxneigs = PetscMax(B_neigs,maxneigs);
3534: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3535: if (B_neigs) {
3536: PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));
3538: if (pcbddc->dbg_flag > 1) {
3539: PetscInt ii;
3540: for (ii=0;ii<B_neigs;ii++) {
3541: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3542: for (j=0;j<B_N;j++) {
3543: #if defined(PETSC_USE_COMPLEX)
3544: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3545: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3546: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3547: #else
3548: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3549: #endif
3550: }
3551: }
3552: }
3553: PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3554: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3555: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3556: cum++;
3557: }
3558: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3559: /* shift for next computation */
3560: cumarray += subset_size*subset_size;
3561: }
3562: if (pcbddc->dbg_flag) {
3563: PetscViewerFlush(pcbddc->dbg_viewer);
3564: }
3566: if (mss) {
3567: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3568: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3569: /* destroy matrices (junk) */
3570: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3571: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3572: }
3573: if (allocated_S_St) {
3574: PetscFree2(S,St);
3575: }
3576: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3577: #if defined(PETSC_USE_COMPLEX)
3578: PetscFree(rwork);
3579: #endif
3580: if (pcbddc->dbg_flag) {
3581: PetscInt maxneigs_r;
3582: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3583: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3584: }
3585: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3586: return(0);
3587: }
3589: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3590: {
3591: PetscScalar *coarse_submat_vals;
3595: /* Setup local scatters R_to_B and (optionally) R_to_D */
3596: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3597: PCBDDCSetUpLocalScatters(pc);
3599: /* Setup local neumann solver ksp_R */
3600: /* PCBDDCSetUpLocalScatters should be called first! */
3601: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3603: /*
3604: Setup local correction and local part of coarse basis.
3605: Gives back the dense local part of the coarse matrix in column major ordering
3606: */
3607: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3609: /* Compute total number of coarse nodes and setup coarse solver */
3610: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3612: /* free */
3613: PetscFree(coarse_submat_vals);
3614: return(0);
3615: }
3617: PetscErrorCode PCBDDCResetCustomization(PC pc)
3618: {
3619: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3623: ISDestroy(&pcbddc->user_primal_vertices);
3624: ISDestroy(&pcbddc->user_primal_vertices_local);
3625: ISDestroy(&pcbddc->NeumannBoundaries);
3626: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3627: ISDestroy(&pcbddc->DirichletBoundaries);
3628: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3629: PetscFree(pcbddc->onearnullvecs_state);
3630: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3631: PCBDDCSetDofsSplitting(pc,0,NULL);
3632: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3633: return(0);
3634: }
3636: PetscErrorCode PCBDDCResetTopography(PC pc)
3637: {
3638: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3639: PetscInt i;
3643: MatDestroy(&pcbddc->nedcG);
3644: ISDestroy(&pcbddc->nedclocal);
3645: MatDestroy(&pcbddc->discretegradient);
3646: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3647: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3648: MatDestroy(&pcbddc->switch_static_change);
3649: VecDestroy(&pcbddc->work_change);
3650: MatDestroy(&pcbddc->ConstraintMatrix);
3651: MatDestroy(&pcbddc->divudotp);
3652: ISDestroy(&pcbddc->divudotp_vl2l);
3653: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3654: for (i=0;i<pcbddc->n_local_subs;i++) {
3655: ISDestroy(&pcbddc->local_subs[i]);
3656: }
3657: pcbddc->n_local_subs = 0;
3658: PetscFree(pcbddc->local_subs);
3659: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3660: pcbddc->graphanalyzed = PETSC_FALSE;
3661: pcbddc->recompute_topography = PETSC_TRUE;
3662: pcbddc->corner_selected = PETSC_FALSE;
3663: return(0);
3664: }
3666: PetscErrorCode PCBDDCResetSolvers(PC pc)
3667: {
3668: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3672: VecDestroy(&pcbddc->coarse_vec);
3673: if (pcbddc->coarse_phi_B) {
3674: PetscScalar *array;
3675: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3676: PetscFree(array);
3677: }
3678: MatDestroy(&pcbddc->coarse_phi_B);
3679: MatDestroy(&pcbddc->coarse_phi_D);
3680: MatDestroy(&pcbddc->coarse_psi_B);
3681: MatDestroy(&pcbddc->coarse_psi_D);
3682: VecDestroy(&pcbddc->vec1_P);
3683: VecDestroy(&pcbddc->vec1_C);
3684: MatDestroy(&pcbddc->local_auxmat2);
3685: MatDestroy(&pcbddc->local_auxmat1);
3686: VecDestroy(&pcbddc->vec1_R);
3687: VecDestroy(&pcbddc->vec2_R);
3688: ISDestroy(&pcbddc->is_R_local);
3689: VecScatterDestroy(&pcbddc->R_to_B);
3690: VecScatterDestroy(&pcbddc->R_to_D);
3691: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3692: KSPReset(pcbddc->ksp_D);
3693: KSPReset(pcbddc->ksp_R);
3694: KSPReset(pcbddc->coarse_ksp);
3695: MatDestroy(&pcbddc->local_mat);
3696: PetscFree(pcbddc->primal_indices_local_idxs);
3697: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3698: PetscFree(pcbddc->global_primal_indices);
3699: ISDestroy(&pcbddc->coarse_subassembling);
3700: MatDestroy(&pcbddc->benign_change);
3701: VecDestroy(&pcbddc->benign_vec);
3702: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3703: MatDestroy(&pcbddc->benign_B0);
3704: PetscSFDestroy(&pcbddc->benign_sf);
3705: if (pcbddc->benign_zerodiag_subs) {
3706: PetscInt i;
3707: for (i=0;i<pcbddc->benign_n;i++) {
3708: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3709: }
3710: PetscFree(pcbddc->benign_zerodiag_subs);
3711: }
3712: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3713: return(0);
3714: }
3716: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3717: {
3718: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3719: PC_IS *pcis = (PC_IS*)pc->data;
3720: VecType impVecType;
3721: PetscInt n_constraints,n_R,old_size;
3725: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3726: n_R = pcis->n - pcbddc->n_vertices;
3727: VecGetType(pcis->vec1_N,&impVecType);
3728: /* local work vectors (try to avoid unneeded work)*/
3729: /* R nodes */
3730: old_size = -1;
3731: if (pcbddc->vec1_R) {
3732: VecGetSize(pcbddc->vec1_R,&old_size);
3733: }
3734: if (n_R != old_size) {
3735: VecDestroy(&pcbddc->vec1_R);
3736: VecDestroy(&pcbddc->vec2_R);
3737: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3738: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3739: VecSetType(pcbddc->vec1_R,impVecType);
3740: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3741: }
3742: /* local primal dofs */
3743: old_size = -1;
3744: if (pcbddc->vec1_P) {
3745: VecGetSize(pcbddc->vec1_P,&old_size);
3746: }
3747: if (pcbddc->local_primal_size != old_size) {
3748: VecDestroy(&pcbddc->vec1_P);
3749: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3750: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3751: VecSetType(pcbddc->vec1_P,impVecType);
3752: }
3753: /* local explicit constraints */
3754: old_size = -1;
3755: if (pcbddc->vec1_C) {
3756: VecGetSize(pcbddc->vec1_C,&old_size);
3757: }
3758: if (n_constraints && n_constraints != old_size) {
3759: VecDestroy(&pcbddc->vec1_C);
3760: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3761: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3762: VecSetType(pcbddc->vec1_C,impVecType);
3763: }
3764: return(0);
3765: }
3767: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3768: {
3769: PetscErrorCode ierr;
3770: /* pointers to pcis and pcbddc */
3771: PC_IS* pcis = (PC_IS*)pc->data;
3772: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3773: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3774: /* submatrices of local problem */
3775: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3776: /* submatrices of local coarse problem */
3777: Mat S_VV,S_CV,S_VC,S_CC;
3778: /* working matrices */
3779: Mat C_CR;
3780: /* additional working stuff */
3781: PC pc_R;
3782: Mat F,Brhs = NULL;
3783: Vec dummy_vec;
3784: PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3785: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3786: PetscScalar *work;
3787: PetscInt *idx_V_B;
3788: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3789: PetscInt i,n_R,n_D,n_B;
3791: /* some shortcuts to scalars */
3792: PetscScalar one=1.0,m_one=-1.0;
3795: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3796: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3798: /* Set Non-overlapping dimensions */
3799: n_vertices = pcbddc->n_vertices;
3800: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3801: n_B = pcis->n_B;
3802: n_D = pcis->n - n_B;
3803: n_R = pcis->n - n_vertices;
3805: /* vertices in boundary numbering */
3806: PetscMalloc1(n_vertices,&idx_V_B);
3807: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3808: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3810: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3811: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3812: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3813: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3814: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3815: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3816: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3817: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3818: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3819: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
3821: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3822: KSPGetPC(pcbddc->ksp_R,&pc_R);
3823: PCSetUp(pc_R);
3824: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3825: PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3826: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3827: lda_rhs = n_R;
3828: need_benign_correction = PETSC_FALSE;
3829: if (isLU || isILU || isCHOL) {
3830: PCFactorGetMatrix(pc_R,&F);
3831: } else if (sub_schurs && sub_schurs->reuse_solver) {
3832: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3833: MatFactorType type;
3835: F = reuse_solver->F;
3836: MatGetFactorType(F,&type);
3837: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3838: MatGetSize(F,&lda_rhs,NULL);
3839: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3840: } else {
3841: F = NULL;
3842: }
3844: /* determine if we can use a sparse right-hand side */
3845: sparserhs = PETSC_FALSE;
3846: if (F) {
3847: MatSolverType solver;
3849: MatFactorGetSolverType(F,&solver);
3850: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3851: }
3853: /* allocate workspace */
3854: n = 0;
3855: if (n_constraints) {
3856: n += lda_rhs*n_constraints;
3857: }
3858: if (n_vertices) {
3859: n = PetscMax(2*lda_rhs*n_vertices,n);
3860: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3861: }
3862: if (!pcbddc->symmetric_primal) {
3863: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3864: }
3865: PetscMalloc1(n,&work);
3867: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3868: dummy_vec = NULL;
3869: if (need_benign_correction && lda_rhs != n_R && F) {
3870: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3871: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3872: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3873: }
3875: /* Precompute stuffs needed for preprocessing and application of BDDC*/
3876: if (n_constraints) {
3877: Mat M3,C_B;
3878: IS is_aux;
3879: PetscScalar *array,*array2;
3881: MatDestroy(&pcbddc->local_auxmat1);
3882: MatDestroy(&pcbddc->local_auxmat2);
3884: /* Extract constraints on R nodes: C_{CR} */
3885: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
3886: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3887: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
3889: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3890: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3891: if (!sparserhs) {
3892: PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
3893: for (i=0;i<n_constraints;i++) {
3894: const PetscScalar *row_cmat_values;
3895: const PetscInt *row_cmat_indices;
3896: PetscInt size_of_constraint,j;
3898: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3899: for (j=0;j<size_of_constraint;j++) {
3900: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
3901: }
3902: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
3903: }
3904: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
3905: } else {
3906: Mat tC_CR;
3908: MatScale(C_CR,-1.0);
3909: if (lda_rhs != n_R) {
3910: PetscScalar *aa;
3911: PetscInt r,*ii,*jj;
3912: PetscBool done;
3914: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3915: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
3916: MatSeqAIJGetArray(C_CR,&aa);
3917: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
3918: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
3919: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
3920: } else {
3921: PetscObjectReference((PetscObject)C_CR);
3922: tC_CR = C_CR;
3923: }
3924: MatCreateTranspose(tC_CR,&Brhs);
3925: MatDestroy(&tC_CR);
3926: }
3927: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
3928: if (F) {
3929: if (need_benign_correction) {
3930: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3932: /* rhs is already zero on interior dofs, no need to change the rhs */
3933: PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
3934: }
3935: MatMatSolve(F,Brhs,local_auxmat2_R);
3936: if (need_benign_correction) {
3937: PetscScalar *marr;
3938: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3940: MatDenseGetArray(local_auxmat2_R,&marr);
3941: if (lda_rhs != n_R) {
3942: for (i=0;i<n_constraints;i++) {
3943: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
3944: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
3945: VecResetArray(dummy_vec);
3946: }
3947: } else {
3948: for (i=0;i<n_constraints;i++) {
3949: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
3950: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
3951: VecResetArray(pcbddc->vec1_R);
3952: }
3953: }
3954: MatDenseRestoreArray(local_auxmat2_R,&marr);
3955: }
3956: } else {
3957: PetscScalar *marr;
3959: MatDenseGetArray(local_auxmat2_R,&marr);
3960: for (i=0;i<n_constraints;i++) {
3961: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
3962: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
3963: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3964: VecResetArray(pcbddc->vec1_R);
3965: VecResetArray(pcbddc->vec2_R);
3966: }
3967: MatDenseRestoreArray(local_auxmat2_R,&marr);
3968: }
3969: if (sparserhs) {
3970: MatScale(C_CR,-1.0);
3971: }
3972: MatDestroy(&Brhs);
3973: if (!pcbddc->switch_static) {
3974: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
3975: MatDenseGetArray(pcbddc->local_auxmat2,&array);
3976: MatDenseGetArray(local_auxmat2_R,&array2);
3977: for (i=0;i<n_constraints;i++) {
3978: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
3979: VecPlaceArray(pcis->vec1_B,array+i*n_B);
3980: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3981: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3982: VecResetArray(pcis->vec1_B);
3983: VecResetArray(pcbddc->vec1_R);
3984: }
3985: MatDenseRestoreArray(local_auxmat2_R,&array2);
3986: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
3987: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
3988: } else {
3989: if (lda_rhs != n_R) {
3990: IS dummy;
3992: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
3993: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
3994: ISDestroy(&dummy);
3995: } else {
3996: PetscObjectReference((PetscObject)local_auxmat2_R);
3997: pcbddc->local_auxmat2 = local_auxmat2_R;
3998: }
3999: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4000: }
4001: ISDestroy(&is_aux);
4002: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
4003: MatScale(M3,m_one);
4004: if (isCHOL) {
4005: MatCholeskyFactor(M3,NULL,NULL);
4006: } else {
4007: MatLUFactor(M3,NULL,NULL,NULL);
4008: }
4009: MatSeqDenseInvertFactors_Private(M3);
4010: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4011: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4012: MatDestroy(&C_B);
4013: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4014: MatDestroy(&M3);
4015: }
4017: /* Get submatrices from subdomain matrix */
4018: if (n_vertices) {
4019: IS is_aux;
4020: PetscBool isseqaij;
4022: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4023: IS tis;
4025: ISDuplicate(pcbddc->is_R_local,&tis);
4026: ISSort(tis);
4027: ISComplement(tis,0,pcis->n,&is_aux);
4028: ISDestroy(&tis);
4029: } else {
4030: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4031: }
4032: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4033: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4034: PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4035: if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4036: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4037: }
4038: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4039: ISDestroy(&is_aux);
4040: }
4042: /* Matrix of coarse basis functions (local) */
4043: if (pcbddc->coarse_phi_B) {
4044: PetscInt on_B,on_primal,on_D=n_D;
4045: if (pcbddc->coarse_phi_D) {
4046: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4047: }
4048: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4049: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4050: PetscScalar *marray;
4052: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4053: PetscFree(marray);
4054: MatDestroy(&pcbddc->coarse_phi_B);
4055: MatDestroy(&pcbddc->coarse_psi_B);
4056: MatDestroy(&pcbddc->coarse_phi_D);
4057: MatDestroy(&pcbddc->coarse_psi_D);
4058: }
4059: }
4061: if (!pcbddc->coarse_phi_B) {
4062: PetscScalar *marr;
4064: /* memory size */
4065: n = n_B*pcbddc->local_primal_size;
4066: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4067: if (!pcbddc->symmetric_primal) n *= 2;
4068: PetscCalloc1(n,&marr);
4069: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4070: marr += n_B*pcbddc->local_primal_size;
4071: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4072: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4073: marr += n_D*pcbddc->local_primal_size;
4074: }
4075: if (!pcbddc->symmetric_primal) {
4076: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4077: marr += n_B*pcbddc->local_primal_size;
4078: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4079: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4080: }
4081: } else {
4082: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4083: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4084: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4085: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4086: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4087: }
4088: }
4089: }
4091: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4092: p0_lidx_I = NULL;
4093: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4094: const PetscInt *idxs;
4096: ISGetIndices(pcis->is_I_local,&idxs);
4097: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4098: for (i=0;i<pcbddc->benign_n;i++) {
4099: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4100: }
4101: ISRestoreIndices(pcis->is_I_local,&idxs);
4102: }
4104: /* vertices */
4105: if (n_vertices) {
4106: PetscBool restoreavr = PETSC_FALSE;
4108: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4110: if (n_R) {
4111: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4112: PetscBLASInt B_N,B_one = 1;
4113: PetscScalar *x,*y;
4115: MatScale(A_RV,m_one);
4116: if (need_benign_correction) {
4117: ISLocalToGlobalMapping RtoN;
4118: IS is_p0;
4119: PetscInt *idxs_p0,n;
4121: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4122: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4123: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4124: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4125: ISLocalToGlobalMappingDestroy(&RtoN);
4126: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4127: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4128: ISDestroy(&is_p0);
4129: }
4131: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4132: if (!sparserhs || need_benign_correction) {
4133: if (lda_rhs == n_R) {
4134: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4135: } else {
4136: PetscScalar *av,*array;
4137: const PetscInt *xadj,*adjncy;
4138: PetscInt n;
4139: PetscBool flg_row;
4141: array = work+lda_rhs*n_vertices;
4142: PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4143: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4144: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4145: MatSeqAIJGetArray(A_RV,&av);
4146: for (i=0;i<n;i++) {
4147: PetscInt j;
4148: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4149: }
4150: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4151: MatDestroy(&A_RV);
4152: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4153: }
4154: if (need_benign_correction) {
4155: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4156: PetscScalar *marr;
4158: MatDenseGetArray(A_RV,&marr);
4159: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4161: | 0 0 0 | (V)
4162: L = | 0 0 -1 | (P-p0)
4163: | 0 0 -1 | (p0)
4165: */
4166: for (i=0;i<reuse_solver->benign_n;i++) {
4167: const PetscScalar *vals;
4168: const PetscInt *idxs,*idxs_zero;
4169: PetscInt n,j,nz;
4171: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4172: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4173: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4174: for (j=0;j<n;j++) {
4175: PetscScalar val = vals[j];
4176: PetscInt k,col = idxs[j];
4177: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4178: }
4179: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4180: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4181: }
4182: MatDenseRestoreArray(A_RV,&marr);
4183: }
4184: PetscObjectReference((PetscObject)A_RV);
4185: Brhs = A_RV;
4186: } else {
4187: Mat tA_RVT,A_RVT;
4189: if (!pcbddc->symmetric_primal) {
4190: /* A_RV already scaled by -1 */
4191: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4192: } else {
4193: restoreavr = PETSC_TRUE;
4194: MatScale(A_VR,-1.0);
4195: PetscObjectReference((PetscObject)A_VR);
4196: A_RVT = A_VR;
4197: }
4198: if (lda_rhs != n_R) {
4199: PetscScalar *aa;
4200: PetscInt r,*ii,*jj;
4201: PetscBool done;
4203: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4204: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4205: MatSeqAIJGetArray(A_RVT,&aa);
4206: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4207: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4208: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4209: } else {
4210: PetscObjectReference((PetscObject)A_RVT);
4211: tA_RVT = A_RVT;
4212: }
4213: MatCreateTranspose(tA_RVT,&Brhs);
4214: MatDestroy(&tA_RVT);
4215: MatDestroy(&A_RVT);
4216: }
4217: if (F) {
4218: /* need to correct the rhs */
4219: if (need_benign_correction) {
4220: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4221: PetscScalar *marr;
4223: MatDenseGetArray(Brhs,&marr);
4224: if (lda_rhs != n_R) {
4225: for (i=0;i<n_vertices;i++) {
4226: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4227: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4228: VecResetArray(dummy_vec);
4229: }
4230: } else {
4231: for (i=0;i<n_vertices;i++) {
4232: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4233: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4234: VecResetArray(pcbddc->vec1_R);
4235: }
4236: }
4237: MatDenseRestoreArray(Brhs,&marr);
4238: }
4239: MatMatSolve(F,Brhs,A_RRmA_RV);
4240: if (restoreavr) {
4241: MatScale(A_VR,-1.0);
4242: }
4243: /* need to correct the solution */
4244: if (need_benign_correction) {
4245: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4246: PetscScalar *marr;
4248: MatDenseGetArray(A_RRmA_RV,&marr);
4249: if (lda_rhs != n_R) {
4250: for (i=0;i<n_vertices;i++) {
4251: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4252: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4253: VecResetArray(dummy_vec);
4254: }
4255: } else {
4256: for (i=0;i<n_vertices;i++) {
4257: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4258: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4259: VecResetArray(pcbddc->vec1_R);
4260: }
4261: }
4262: MatDenseRestoreArray(A_RRmA_RV,&marr);
4263: }
4264: } else {
4265: MatDenseGetArray(Brhs,&y);
4266: for (i=0;i<n_vertices;i++) {
4267: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4268: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4269: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4270: VecResetArray(pcbddc->vec1_R);
4271: VecResetArray(pcbddc->vec2_R);
4272: }
4273: MatDenseRestoreArray(Brhs,&y);
4274: }
4275: MatDestroy(&A_RV);
4276: MatDestroy(&Brhs);
4277: /* S_VV and S_CV */
4278: if (n_constraints) {
4279: Mat B;
4281: PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4282: for (i=0;i<n_vertices;i++) {
4283: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4284: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4285: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4286: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4287: VecResetArray(pcis->vec1_B);
4288: VecResetArray(pcbddc->vec1_R);
4289: }
4290: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4291: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4292: MatDestroy(&B);
4293: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4294: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4295: MatScale(S_CV,m_one);
4296: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4297: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4298: MatDestroy(&B);
4299: }
4300: if (lda_rhs != n_R) {
4301: MatDestroy(&A_RRmA_RV);
4302: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4303: MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4304: }
4305: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4306: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4307: if (need_benign_correction) {
4308: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4309: PetscScalar *marr,*sums;
4311: PetscMalloc1(n_vertices,&sums);
4312: MatDenseGetArray(S_VVt,&marr);
4313: for (i=0;i<reuse_solver->benign_n;i++) {
4314: const PetscScalar *vals;
4315: const PetscInt *idxs,*idxs_zero;
4316: PetscInt n,j,nz;
4318: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4319: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4320: for (j=0;j<n_vertices;j++) {
4321: PetscInt k;
4322: sums[j] = 0.;
4323: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4324: }
4325: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4326: for (j=0;j<n;j++) {
4327: PetscScalar val = vals[j];
4328: PetscInt k;
4329: for (k=0;k<n_vertices;k++) {
4330: marr[idxs[j]+k*n_vertices] += val*sums[k];
4331: }
4332: }
4333: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4334: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4335: }
4336: PetscFree(sums);
4337: MatDenseRestoreArray(S_VVt,&marr);
4338: MatDestroy(&A_RV_bcorr);
4339: }
4340: MatDestroy(&A_RRmA_RV);
4341: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4342: MatDenseGetArray(A_VV,&x);
4343: MatDenseGetArray(S_VVt,&y);
4344: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4345: MatDenseRestoreArray(A_VV,&x);
4346: MatDenseRestoreArray(S_VVt,&y);
4347: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4348: MatDestroy(&S_VVt);
4349: } else {
4350: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4351: }
4352: MatDestroy(&A_VV);
4354: /* coarse basis functions */
4355: for (i=0;i<n_vertices;i++) {
4356: PetscScalar *y;
4358: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4359: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4360: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4361: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4362: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4363: y[n_B*i+idx_V_B[i]] = 1.0;
4364: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4365: VecResetArray(pcis->vec1_B);
4367: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4368: PetscInt j;
4370: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4371: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4372: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4373: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4374: VecResetArray(pcis->vec1_D);
4375: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4376: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4377: }
4378: VecResetArray(pcbddc->vec1_R);
4379: }
4380: /* if n_R == 0 the object is not destroyed */
4381: MatDestroy(&A_RV);
4382: }
4383: VecDestroy(&dummy_vec);
4385: if (n_constraints) {
4386: Mat B;
4388: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4389: MatScale(S_CC,m_one);
4390: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4391: MatScale(S_CC,m_one);
4392: if (n_vertices) {
4393: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4394: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4395: } else {
4396: Mat S_VCt;
4398: if (lda_rhs != n_R) {
4399: MatDestroy(&B);
4400: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4401: MatSeqDenseSetLDA(B,lda_rhs);
4402: }
4403: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4404: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4405: MatDestroy(&S_VCt);
4406: }
4407: }
4408: MatDestroy(&B);
4409: /* coarse basis functions */
4410: for (i=0;i<n_constraints;i++) {
4411: PetscScalar *y;
4413: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4414: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4415: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4416: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4417: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4418: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4419: VecResetArray(pcis->vec1_B);
4420: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4421: PetscInt j;
4423: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4424: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4425: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4426: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4427: VecResetArray(pcis->vec1_D);
4428: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4429: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4430: }
4431: VecResetArray(pcbddc->vec1_R);
4432: }
4433: }
4434: if (n_constraints) {
4435: MatDestroy(&local_auxmat2_R);
4436: }
4437: PetscFree(p0_lidx_I);
4439: /* coarse matrix entries relative to B_0 */
4440: if (pcbddc->benign_n) {
4441: Mat B0_B,B0_BPHI;
4442: IS is_dummy;
4443: PetscScalar *data;
4444: PetscInt j;
4446: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4447: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4448: ISDestroy(&is_dummy);
4449: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4450: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4451: MatDenseGetArray(B0_BPHI,&data);
4452: for (j=0;j<pcbddc->benign_n;j++) {
4453: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4454: for (i=0;i<pcbddc->local_primal_size;i++) {
4455: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4456: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4457: }
4458: }
4459: MatDenseRestoreArray(B0_BPHI,&data);
4460: MatDestroy(&B0_B);
4461: MatDestroy(&B0_BPHI);
4462: }
4464: /* compute other basis functions for non-symmetric problems */
4465: if (!pcbddc->symmetric_primal) {
4466: Mat B_V=NULL,B_C=NULL;
4467: PetscScalar *marray;
4469: if (n_constraints) {
4470: Mat S_CCT,C_CRT;
4472: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4473: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4474: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4475: MatDestroy(&S_CCT);
4476: if (n_vertices) {
4477: Mat S_VCT;
4479: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4480: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4481: MatDestroy(&S_VCT);
4482: }
4483: MatDestroy(&C_CRT);
4484: } else {
4485: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4486: }
4487: if (n_vertices && n_R) {
4488: PetscScalar *av,*marray;
4489: const PetscInt *xadj,*adjncy;
4490: PetscInt n;
4491: PetscBool flg_row;
4493: /* B_V = B_V - A_VR^T */
4494: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4495: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4496: MatSeqAIJGetArray(A_VR,&av);
4497: MatDenseGetArray(B_V,&marray);
4498: for (i=0;i<n;i++) {
4499: PetscInt j;
4500: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4501: }
4502: MatDenseRestoreArray(B_V,&marray);
4503: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4504: MatDestroy(&A_VR);
4505: }
4507: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4508: if (n_vertices) {
4509: MatDenseGetArray(B_V,&marray);
4510: for (i=0;i<n_vertices;i++) {
4511: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4512: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4513: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4514: VecResetArray(pcbddc->vec1_R);
4515: VecResetArray(pcbddc->vec2_R);
4516: }
4517: MatDenseRestoreArray(B_V,&marray);
4518: }
4519: if (B_C) {
4520: MatDenseGetArray(B_C,&marray);
4521: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4522: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4523: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4524: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4525: VecResetArray(pcbddc->vec1_R);
4526: VecResetArray(pcbddc->vec2_R);
4527: }
4528: MatDenseRestoreArray(B_C,&marray);
4529: }
4530: /* coarse basis functions */
4531: for (i=0;i<pcbddc->local_primal_size;i++) {
4532: PetscScalar *y;
4534: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4535: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4536: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4537: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4538: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4539: if (i<n_vertices) {
4540: y[n_B*i+idx_V_B[i]] = 1.0;
4541: }
4542: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4543: VecResetArray(pcis->vec1_B);
4545: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4546: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4547: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4548: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4549: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4550: VecResetArray(pcis->vec1_D);
4551: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4552: }
4553: VecResetArray(pcbddc->vec1_R);
4554: }
4555: MatDestroy(&B_V);
4556: MatDestroy(&B_C);
4557: }
4559: /* free memory */
4560: PetscFree(idx_V_B);
4561: MatDestroy(&S_VV);
4562: MatDestroy(&S_CV);
4563: MatDestroy(&S_VC);
4564: MatDestroy(&S_CC);
4565: PetscFree(work);
4566: if (n_vertices) {
4567: MatDestroy(&A_VR);
4568: }
4569: if (n_constraints) {
4570: MatDestroy(&C_CR);
4571: }
4572: /* Checking coarse_sub_mat and coarse basis functions */
4573: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4574: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4575: if (pcbddc->dbg_flag) {
4576: Mat coarse_sub_mat;
4577: Mat AUXMAT,TM1,TM2,TM3,TM4;
4578: Mat coarse_phi_D,coarse_phi_B;
4579: Mat coarse_psi_D,coarse_psi_B;
4580: Mat A_II,A_BB,A_IB,A_BI;
4581: Mat C_B,CPHI;
4582: IS is_dummy;
4583: Vec mones;
4584: MatType checkmattype=MATSEQAIJ;
4585: PetscReal real_value;
4587: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4588: Mat A;
4589: PCBDDCBenignProject(pc,NULL,NULL,&A);
4590: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4591: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4592: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4593: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4594: MatDestroy(&A);
4595: } else {
4596: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4597: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4598: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4599: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4600: }
4601: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4602: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4603: if (!pcbddc->symmetric_primal) {
4604: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4605: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4606: }
4607: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4609: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4610: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4611: PetscViewerFlush(pcbddc->dbg_viewer);
4612: if (!pcbddc->symmetric_primal) {
4613: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4614: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4615: MatDestroy(&AUXMAT);
4616: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4617: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4618: MatDestroy(&AUXMAT);
4619: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4620: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4621: MatDestroy(&AUXMAT);
4622: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4623: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4624: MatDestroy(&AUXMAT);
4625: } else {
4626: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4627: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4628: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4629: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4630: MatDestroy(&AUXMAT);
4631: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4632: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4633: MatDestroy(&AUXMAT);
4634: }
4635: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4636: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4637: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4638: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4639: if (pcbddc->benign_n) {
4640: Mat B0_B,B0_BPHI;
4641: PetscScalar *data,*data2;
4642: PetscInt j;
4644: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4645: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4646: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4647: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4648: MatDenseGetArray(TM1,&data);
4649: MatDenseGetArray(B0_BPHI,&data2);
4650: for (j=0;j<pcbddc->benign_n;j++) {
4651: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4652: for (i=0;i<pcbddc->local_primal_size;i++) {
4653: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4654: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4655: }
4656: }
4657: MatDenseRestoreArray(TM1,&data);
4658: MatDenseRestoreArray(B0_BPHI,&data2);
4659: MatDestroy(&B0_B);
4660: ISDestroy(&is_dummy);
4661: MatDestroy(&B0_BPHI);
4662: }
4663: #if 0
4664: {
4665: PetscViewer viewer;
4666: char filename[256];
4667: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4668: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4669: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4670: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4671: MatView(coarse_sub_mat,viewer);
4672: PetscObjectSetName((PetscObject)TM1,"projected");
4673: MatView(TM1,viewer);
4674: if (pcbddc->coarse_phi_B) {
4675: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4676: MatView(pcbddc->coarse_phi_B,viewer);
4677: }
4678: if (pcbddc->coarse_phi_D) {
4679: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4680: MatView(pcbddc->coarse_phi_D,viewer);
4681: }
4682: if (pcbddc->coarse_psi_B) {
4683: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4684: MatView(pcbddc->coarse_psi_B,viewer);
4685: }
4686: if (pcbddc->coarse_psi_D) {
4687: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4688: MatView(pcbddc->coarse_psi_D,viewer);
4689: }
4690: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4691: MatView(pcbddc->local_mat,viewer);
4692: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4693: MatView(pcbddc->ConstraintMatrix,viewer);
4694: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4695: ISView(pcis->is_I_local,viewer);
4696: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4697: ISView(pcis->is_B_local,viewer);
4698: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4699: ISView(pcbddc->is_R_local,viewer);
4700: PetscViewerDestroy(&viewer);
4701: }
4702: #endif
4703: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4704: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4705: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4706: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4708: /* check constraints */
4709: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4710: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4711: if (!pcbddc->benign_n) { /* TODO: add benign case */
4712: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4713: } else {
4714: PetscScalar *data;
4715: Mat tmat;
4716: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4717: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4718: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4719: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4720: MatDestroy(&tmat);
4721: }
4722: MatCreateVecs(CPHI,&mones,NULL);
4723: VecSet(mones,-1.0);
4724: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4725: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4726: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4727: if (!pcbddc->symmetric_primal) {
4728: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4729: VecSet(mones,-1.0);
4730: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4731: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4732: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4733: }
4734: MatDestroy(&C_B);
4735: MatDestroy(&CPHI);
4736: ISDestroy(&is_dummy);
4737: VecDestroy(&mones);
4738: PetscViewerFlush(pcbddc->dbg_viewer);
4739: MatDestroy(&A_II);
4740: MatDestroy(&A_BB);
4741: MatDestroy(&A_IB);
4742: MatDestroy(&A_BI);
4743: MatDestroy(&TM1);
4744: MatDestroy(&TM2);
4745: MatDestroy(&TM3);
4746: MatDestroy(&TM4);
4747: MatDestroy(&coarse_phi_D);
4748: MatDestroy(&coarse_phi_B);
4749: if (!pcbddc->symmetric_primal) {
4750: MatDestroy(&coarse_psi_D);
4751: MatDestroy(&coarse_psi_B);
4752: }
4753: MatDestroy(&coarse_sub_mat);
4754: }
4755: /* get back data */
4756: *coarse_submat_vals_n = coarse_submat_vals;
4757: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4758: return(0);
4759: }
4761: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4762: {
4763: Mat *work_mat;
4764: IS isrow_s,iscol_s;
4765: PetscBool rsorted,csorted;
4766: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4770: ISSorted(isrow,&rsorted);
4771: ISSorted(iscol,&csorted);
4772: ISGetLocalSize(isrow,&rsize);
4773: ISGetLocalSize(iscol,&csize);
4775: if (!rsorted) {
4776: const PetscInt *idxs;
4777: PetscInt *idxs_sorted,i;
4779: PetscMalloc1(rsize,&idxs_perm_r);
4780: PetscMalloc1(rsize,&idxs_sorted);
4781: for (i=0;i<rsize;i++) {
4782: idxs_perm_r[i] = i;
4783: }
4784: ISGetIndices(isrow,&idxs);
4785: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4786: for (i=0;i<rsize;i++) {
4787: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4788: }
4789: ISRestoreIndices(isrow,&idxs);
4790: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4791: } else {
4792: PetscObjectReference((PetscObject)isrow);
4793: isrow_s = isrow;
4794: }
4796: if (!csorted) {
4797: if (isrow == iscol) {
4798: PetscObjectReference((PetscObject)isrow_s);
4799: iscol_s = isrow_s;
4800: } else {
4801: const PetscInt *idxs;
4802: PetscInt *idxs_sorted,i;
4804: PetscMalloc1(csize,&idxs_perm_c);
4805: PetscMalloc1(csize,&idxs_sorted);
4806: for (i=0;i<csize;i++) {
4807: idxs_perm_c[i] = i;
4808: }
4809: ISGetIndices(iscol,&idxs);
4810: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4811: for (i=0;i<csize;i++) {
4812: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4813: }
4814: ISRestoreIndices(iscol,&idxs);
4815: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4816: }
4817: } else {
4818: PetscObjectReference((PetscObject)iscol);
4819: iscol_s = iscol;
4820: }
4822: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
4824: if (!rsorted || !csorted) {
4825: Mat new_mat;
4826: IS is_perm_r,is_perm_c;
4828: if (!rsorted) {
4829: PetscInt *idxs_r,i;
4830: PetscMalloc1(rsize,&idxs_r);
4831: for (i=0;i<rsize;i++) {
4832: idxs_r[idxs_perm_r[i]] = i;
4833: }
4834: PetscFree(idxs_perm_r);
4835: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4836: } else {
4837: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4838: }
4839: ISSetPermutation(is_perm_r);
4841: if (!csorted) {
4842: if (isrow_s == iscol_s) {
4843: PetscObjectReference((PetscObject)is_perm_r);
4844: is_perm_c = is_perm_r;
4845: } else {
4846: PetscInt *idxs_c,i;
4847: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4848: PetscMalloc1(csize,&idxs_c);
4849: for (i=0;i<csize;i++) {
4850: idxs_c[idxs_perm_c[i]] = i;
4851: }
4852: PetscFree(idxs_perm_c);
4853: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4854: }
4855: } else {
4856: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4857: }
4858: ISSetPermutation(is_perm_c);
4860: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4861: MatDestroy(&work_mat[0]);
4862: work_mat[0] = new_mat;
4863: ISDestroy(&is_perm_r);
4864: ISDestroy(&is_perm_c);
4865: }
4867: PetscObjectReference((PetscObject)work_mat[0]);
4868: *B = work_mat[0];
4869: MatDestroyMatrices(1,&work_mat);
4870: ISDestroy(&isrow_s);
4871: ISDestroy(&iscol_s);
4872: return(0);
4873: }
4875: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4876: {
4877: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4878: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4879: Mat new_mat,lA;
4880: IS is_local,is_global;
4881: PetscInt local_size;
4882: PetscBool isseqaij;
4886: MatDestroy(&pcbddc->local_mat);
4887: MatGetSize(matis->A,&local_size,NULL);
4888: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
4889: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
4890: ISDestroy(&is_local);
4891: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
4892: ISDestroy(&is_global);
4894: /* check */
4895: if (pcbddc->dbg_flag) {
4896: Vec x,x_change;
4897: PetscReal error;
4899: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
4900: VecSetRandom(x,NULL);
4901: MatMult(ChangeOfBasisMatrix,x,x_change);
4902: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4903: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
4904: MatMult(new_mat,matis->x,matis->y);
4905: if (!pcbddc->change_interior) {
4906: const PetscScalar *x,*y,*v;
4907: PetscReal lerror = 0.;
4908: PetscInt i;
4910: VecGetArrayRead(matis->x,&x);
4911: VecGetArrayRead(matis->y,&y);
4912: VecGetArrayRead(matis->counter,&v);
4913: for (i=0;i<local_size;i++)
4914: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
4915: lerror = PetscAbsScalar(x[i]-y[i]);
4916: VecRestoreArrayRead(matis->x,&x);
4917: VecRestoreArrayRead(matis->y,&y);
4918: VecRestoreArrayRead(matis->counter,&v);
4919: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
4920: if (error > PETSC_SMALL) {
4921: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4922: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
4923: } else {
4924: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
4925: }
4926: }
4927: }
4928: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4929: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
4930: VecAXPY(x,-1.0,x_change);
4931: VecNorm(x,NORM_INFINITY,&error);
4932: if (error > PETSC_SMALL) {
4933: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4934: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
4935: } else {
4936: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
4937: }
4938: }
4939: VecDestroy(&x);
4940: VecDestroy(&x_change);
4941: }
4943: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4944: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
4946: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4947: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
4948: if (isseqaij) {
4949: MatDestroy(&pcbddc->local_mat);
4950: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4951: if (lA) {
4952: Mat work;
4953: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4954: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4955: MatDestroy(&work);
4956: }
4957: } else {
4958: Mat work_mat;
4960: MatDestroy(&pcbddc->local_mat);
4961: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4962: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
4963: MatDestroy(&work_mat);
4964: if (lA) {
4965: Mat work;
4966: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
4967: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
4968: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
4969: MatDestroy(&work);
4970: }
4971: }
4972: if (matis->A->symmetric_set) {
4973: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
4974: #if !defined(PETSC_USE_COMPLEX)
4975: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
4976: #endif
4977: }
4978: MatDestroy(&new_mat);
4979: return(0);
4980: }
4982: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4983: {
4984: PC_IS* pcis = (PC_IS*)(pc->data);
4985: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4986: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4987: PetscInt *idx_R_local=NULL;
4988: PetscInt n_vertices,i,j,n_R,n_D,n_B;
4989: PetscInt vbs,bs;
4990: PetscBT bitmask=NULL;
4991: PetscErrorCode ierr;
4994: /*
4995: No need to setup local scatters if
4996: - primal space is unchanged
4997: AND
4998: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4999: AND
5000: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5001: */
5002: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5003: return(0);
5004: }
5005: /* destroy old objects */
5006: ISDestroy(&pcbddc->is_R_local);
5007: VecScatterDestroy(&pcbddc->R_to_B);
5008: VecScatterDestroy(&pcbddc->R_to_D);
5009: /* Set Non-overlapping dimensions */
5010: n_B = pcis->n_B;
5011: n_D = pcis->n - n_B;
5012: n_vertices = pcbddc->n_vertices;
5014: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5016: /* create auxiliary bitmask and allocate workspace */
5017: if (!sub_schurs || !sub_schurs->reuse_solver) {
5018: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5019: PetscBTCreate(pcis->n,&bitmask);
5020: for (i=0;i<n_vertices;i++) {
5021: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5022: }
5024: for (i=0, n_R=0; i<pcis->n; i++) {
5025: if (!PetscBTLookup(bitmask,i)) {
5026: idx_R_local[n_R++] = i;
5027: }
5028: }
5029: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5030: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5032: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5033: ISGetLocalSize(reuse_solver->is_R,&n_R);
5034: }
5036: /* Block code */
5037: vbs = 1;
5038: MatGetBlockSize(pcbddc->local_mat,&bs);
5039: if (bs>1 && !(n_vertices%bs)) {
5040: PetscBool is_blocked = PETSC_TRUE;
5041: PetscInt *vary;
5042: if (!sub_schurs || !sub_schurs->reuse_solver) {
5043: PetscMalloc1(pcis->n/bs,&vary);
5044: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5045: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5046: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5047: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5048: for (i=0; i<pcis->n/bs; i++) {
5049: if (vary[i]!=0 && vary[i]!=bs) {
5050: is_blocked = PETSC_FALSE;
5051: break;
5052: }
5053: }
5054: PetscFree(vary);
5055: } else {
5056: /* Verify directly the R set */
5057: for (i=0; i<n_R/bs; i++) {
5058: PetscInt j,node=idx_R_local[bs*i];
5059: for (j=1; j<bs; j++) {
5060: if (node != idx_R_local[bs*i+j]-j) {
5061: is_blocked = PETSC_FALSE;
5062: break;
5063: }
5064: }
5065: }
5066: }
5067: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5068: vbs = bs;
5069: for (i=0;i<n_R/vbs;i++) {
5070: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5071: }
5072: }
5073: }
5074: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5075: if (sub_schurs && sub_schurs->reuse_solver) {
5076: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5078: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5079: ISDestroy(&reuse_solver->is_R);
5080: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5081: reuse_solver->is_R = pcbddc->is_R_local;
5082: } else {
5083: PetscFree(idx_R_local);
5084: }
5086: /* print some info if requested */
5087: if (pcbddc->dbg_flag) {
5088: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5089: PetscViewerFlush(pcbddc->dbg_viewer);
5090: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5091: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5092: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5093: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5094: PetscViewerFlush(pcbddc->dbg_viewer);
5095: }
5097: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5098: if (!sub_schurs || !sub_schurs->reuse_solver) {
5099: IS is_aux1,is_aux2;
5100: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5102: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5103: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5104: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5105: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5106: for (i=0; i<n_D; i++) {
5107: PetscBTSet(bitmask,is_indices[i]);
5108: }
5109: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5110: for (i=0, j=0; i<n_R; i++) {
5111: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5112: aux_array1[j++] = i;
5113: }
5114: }
5115: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5116: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5117: for (i=0, j=0; i<n_B; i++) {
5118: if (!PetscBTLookup(bitmask,is_indices[i])) {
5119: aux_array2[j++] = i;
5120: }
5121: }
5122: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5123: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5124: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5125: ISDestroy(&is_aux1);
5126: ISDestroy(&is_aux2);
5128: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5129: PetscMalloc1(n_D,&aux_array1);
5130: for (i=0, j=0; i<n_R; i++) {
5131: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5132: aux_array1[j++] = i;
5133: }
5134: }
5135: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5136: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5137: ISDestroy(&is_aux1);
5138: }
5139: PetscBTDestroy(&bitmask);
5140: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5141: } else {
5142: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5143: IS tis;
5144: PetscInt schur_size;
5146: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5147: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5148: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5149: ISDestroy(&tis);
5150: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5151: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5152: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5153: ISDestroy(&tis);
5154: }
5155: }
5156: return(0);
5157: }
/*
   PCBDDCSetUpLocalSolvers - creates (on first use) and configures the two sequential
   KSP objects stored in the BDDC data: pcbddc->ksp_D, whose operator is pcis->A_II,
   and pcbddc->ksp_R, whose operator is the (is_R_local,is_R_local) block A_RR.

   Input Parameters:
+  pc        - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
.  dirichlet - if true, the "DIRICHLET PROBLEM" section (ksp_D) is executed
-  neumann   - if true, the "NEUMANN PROBLEM" section (ksp_R) is executed

   Notes:
   Both solvers default to KSPPREONLY with PCCHOLESKY (MATSEQSBAIJ operator) or PCLU
   (anything else), and KSPSetFromOptions() is called on them only when they are created.
   If pcbddc->sub_schurs->reuse_solver exists, the PCs stored there (interior_solver,
   correction_solver) are attached to the KSPs instead.
   When pcbddc->dbg_flag is set, each requested solver is tested against a random
   vector and the infinity norm of the error is printed on pcbddc->dbg_viewer.
   The function always returns 0 in this listing: the PETSc calls below are not
   wrapped in error checks here.
*/
5160: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5161: {
5162: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5163: PC_IS *pcis = (PC_IS*)pc->data;
5164: PC pc_temp;
5165: Mat A_RR;
5166: MatReuse reuse;
5167: PetscScalar m_one = -1.0;
5168: PetscReal value;
5169: PetscInt n_D,n_R;
5170: PetscBool check_corr,issbaij;
5172: /* buffers for the options prefixes of the Dirichlet and Neumann solvers */
5173: char dir_prefix[256],neu_prefix[256],str_level[16];
5174: size_t len;
5177: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5178: /* compute prefixes */
5179: PetscStrcpy(dir_prefix,"");
5180: PetscStrcpy(neu_prefix,"");
5181: if (!pcbddc->current_level) {
/* finest level: <pc prefix>pc_bddc_dirichlet_ and <pc prefix>pc_bddc_neumann_ */
5182: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5183: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5184: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5185: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5186: } else {
/* coarser levels: strip the trailing coarse tag from the pc prefix, then append
   the solver name followed by the level tag "l<current_level>_" */
5187: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5188: PetscStrlen(((PetscObject)pc)->prefix,&len);
/* NOTE(review): len is a size_t, so the subtractions below assume the prefix really
   ends with the tag being removed; a shorter prefix would wrap around - TODO confirm
   that callers always set such a prefix on coarse levels */
5189: len -= 15; /* remove "pc_bddc_coarse_" */
5190: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5191: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5192: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5193: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5194: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5195: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5196: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5197: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5198: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5199: }
5201: /* DIRICHLET PROBLEM */
5202: if (dirichlet) {
5203: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
/* benign trick without explicit change of basis: only supported when the solvers
   from sub_schurs are reused */
5204: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5205: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5206: if (pcbddc->dbg_flag) {
5207: Mat A_IIn;
/* in debug mode replace pcis->A_II with the matrix returned by PCBDDCBenignProject()
   (it is used by the solver check at the end of this function) */
5209: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5210: MatDestroy(&pcis->A_II);
5211: pcis->A_II = A_IIn;
5212: }
5213: }
/* propagate the symmetry flag of the local matrix, if it has been set */
5214: if (pcbddc->local_mat->symmetric_set) {
5215: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5216: }
5217: /* Matrix for Dirichlet problem is pcis->A_II */
5218: n_D = pcis->n - pcis->n_B;
5219: if (!pcbddc->ksp_D) { /* create object if not yet built */
5220: void (*f)(void) = 0;
5222: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5223: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5224: /* default: preonly + direct factorization (Cholesky for SBAIJ, LU otherwise) */
5225: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5226: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5227: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
5228: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5229: if (issbaij) {
5230: PCSetType(pc_temp,PCCHOLESKY);
5231: } else {
5232: PCSetType(pc_temp,PCLU);
5233: }
5234: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5235: /* Allow user's customization */
5236: KSPSetFromOptions(pcbddc->ksp_D);
/* if the selected PC composes "PCSetCoordinates_C" and the graph carries coordinates
   (cloc nonzero), pass it the coordinates of the dofs listed in is_I_local */
5237: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5238: if (f && pcbddc->mat_graph->cloc) {
5239: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5240: const PetscInt *idxs;
5241: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5243: ISGetLocalSize(pcis->is_I_local,&nl);
5244: ISGetIndices(pcis->is_I_local,&idxs);
5245: PetscMalloc1(nl*cdim,&scoords);
/* gather cdim coordinates per dof, indexed through idxs */
5246: for (i=0;i<nl;i++) {
5247: for (d=0;d<cdim;d++) {
5248: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5249: }
5250: }
5251: ISRestoreIndices(pcis->is_I_local,&idxs);
5252: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5253: PetscFree(scoords);
5254: }
5255: }
5256: MatSetOptionsPrefix(pcis->A_II,((PetscObject)pcbddc->ksp_D)->prefix);
5257: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
/* when solvers are reused, replace the PC of ksp_D with the stored interior solver */
5258: if (sub_schurs && sub_schurs->reuse_solver) {
5259: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5261: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5262: }
5263: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5264: if (!n_D) {
5265: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5266: PCSetType(pc_temp,PCNONE);
5267: }
5268: /* set ksp_D into pcis data: drop the old pointer, then share ksp_D with an extra reference */
5269: KSPDestroy(&pcis->ksp_D);
5270: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5271: pcis->ksp_D = pcbddc->ksp_D;
5272: }
5274: /* NEUMANN PROBLEM */
/* A_RR stays NULL when neumann is false; the MatDestroy() at the end of the function
   releases whatever reference this function ends up holding */
5275: A_RR = 0;
5276: if (neumann) {
5277: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5278: PetscInt ibs,mbs;
/* note: this issbaij shadows the function-scope variable of the same name */
5279: PetscBool issbaij, reuse_neumann_solver;
5280: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
/* the correction solver from sub_schurs is reused unless an object named
   "__KSPFETIDP_iP" is composed with sub_schurs->A */
5282: reuse_neumann_solver = PETSC_FALSE;
5283: if (sub_schurs && sub_schurs->reuse_solver) {
5284: IS iP;
5286: reuse_neumann_solver = PETSC_TRUE;
5287: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5288: if (iP) reuse_neumann_solver = PETSC_FALSE;
5289: }
5290: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5291: ISGetSize(pcbddc->is_R_local,&n_R);
5292: if (pcbddc->ksp_R) { /* already created ksp */
5293: PetscInt nn_R;
/* take our own reference on the matrix currently attached to ksp_R */
5294: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5295: PetscObjectReference((PetscObject)A_RR);
5296: MatGetSize(A_RR,&nn_R,NULL);
5297: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5298: KSPReset(pcbddc->ksp_R);
5299: MatDestroy(&A_RR);
5300: reuse = MAT_INITIAL_MATRIX;
5301: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5302: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5303: MatDestroy(&A_RR);
5304: reuse = MAT_INITIAL_MATRIX;
5305: } else { /* safe to reuse the matrix */
5306: reuse = MAT_REUSE_MATRIX;
5307: }
5308: }
5309: /* last check: a changed nonzero pattern of the operator always forces a fresh matrix */
5310: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5311: MatDestroy(&A_RR);
5312: reuse = MAT_INITIAL_MATRIX;
5313: }
5314: } else { /* first time, so we need to create the matrix */
5315: reuse = MAT_INITIAL_MATRIX;
5316: }
5317: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5318: MatGetBlockSize(pcbddc->local_mat,&mbs);
5319: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5320: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
/* in both conversions below: if local_mat is the very same object as matis->A, a
   converted copy is created so that matis->A is not modified; otherwise local_mat is
   converted in place */
5321: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5322: if (matis->A == pcbddc->local_mat) {
5323: MatDestroy(&pcbddc->local_mat);
5324: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5325: } else {
5326: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5327: }
5328: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5329: if (matis->A == pcbddc->local_mat) {
5330: MatDestroy(&pcbddc->local_mat);
5331: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5332: } else {
5333: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5334: }
5335: }
5336: /* extract A_RR */
5337: if (reuse_neumann_solver) {
5338: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5340: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5341: MatDestroy(&A_RR);
5342: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5343: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5344: } else {
5345: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5346: }
5347: } else {
/* no explicit A_RR needed: use the operator of the reused correction solver,
   taking a reference so that the final MatDestroy() is balanced */
5348: MatDestroy(&A_RR);
5349: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5350: PetscObjectReference((PetscObject)A_RR);
5351: }
5352: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5353: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5354: }
/* propagate the symmetry flag of the local matrix, if it has been set */
5355: if (pcbddc->local_mat->symmetric_set) {
5356: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
5357: }
5358: if (!pcbddc->ksp_R) { /* create object if not present */
5359: void (*f)(void) = 0;
5361: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5362: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5363: /* default: preonly + direct factorization (Cholesky for SBAIJ, LU otherwise) */
5364: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5365: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5366: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5367: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5368: if (issbaij) {
5369: PCSetType(pc_temp,PCCHOLESKY);
5370: } else {
5371: PCSetType(pc_temp,PCLU);
5372: }
5373: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5374: /* Allow user's customization */
5375: KSPSetFromOptions(pcbddc->ksp_R);
/* same coordinate hand-off as for the Dirichlet solver, restricted to is_R_local */
5376: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5377: if (f && pcbddc->mat_graph->cloc) {
5378: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5379: const PetscInt *idxs;
5380: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5382: ISGetLocalSize(pcbddc->is_R_local,&nl);
5383: ISGetIndices(pcbddc->is_R_local,&idxs);
5384: PetscMalloc1(nl*cdim,&scoords);
5385: for (i=0;i<nl;i++) {
5386: for (d=0;d<cdim;d++) {
5387: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5388: }
5389: }
5390: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5391: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5392: PetscFree(scoords);
5393: }
5394: }
5395: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5396: if (!n_R) {
5397: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5398: PCSetType(pc_temp,PCNONE);
5399: }
5400: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5401: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5402: /* Reuse solver if it is present */
5403: if (reuse_neumann_solver) {
5404: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5406: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5407: }
5408: }
5410: if (pcbddc->dbg_flag) {
5411: PetscViewerFlush(pcbddc->dbg_viewer);
5412: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5413: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5414: }
5416: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
/* NullSpace_corr[0] / [2] enable the correction for the Dirichlet / Neumann solver;
   [1] / [3] are forwarded as the last argument of PCBDDCNullSpaceAssembleCorrection() */
5417: check_corr = PETSC_FALSE;
5418: if (pcbddc->NullSpace_corr[0]) {
5419: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5420: }
5421: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5422: check_corr = PETSC_TRUE;
5423: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5424: }
5425: if (neumann && pcbddc->NullSpace_corr[2]) {
5426: check_corr = PETSC_TRUE;
5427: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5428: }
5429: /* check Dirichlet and Neumann solvers */
/* for each solver: pick a random x, form b = A x, solve A y = b in place, and report
   the infinity norm of x - y */
5430: if (pcbddc->dbg_flag) {
5431: if (dirichlet) { /* Dirichlet */
5432: VecSetRandom(pcis->vec1_D,NULL);
5433: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5434: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5435: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5436: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5437: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5438: if (check_corr) {
5439: PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);
5440: }
5441: PetscViewerFlush(pcbddc->dbg_viewer);
5442: }
5443: if (neumann) { /* Neumann */
5444: VecSetRandom(pcbddc->vec1_R,NULL);
5445: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5446: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5447: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5448: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5449: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5450: if (check_corr) {
5451: PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);
5452: }
5453: PetscViewerFlush(pcbddc->dbg_viewer);
5454: }
5455: }
5456: /* free Neumann problem's matrix (drops the reference held by this function; A_RR is NULL if neumann was false) */
5457: MatDestroy(&A_RR);
5458: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5459: return(0);
5460: }
/*
   PCBDDCSolveSubstructureCorrection - applies the local solve on the R dofs, in place
   on the given vectors.

   Input Parameters:
+  pc             - the preconditioner (pc->data is read as PC_BDDC)
.  inout_B        - on input the right-hand side on the B dofs, on output the result
.  inout_D        - as inout_B for the D dofs; only accessed when pcbddc->switch_static is set
-  applytranspose - if true, the transposed solve is used and the local_auxmat1/local_auxmat2
                    products are applied (transposed) before the solve instead of after it

   Notes:
   The solve goes through pcbddc->ksp_R, except when sub_schurs->reuse_solver exists and
   switch_static is off: then MatFactorSolveSchurComplement[Transpose]() is called on
   reuse_solver->F with the work vectors reuse_solver->rhs_B / sol_B.
   The local_auxmat1/local_auxmat2 products are skipped when pcbddc->local_auxmat1 is NULL.
*/
5462: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5463: {
5464: PetscErrorCode ierr;
5465: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5466: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
/* boolean flag; note that inner scopes below declare a PCBDDCReuseSolvers variable with
   the same name, which shadows this flag inside those scopes */
5467: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;
/* ---- stage 1: load the right-hand side ---- */
/* the reverse scatters below only fill part of vec1_R, so clear it first */
5470: if (!reuse_solver) {
5471: VecSet(pcbddc->vec1_R,0.);
5472: }
5473: if (!pcbddc->switch_static) {
5474: if (applytranspose && pcbddc->local_auxmat1) {
/* inout_B += local_auxmat1^T (local_auxmat2^T inout_B) */
5475: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5476: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5477: }
5478: if (!reuse_solver) {
/* B values -> vec1_R */
5479: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5480: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5481: } else {
5482: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
/* B values -> rhs_B of the reused factorization */
5484: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5485: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5486: }
5487: } else {
/* switch_static: both B and D values are loaded into vec1_R */
5488: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5489: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5490: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5491: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5492: if (applytranspose && pcbddc->local_auxmat1) {
/* here local_auxmat2^T acts on vec1_R; the updated inout_B is scattered into vec1_R again */
5493: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5494: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5495: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5496: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5497: }
5498: }
/* ---- stage 2: solve (in place on vec1_R, or rhs_B -> sol_B with the reused factorization) ---- */
5499: if (!reuse_solver || pcbddc->switch_static) {
5500: if (applytranspose) {
5501: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5502: } else {
5503: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5504: }
5505: } else {
5506: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5508: if (applytranspose) {
5509: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5510: } else {
5511: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5512: }
5513: }
/* ---- stage 3: write the solution back ---- */
5514: VecSet(inout_B,0.);
5515: if (!pcbddc->switch_static) {
5516: if (!reuse_solver) {
5517: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5518: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5519: } else {
5520: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5522: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5523: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5524: }
5525: if (!applytranspose && pcbddc->local_auxmat1) {
/* inout_B += local_auxmat2 (local_auxmat1 inout_B) */
5526: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5527: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5528: }
5529: } else {
5530: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5531: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5532: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5533: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5534: if (!applytranspose && pcbddc->local_auxmat1) {
/* the correction is accumulated on vec1_R: vec1_R += local_auxmat2 (local_auxmat1 inout_B) */
5535: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5536: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5537: }
/* scatter again so that inout_B and inout_D reflect the (possibly updated) vec1_R */
5538: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5539: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5540: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5541: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5542: }
5543: return(0);
5544: }
5546: /* the applytranspose parameter determines whether the interface preconditioner is applied transposed or not */
/*
   PCBDDCApplyInterfacePreconditioner - applies the coarse and local corrections, in place
   on pcis->vec1_B (and on pcis->vec1_D when pcbddc->switch_static is set).

   Input Parameters:
+  pc             - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
-  applytranspose - if true, coarse_phi is used for the restriction and coarse_psi for the
                    prolongation, and the transposed solves are called; if false the two
                    bases are swapped and the regular solves are called

   Notes:
   When pcbddc->benign_apply_coarse_only is set, the restriction and the local correction are
   skipped: the coarse right-hand side only receives benign_p0, and vec1_B is overwritten
   (not accumulated) with the expanded coarse solution.
*/
5547: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5548: {
5550: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5551: PC_IS* pcis = (PC_IS*) (pc->data);
5552: const PetscScalar zero = 0.0;
5555: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5556: if (!pcbddc->benign_apply_coarse_only) {
5557: if (applytranspose) {
5558: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5559: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5560: } else {
5561: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5562: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5563: }
5564: } else {
5565: VecSet(pcbddc->vec1_P,zero);
5566: }
5568: /* add p0 to the last benign_n values of vec1_P, which hold the coarse dofs relative to p0 */
5569: if (pcbddc->benign_n) {
5570: PetscScalar *array;
5571: PetscInt j;
5573: VecGetArray(pcbddc->vec1_P,&array);
5574: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5575: VecRestoreArray(pcbddc->vec1_P,&array);
5576: }
5578: /* start communications from local primal nodes to rhs of coarse solver */
5579: VecSet(pcbddc->coarse_vec,zero);
5580: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5581: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5583: /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
/* only executed where pcbddc->coarse_ksp is non-NULL */
5584: if (pcbddc->coarse_ksp) {
5585: Mat coarse_mat;
5586: Vec rhs,sol;
5587: MatNullSpace nullsp;
5588: PetscBool isbddc = PETSC_FALSE;
5590: if (pcbddc->benign_have_null) {
5591: PC coarse_pc;
5593: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5594: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5595: /* we need to propagate to coarser levels the need for a possible benign correction */
5596: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5597: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5598: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5599: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5600: }
5601: }
5602: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5603: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5604: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5605: if (applytranspose) {
5606: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5607: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
/* remove the transpose nullspace of the coarse matrix (if any) from the solution */
5608: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5609: if (nullsp) {
5610: MatNullSpaceRemove(nullsp,sol);
5611: }
5612: } else {
5613: MatGetNullSpace(coarse_mat,&nullsp);
5614: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5615: PC coarse_pc;
/* in this branch the nullspace (if any) is removed from the right-hand side, and the
   coarse KSP is bypassed in favor of PCBDDCBenignRemoveInterior() on the coarse PC */
5617: if (nullsp) {
5618: MatNullSpaceRemove(nullsp,rhs);
5619: }
5620: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5621: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5622: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5623: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5624: } else {
5625: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5626: if (nullsp) {
5627: MatNullSpaceRemove(nullsp,sol);
5628: }
5629: }
5630: }
5631: /* we don't need the benign correction at coarser levels anymore: set the flags of the coarse PC to skip it */
5632: if (pcbddc->benign_have_null && isbddc) {
5633: PC coarse_pc;
5634: PC_BDDC* coarsepcbddc;
5636: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5637: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5638: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5639: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5640: }
5641: }
5643: /* Local solution on R nodes (skipped when the subdomain has no local dofs, i.e. pcis->n is zero) */
5644: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5645: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5646: }
5647: /* communications from coarse sol to local primal nodes */
5648: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5649: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5651: /* Sum contributions from the two levels */
5652: if (!pcbddc->benign_apply_coarse_only) {
5653: if (applytranspose) {
5654: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5655: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5656: } else {
5657: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5658: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5659: }
5660: /* store p0: copy the last benign_n values of vec1_P back into benign_p0 */
5661: if (pcbddc->benign_n) {
5662: PetscScalar *array;
5663: PetscInt j;
5665: VecGetArray(pcbddc->vec1_P,&array);
5666: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5667: VecRestoreArray(pcbddc->vec1_P,&array);
5668: }
5669: } else { /* expand the coarse solution: vec1_B is overwritten, not accumulated */
5670: if (applytranspose) {
5671: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5672: } else {
5673: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5674: }
5675: }
5676: return(0);
5677: }
/*
   PCBDDCScatterCoarseDataBegin - starts the scatter between the local primal vector
   pcbddc->vec1_P and the coarse vector pcbddc->coarse_vec.

   Input Parameters:
+  pc    - the preconditioner (pc->data is read as PC_BDDC)
.  imode - insert mode forwarded to VecScatterBegin()
-  smode - SCATTER_REVERSE reads from coarse_vec and writes to vec1_P; any other value
           reads from vec1_P and writes to coarse_vec

   Notes:
   For SCATTER_REVERSE, where pcbddc->coarse_ksp is non-NULL, the array of the coarse KSP
   solution is placed into coarse_vec with VecPlaceArray(), so that the scatter reads the
   coarse solution; PCBDDCScatterCoarseDataEnd() undoes this with VecResetArray().
*/
5679: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5680: {
5682: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5683: PetscScalar *array;
5684: Vec from,to;
5687: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5688: from = pcbddc->coarse_vec;
5689: to = pcbddc->vec1_P;
5690: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5691: Vec tvec;
/* first give the KSP right-hand side its own array back; presumably this undoes the
   VecPlaceArray() done by PCBDDCScatterCoarseDataEnd() after a forward scatter */
5693: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5694: VecResetArray(tvec);
/* then let coarse_vec alias the storage of the KSP solution */
5695: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5696: VecGetArray(tvec,&array);
5697: VecPlaceArray(from,array);
5698: VecRestoreArray(tvec,&array);
5699: }
5700: } else { /* from local to global -> put data in coarse right hand side */
5701: from = pcbddc->vec1_P;
5702: to = pcbddc->coarse_vec;
5703: }
5704: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5705: return(0);
5706: }
/*
   PCBDDCScatterCoarseDataEnd - completes the scatter started by PCBDDCScatterCoarseDataBegin()
   between pcbddc->vec1_P and pcbddc->coarse_vec.

   Input Parameters:
+  pc    - the preconditioner (pc->data is read as PC_BDDC)
.  imode - insert mode forwarded to VecScatterEnd()
-  smode - SCATTER_REVERSE reads from coarse_vec and writes to vec1_P; any other value
           reads from vec1_P and writes to coarse_vec

   Notes:
   Where pcbddc->coarse_ksp is non-NULL: after a SCATTER_FORWARD the array of coarse_vec is
   placed into the right-hand side vector of the coarse KSP; otherwise the array previously
   placed into coarse_vec is released with VecResetArray().
*/
5708: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5709: {
5711: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5712: PetscScalar *array;
5713: Vec from,to;
5716: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5717: from = pcbddc->coarse_vec;
5718: to = pcbddc->vec1_P;
5719: } else { /* from local to global -> put data in coarse right hand side */
5720: from = pcbddc->vec1_P;
5721: to = pcbddc->coarse_vec;
5722: }
5723: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5724: if (smode == SCATTER_FORWARD) {
5725: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5726: Vec tvec;
/* let the KSP right-hand side alias the storage of coarse_vec (here "to") */
5728: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5729: VecGetArray(to,&array);
5730: VecPlaceArray(tvec,array);
5731: VecRestoreArray(to,&array);
5732: }
5733: } else {
5734: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec (here "from") */
5735: VecResetArray(from);
5736: }
5737: }
5738: return(0);
5739: }
5741: /* uncomment for testing purposes */
5742: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5743: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5744: {
5745: PetscErrorCode ierr;
5746: PC_IS* pcis = (PC_IS*)(pc->data);
5747: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5748: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5749: /* one and zero */
5750: PetscScalar one=1.0,zero=0.0;
5751: /* space to store constraints and their local indices */
5752: PetscScalar *constraints_data;
5753: PetscInt *constraints_idxs,*constraints_idxs_B;
5754: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
5755: PetscInt *constraints_n;
5756: /* iterators */
5757: PetscInt i,j,k,total_counts,total_counts_cc,cum;
5758: /* BLAS integers */
5759: PetscBLASInt lwork,lierr;
5760: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
5761: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
5762: /* reuse */
5763: PetscInt olocal_primal_size,olocal_primal_size_cc;
5764: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
5765: /* change of basis */
5766: PetscBool qr_needed;
5767: PetscBT change_basis,qr_needed_idx;
5768: /* auxiliary stuff */
5769: PetscInt *nnz,*is_indices;
5770: PetscInt ncc;
5771: /* some quantities */
5772: PetscInt n_vertices,total_primal_vertices,valid_constraints;
5773: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5774: PetscReal tol; /* tolerance for retaining eigenmodes */
5777: tol = PetscSqrtReal(PETSC_SMALL);
5778: /* Destroy Mat objects computed previously */
5779: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5780: MatDestroy(&pcbddc->ConstraintMatrix);
5781: MatDestroy(&pcbddc->switch_static_change);
5782: /* save info on constraints from previous setup (if any) */
5783: olocal_primal_size = pcbddc->local_primal_size;
5784: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5785: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5786: PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5787: PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5788: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5789: PetscFree(pcbddc->primal_indices_local_idxs);
5791: if (!pcbddc->adaptive_selection) {
5792: IS ISForVertices,*ISForFaces,*ISForEdges;
5793: MatNullSpace nearnullsp;
5794: const Vec *nearnullvecs;
5795: Vec *localnearnullsp;
5796: PetscScalar *array;
5797: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
5798: PetscBool nnsp_has_cnst;
5799: /* LAPACK working arrays for SVD or POD */
5800: PetscBool skip_lapack,boolforchange;
5801: PetscScalar *work;
5802: PetscReal *singular_vals;
5803: #if defined(PETSC_USE_COMPLEX)
5804: PetscReal *rwork;
5805: #endif
5806: #if defined(PETSC_MISSING_LAPACK_GESVD)
5807: PetscScalar *temp_basis,*correlation_mat;
5808: #else
5809: PetscBLASInt dummy_int=1;
5810: PetscScalar dummy_scalar=1.;
5811: #endif
5813: /* Get index sets for faces, edges and vertices from graph */
5814: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
5815: /* print some info */
5816: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5817: PetscInt nv;
5819: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
5820: ISGetSize(ISForVertices,&nv);
5821: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5822: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
5823: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
5824: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
5825: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
5826: PetscViewerFlush(pcbddc->dbg_viewer);
5827: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
5828: }
5830: /* free unneeded index sets */
5831: if (!pcbddc->use_vertices) {
5832: ISDestroy(&ISForVertices);
5833: }
5834: if (!pcbddc->use_edges) {
5835: for (i=0;i<n_ISForEdges;i++) {
5836: ISDestroy(&ISForEdges[i]);
5837: }
5838: PetscFree(ISForEdges);
5839: n_ISForEdges = 0;
5840: }
5841: if (!pcbddc->use_faces) {
5842: for (i=0;i<n_ISForFaces;i++) {
5843: ISDestroy(&ISForFaces[i]);
5844: }
5845: PetscFree(ISForFaces);
5846: n_ISForFaces = 0;
5847: }
5849: /* check if near null space is attached to global mat */
5850: MatGetNearNullSpace(pc->pmat,&nearnullsp);
5851: if (nearnullsp) {
5852: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
5853: /* remove any stored info */
5854: MatNullSpaceDestroy(&pcbddc->onearnullspace);
5855: PetscFree(pcbddc->onearnullvecs_state);
5856: /* store information for BDDC solver reuse */
5857: PetscObjectReference((PetscObject)nearnullsp);
5858: pcbddc->onearnullspace = nearnullsp;
5859: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
5860: for (i=0;i<nnsp_size;i++) {
5861: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
5862: }
5863: } else { /* if near null space is not provided BDDC uses constants by default */
5864: nnsp_size = 0;
5865: nnsp_has_cnst = PETSC_TRUE;
5866: }
5867: /* get max number of constraints on a single cc */
5868: max_constraints = nnsp_size;
5869: if (nnsp_has_cnst) max_constraints++;
5871: /*
5872: Evaluate maximum storage size needed by the procedure
5873: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5874: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5875: There can be multiple constraints per connected component
5876: */
5877: n_vertices = 0;
5878: if (ISForVertices) {
5879: ISGetSize(ISForVertices,&n_vertices);
5880: }
5881: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
5882: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
5884: total_counts = n_ISForFaces+n_ISForEdges;
5885: total_counts *= max_constraints;
5886: total_counts += n_vertices;
5887: PetscBTCreate(total_counts,&change_basis);
5889: total_counts = 0;
5890: max_size_of_constraint = 0;
5891: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
5892: IS used_is;
5893: if (i<n_ISForEdges) {
5894: used_is = ISForEdges[i];
5895: } else {
5896: used_is = ISForFaces[i-n_ISForEdges];
5897: }
5898: ISGetSize(used_is,&j);
5899: total_counts += j;
5900: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
5901: }
5902: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
5904: /* get local part of global near null space vectors */
5905: PetscMalloc1(nnsp_size,&localnearnullsp);
5906: for (k=0;k<nnsp_size;k++) {
5907: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
5908: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5909: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
5910: }
5912: /* whether or not to skip lapack calls */
5913: skip_lapack = PETSC_TRUE;
5914: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
5916: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5917: if (!skip_lapack) {
5918: PetscScalar temp_work;
5920: #if defined(PETSC_MISSING_LAPACK_GESVD)
5921: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5922: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
5923: PetscMalloc1(max_constraints,&singular_vals);
5924: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
5925: #if defined(PETSC_USE_COMPLEX)
5926: PetscMalloc1(3*max_constraints,&rwork);
5927: #endif
5928: /* now we evaluate the optimal workspace using query with lwork=-1 */
5929: PetscBLASIntCast(max_constraints,&Blas_N);
5930: PetscBLASIntCast(max_constraints,&Blas_LDA);
5931: lwork = -1;
5932: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5933: #if !defined(PETSC_USE_COMPLEX)
5934: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
5935: #else
5936: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
5937: #endif
5938: PetscFPTrapPop();
5939: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
5940: #else /* on missing GESVD */
5941: /* SVD */
5942: PetscInt max_n,min_n;
5943: max_n = max_size_of_constraint;
5944: min_n = max_constraints;
5945: if (max_size_of_constraint < max_constraints) {
5946: min_n = max_size_of_constraint;
5947: max_n = max_constraints;
5948: }
5949: PetscMalloc1(min_n,&singular_vals);
5950: #if defined(PETSC_USE_COMPLEX)
5951: PetscMalloc1(5*min_n,&rwork);
5952: #endif
5953: /* now we evaluate the optimal workspace using query with lwork=-1 */
5954: lwork = -1;
5955: PetscBLASIntCast(max_n,&Blas_M);
5956: PetscBLASIntCast(min_n,&Blas_N);
5957: PetscBLASIntCast(max_n,&Blas_LDA);
5958: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
5959: #if !defined(PETSC_USE_COMPLEX)
5960: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
5961: #else
5962: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
5963: #endif
5964: PetscFPTrapPop();
5965: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
5966: #endif /* on missing GESVD */
5967: /* Allocate optimal workspace */
5968: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
5969: PetscMalloc1(lwork,&work);
5970: }
5971: /* Now we can loop on constraining sets */
5972: total_counts = 0;
5973: constraints_idxs_ptr[0] = 0;
5974: constraints_data_ptr[0] = 0;
5975: /* vertices */
5976: if (n_vertices) {
5977: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
5978: PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
5979: for (i=0;i<n_vertices;i++) {
5980: constraints_n[total_counts] = 1;
5981: constraints_data[total_counts] = 1.0;
5982: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
5983: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
5984: total_counts++;
5985: }
5986: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
5987: n_vertices = total_counts;
5988: }
5990: /* edges and faces */
5991: total_counts_cc = total_counts;
5992: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
5993: IS used_is;
5994: PetscBool idxs_copied = PETSC_FALSE;
5996: if (ncc<n_ISForEdges) {
5997: used_is = ISForEdges[ncc];
5998: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
5999: } else {
6000: used_is = ISForFaces[ncc-n_ISForEdges];
6001: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6002: }
6003: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6005: ISGetSize(used_is,&size_of_constraint);
6006: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6007: /* change of basis should not be performed on local periodic nodes */
6008: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6009: if (nnsp_has_cnst) {
6010: PetscScalar quad_value;
6012: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6013: idxs_copied = PETSC_TRUE;
6015: if (!pcbddc->use_nnsp_true) {
6016: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6017: } else {
6018: quad_value = 1.0;
6019: }
6020: for (j=0;j<size_of_constraint;j++) {
6021: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6022: }
6023: temp_constraints++;
6024: total_counts++;
6025: }
6026: for (k=0;k<nnsp_size;k++) {
6027: PetscReal real_value;
6028: PetscScalar *ptr_to_data;
6030: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6031: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6032: for (j=0;j<size_of_constraint;j++) {
6033: ptr_to_data[j] = array[is_indices[j]];
6034: }
6035: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6036: /* check if array is null on the connected component */
6037: PetscBLASIntCast(size_of_constraint,&Blas_N);
6038: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6039: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6040: temp_constraints++;
6041: total_counts++;
6042: if (!idxs_copied) {
6043: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6044: idxs_copied = PETSC_TRUE;
6045: }
6046: }
6047: }
6048: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6049: valid_constraints = temp_constraints;
6050: if (!pcbddc->use_nnsp_true && temp_constraints) {
6051: if (temp_constraints == 1) { /* just normalize the constraint */
6052: PetscScalar norm,*ptr_to_data;
6054: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6055: PetscBLASIntCast(size_of_constraint,&Blas_N);
6056: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6057: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6058: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6059: } else { /* perform SVD */
6060: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6062: #if defined(PETSC_MISSING_LAPACK_GESVD)
6063: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6064: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6065: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6066: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6067: from that computed using LAPACKgesvd
6068: -> This is due to a different computation of eigenvectors in LAPACKheev
6069: -> The quality of the POD-computed basis will be the same */
6070: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6071: /* Store upper triangular part of correlation matrix */
6072: PetscBLASIntCast(size_of_constraint,&Blas_N);
6073: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6074: for (j=0;j<temp_constraints;j++) {
6075: for (k=0;k<j+1;k++) {
6076: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6077: }
6078: }
6079: /* compute eigenvalues and eigenvectors of correlation matrix */
6080: PetscBLASIntCast(temp_constraints,&Blas_N);
6081: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6082: #if !defined(PETSC_USE_COMPLEX)
6083: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6084: #else
6085: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6086: #endif
6087: PetscFPTrapPop();
6088: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6089: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6090: j = 0;
6091: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6092: total_counts = total_counts-j;
6093: valid_constraints = temp_constraints-j;
6094: /* scale and copy POD basis into used quadrature memory */
6095: PetscBLASIntCast(size_of_constraint,&Blas_M);
6096: PetscBLASIntCast(temp_constraints,&Blas_N);
6097: PetscBLASIntCast(temp_constraints,&Blas_K);
6098: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6099: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6100: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6101: if (j<temp_constraints) {
6102: PetscInt ii;
6103: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6104: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6105: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6106: PetscFPTrapPop();
6107: for (k=0;k<temp_constraints-j;k++) {
6108: for (ii=0;ii<size_of_constraint;ii++) {
6109: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6110: }
6111: }
6112: }
6113: #else /* on missing GESVD */
6114: PetscBLASIntCast(size_of_constraint,&Blas_M);
6115: PetscBLASIntCast(temp_constraints,&Blas_N);
6116: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6117: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6118: #if !defined(PETSC_USE_COMPLEX)
6119: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6120: #else
6121: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6122: #endif
6123: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6124: PetscFPTrapPop();
6125: /* retain singular values greater than tol (relative to the largest one): note that LAPACKgesvd gives singular values in descending order */
6126: k = temp_constraints;
6127: if (k > size_of_constraint) k = size_of_constraint;
6128: j = 0;
6129: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6130: valid_constraints = k-j;
6131: total_counts = total_counts-temp_constraints+valid_constraints;
6132: #endif /* on missing GESVD */
6133: }
6134: }
6135: /* update pointers information */
6136: if (valid_constraints) {
6137: constraints_n[total_counts_cc] = valid_constraints;
6138: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6139: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6140: /* set change_of_basis flag */
6141: if (boolforchange) {
6142: PetscBTSet(change_basis,total_counts_cc);
6143: }
6144: total_counts_cc++;
6145: }
6146: }
6147: /* free workspace */
6148: if (!skip_lapack) {
6149: PetscFree(work);
6150: #if defined(PETSC_USE_COMPLEX)
6151: PetscFree(rwork);
6152: #endif
6153: PetscFree(singular_vals);
6154: #if defined(PETSC_MISSING_LAPACK_GESVD)
6155: PetscFree(correlation_mat);
6156: PetscFree(temp_basis);
6157: #endif
6158: }
6159: for (k=0;k<nnsp_size;k++) {
6160: VecDestroy(&localnearnullsp[k]);
6161: }
6162: PetscFree(localnearnullsp);
6163: /* free index sets of faces, edges and vertices */
6164: for (i=0;i<n_ISForFaces;i++) {
6165: ISDestroy(&ISForFaces[i]);
6166: }
6167: if (n_ISForFaces) {
6168: PetscFree(ISForFaces);
6169: }
6170: for (i=0;i<n_ISForEdges;i++) {
6171: ISDestroy(&ISForEdges[i]);
6172: }
6173: if (n_ISForEdges) {
6174: PetscFree(ISForEdges);
6175: }
6176: ISDestroy(&ISForVertices);
6177: } else {
6178: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6180: total_counts = 0;
6181: n_vertices = 0;
6182: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6183: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6184: }
6185: max_constraints = 0;
6186: total_counts_cc = 0;
6187: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6188: total_counts += pcbddc->adaptive_constraints_n[i];
6189: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6190: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6191: }
6192: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6193: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6194: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6195: constraints_data = pcbddc->adaptive_constraints_data;
6196: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6197: PetscMalloc1(total_counts_cc,&constraints_n);
6198: total_counts_cc = 0;
6199: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6200: if (pcbddc->adaptive_constraints_n[i]) {
6201: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6202: }
6203: }
6205: max_size_of_constraint = 0;
6206: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6207: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6208: /* Change of basis */
6209: PetscBTCreate(total_counts_cc,&change_basis);
6210: if (pcbddc->use_change_of_basis) {
6211: for (i=0;i<sub_schurs->n_subs;i++) {
6212: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6213: PetscBTSet(change_basis,i+n_vertices);
6214: }
6215: }
6216: }
6217: }
6218: pcbddc->local_primal_size = total_counts;
6219: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6221: /* map constraints_idxs in boundary numbering */
6222: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6223: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6225: /* Create constraint matrix */
6226: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6227: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6228: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6230: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6231: /* determine if a QR strategy is needed for change of basis */
6232: qr_needed = pcbddc->use_qr_single;
6233: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6234: total_primal_vertices=0;
6235: pcbddc->local_primal_size_cc = 0;
6236: for (i=0;i<total_counts_cc;i++) {
6237: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6238: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6239: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6240: pcbddc->local_primal_size_cc += 1;
6241: } else if (PetscBTLookup(change_basis,i)) {
6242: for (k=0;k<constraints_n[i];k++) {
6243: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6244: }
6245: pcbddc->local_primal_size_cc += constraints_n[i];
6246: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6247: PetscBTSet(qr_needed_idx,i);
6248: qr_needed = PETSC_TRUE;
6249: }
6250: } else {
6251: pcbddc->local_primal_size_cc += 1;
6252: }
6253: }
6254: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6255: pcbddc->n_vertices = total_primal_vertices;
6256: /* permute indices in order to have a sorted set of vertices */
6257: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6258: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6259: PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6260: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6262: /* nonzero structure of constraint matrix */
6263: /* and get reference dof for local constraints */
6264: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6265: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6267: j = total_primal_vertices;
6268: total_counts = total_primal_vertices;
6269: cum = total_primal_vertices;
6270: for (i=n_vertices;i<total_counts_cc;i++) {
6271: if (!PetscBTLookup(change_basis,i)) {
6272: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6273: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6274: cum++;
6275: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6276: for (k=0;k<constraints_n[i];k++) {
6277: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6278: nnz[j+k] = size_of_constraint;
6279: }
6280: j += constraints_n[i];
6281: }
6282: }
6283: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6284: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6285: PetscFree(nnz);
6287: /* set values in constraint matrix */
6288: for (i=0;i<total_primal_vertices;i++) {
6289: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6290: }
6291: total_counts = total_primal_vertices;
6292: for (i=n_vertices;i<total_counts_cc;i++) {
6293: if (!PetscBTLookup(change_basis,i)) {
6294: PetscInt *cols;
6296: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6297: cols = constraints_idxs+constraints_idxs_ptr[i];
6298: for (k=0;k<constraints_n[i];k++) {
6299: PetscInt row = total_counts+k;
6300: PetscScalar *vals;
6302: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6303: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6304: }
6305: total_counts += constraints_n[i];
6306: }
6307: }
6308: /* assembling */
6309: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6310: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6311: MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");
6313: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6314: if (pcbddc->use_change_of_basis) {
6315: /* dual and primal dofs on a single cc */
6316: PetscInt dual_dofs,primal_dofs;
6317: /* working stuff for GEQRF */
6318: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6319: PetscBLASInt lqr_work;
6320: /* working stuff for UNGQR */
6321: PetscScalar *gqr_work = NULL,lgqr_work_t;
6322: PetscBLASInt lgqr_work;
6323: /* working stuff for TRTRS */
6324: PetscScalar *trs_rhs = NULL;
6325: PetscBLASInt Blas_NRHS;
6326: /* pointers for values insertion into change of basis matrix */
6327: PetscInt *start_rows,*start_cols;
6328: PetscScalar *start_vals;
6329: /* working stuff for values insertion */
6330: PetscBT is_primal;
6331: PetscInt *aux_primal_numbering_B;
6332: /* matrix sizes */
6333: PetscInt global_size,local_size;
6334: /* temporary change of basis */
6335: Mat localChangeOfBasisMatrix;
6336: /* extra space for debugging */
6337: PetscScalar *dbg_work = NULL;
6339: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6340: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6341: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6342: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6343: /* nonzeros for local mat */
6344: PetscMalloc1(pcis->n,&nnz);
6345: if (!pcbddc->benign_change || pcbddc->fake_change) {
6346: for (i=0;i<pcis->n;i++) nnz[i]=1;
6347: } else {
6348: const PetscInt *ii;
6349: PetscInt n;
6350: PetscBool flg_row;
6351: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6352: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6353: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6354: }
6355: for (i=n_vertices;i<total_counts_cc;i++) {
6356: if (PetscBTLookup(change_basis,i)) {
6357: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6358: if (PetscBTLookup(qr_needed_idx,i)) {
6359: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6360: } else {
6361: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6362: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6363: }
6364: }
6365: }
6366: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6367: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6368: PetscFree(nnz);
6369: /* Set interior change in the matrix */
6370: if (!pcbddc->benign_change || pcbddc->fake_change) {
6371: for (i=0;i<pcis->n;i++) {
6372: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6373: }
6374: } else {
6375: const PetscInt *ii,*jj;
6376: PetscScalar *aa;
6377: PetscInt n;
6378: PetscBool flg_row;
6379: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6380: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6381: for (i=0;i<n;i++) {
6382: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6383: }
6384: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6385: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6386: }
6388: if (pcbddc->dbg_flag) {
6389: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6390: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6391: }
6394: /* Now we loop on the constraints which need a change of basis */
6395: /*
6396: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6397: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6399: Basic blocks of change of basis matrix T computed by
6401: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6403: | 1 0 ... 0 s_1/S |
6404: | 0 1 ... 0 s_2/S |
6405: | ... |
6406: | 0 ... 1 s_{n-1}/S |
6407: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6409: with S = \sum_{i=1}^n s_i^2
6410: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6411: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6413: - QR decomposition of constraints otherwise
6414: */
6415: if (qr_needed && max_size_of_constraint) {
6416: /* space to store Q */
6417: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6418: /* array to store scaling factors for reflectors */
6419: PetscMalloc1(max_constraints,&qr_tau);
6420: /* first we issue queries for optimal work */
6421: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6422: PetscBLASIntCast(max_constraints,&Blas_N);
6423: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6424: lqr_work = -1;
6425: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6426: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6427: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6428: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6429: lgqr_work = -1;
6430: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6431: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6432: PetscBLASIntCast(max_constraints,&Blas_K);
6433: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6434: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6435: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6436: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6437: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6438: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6439: /* array to store rhs and solution of triangular solver */
6440: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6441: /* allocating workspace for check */
6442: if (pcbddc->dbg_flag) {
6443: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6444: }
6445: }
6446: /* array to store whether a node is primal or not */
6447: PetscBTCreate(pcis->n_B,&is_primal);
6448: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6449: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6450: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6451: for (i=0;i<total_primal_vertices;i++) {
6452: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6453: }
6454: PetscFree(aux_primal_numbering_B);
6456: /* loop on constraints and see whether or not they need a change of basis and compute it */
6457: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6458: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6459: if (PetscBTLookup(change_basis,total_counts)) {
6460: /* get constraint info */
6461: primal_dofs = constraints_n[total_counts];
6462: dual_dofs = size_of_constraint-primal_dofs;
6464: if (pcbddc->dbg_flag) {
6465: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6466: }
6468: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6470: /* copy quadrature constraints for change of basis check */
6471: if (pcbddc->dbg_flag) {
6472: PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6473: }
6474: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6475: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6477: /* compute QR decomposition of constraints */
6478: PetscBLASIntCast(size_of_constraint,&Blas_M);
6479: PetscBLASIntCast(primal_dofs,&Blas_N);
6480: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6481: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6482: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6483: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6484: PetscFPTrapPop();
6486: /* explicitly compute R^-T */
6487: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6488: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6489: PetscBLASIntCast(primal_dofs,&Blas_N);
6490: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6491: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6492: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6493: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6494: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6495: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6496: PetscFPTrapPop();
6498: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6499: PetscBLASIntCast(size_of_constraint,&Blas_M);
6500: PetscBLASIntCast(size_of_constraint,&Blas_N);
6501: PetscBLASIntCast(primal_dofs,&Blas_K);
6502: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6503: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6504: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6505: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6506: PetscFPTrapPop();
6508: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6509: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6510: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6511: PetscBLASIntCast(size_of_constraint,&Blas_M);
6512: PetscBLASIntCast(primal_dofs,&Blas_N);
6513: PetscBLASIntCast(primal_dofs,&Blas_K);
6514: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6515: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6516: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6517: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6518: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6519: PetscFPTrapPop();
6520: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6522: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6523: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6524: /* insert cols for primal dofs */
6525: for (j=0;j<primal_dofs;j++) {
6526: start_vals = &qr_basis[j*size_of_constraint];
6527: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6528: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6529: }
6530: /* insert cols for dual dofs */
6531: for (j=0,k=0;j<dual_dofs;k++) {
6532: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6533: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6534: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6535: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6536: j++;
6537: }
6538: }
6540: /* check change of basis */
6541: if (pcbddc->dbg_flag) {
6542: PetscInt ii,jj;
6543: PetscBool valid_qr=PETSC_TRUE;
6544: PetscBLASIntCast(primal_dofs,&Blas_M);
6545: PetscBLASIntCast(size_of_constraint,&Blas_N);
6546: PetscBLASIntCast(size_of_constraint,&Blas_K);
6547: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6548: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6549: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6550: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6551: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6552: PetscFPTrapPop();
6553: for (jj=0;jj<size_of_constraint;jj++) {
6554: for (ii=0;ii<primal_dofs;ii++) {
6555: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6556: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6557: }
6558: }
6559: if (!valid_qr) {
6560: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6561: for (jj=0;jj<size_of_constraint;jj++) {
6562: for (ii=0;ii<primal_dofs;ii++) {
6563: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6564: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6565: }
6566: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6567: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6568: }
6569: }
6570: }
6571: } else {
6572: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6573: }
6574: }
6575: } else { /* simple transformation block */
6576: PetscInt row,col;
6577: PetscScalar val,norm;
6579: PetscBLASIntCast(size_of_constraint,&Blas_N);
6580: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6581: for (j=0;j<size_of_constraint;j++) {
6582: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6583: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6584: if (!PetscBTLookup(is_primal,row_B)) {
6585: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6586: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6587: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6588: } else {
6589: for (k=0;k<size_of_constraint;k++) {
6590: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6591: if (row != col) {
6592: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6593: } else {
6594: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6595: }
6596: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6597: }
6598: }
6599: }
6600: if (pcbddc->dbg_flag) {
6601: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6602: }
6603: }
6604: } else {
6605: if (pcbddc->dbg_flag) {
6606: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6607: }
6608: }
6609: }
6611: /* free workspace */
6612: if (qr_needed) {
6613: if (pcbddc->dbg_flag) {
6614: PetscFree(dbg_work);
6615: }
6616: PetscFree(trs_rhs);
6617: PetscFree(qr_tau);
6618: PetscFree(qr_work);
6619: PetscFree(gqr_work);
6620: PetscFree(qr_basis);
6621: }
6622: PetscBTDestroy(&is_primal);
6623: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6624: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6626: /* assembling of global change of variable */
6627: if (!pcbddc->fake_change) {
6628: Mat tmat;
6629: PetscInt bs;
6631: VecGetSize(pcis->vec1_global,&global_size);
6632: VecGetLocalSize(pcis->vec1_global,&local_size);
6633: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6634: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6635: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6636: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6637: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6638: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6639: MatGetBlockSize(pc->pmat,&bs);
6640: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6641: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6642: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6643: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6644: MatDestroy(&tmat);
6645: VecSet(pcis->vec1_global,0.0);
6646: VecSet(pcis->vec1_N,1.0);
6647: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6648: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6649: VecReciprocal(pcis->vec1_global);
6650: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6652: /* check */
6653: if (pcbddc->dbg_flag) {
6654: PetscReal error;
6655: Vec x,x_change;
6657: VecDuplicate(pcis->vec1_global,&x);
6658: VecDuplicate(pcis->vec1_global,&x_change);
6659: VecSetRandom(x,NULL);
6660: VecCopy(x,pcis->vec1_global);
6661: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6662: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6663: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6664: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6665: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6666: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6667: VecAXPY(x,-1.0,x_change);
6668: VecNorm(x,NORM_INFINITY,&error);
6669: if (error > PETSC_SMALL) {
6670: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6671: }
6672: VecDestroy(&x);
6673: VecDestroy(&x_change);
6674: }
6675: /* adapt sub_schurs computed (if any) */
6676: if (pcbddc->use_deluxe_scaling) {
6677: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6679: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6680: if (sub_schurs && sub_schurs->S_Ej_all) {
6681: Mat S_new,tmat;
6682: IS is_all_N,is_V_Sall = NULL;
6684: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6685: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6686: if (pcbddc->deluxe_zerorows) {
6687: ISLocalToGlobalMapping NtoSall;
6688: IS is_V;
6689: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6690: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6691: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6692: ISLocalToGlobalMappingDestroy(&NtoSall);
6693: ISDestroy(&is_V);
6694: }
6695: ISDestroy(&is_all_N);
6696: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6697: MatDestroy(&sub_schurs->S_Ej_all);
6698: PetscObjectReference((PetscObject)S_new);
6699: if (pcbddc->deluxe_zerorows) {
6700: const PetscScalar *array;
6701: const PetscInt *idxs_V,*idxs_all;
6702: PetscInt i,n_V;
6704: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6705: ISGetLocalSize(is_V_Sall,&n_V);
6706: ISGetIndices(is_V_Sall,&idxs_V);
6707: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6708: VecGetArrayRead(pcis->D,&array);
6709: for (i=0;i<n_V;i++) {
6710: PetscScalar val;
6711: PetscInt idx;
6713: idx = idxs_V[i];
6714: val = array[idxs_all[idxs_V[i]]];
6715: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6716: }
6717: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6718: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6719: VecRestoreArrayRead(pcis->D,&array);
6720: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6721: ISRestoreIndices(is_V_Sall,&idxs_V);
6722: }
6723: sub_schurs->S_Ej_all = S_new;
6724: MatDestroy(&S_new);
6725: if (sub_schurs->sum_S_Ej_all) {
6726: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6727: MatDestroy(&sub_schurs->sum_S_Ej_all);
6728: PetscObjectReference((PetscObject)S_new);
6729: if (pcbddc->deluxe_zerorows) {
6730: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6731: }
6732: sub_schurs->sum_S_Ej_all = S_new;
6733: MatDestroy(&S_new);
6734: }
6735: ISDestroy(&is_V_Sall);
6736: MatDestroy(&tmat);
6737: }
6738: /* destroy any change of basis context in sub_schurs */
6739: if (sub_schurs && sub_schurs->change) {
6740: PetscInt i;
6742: for (i=0;i<sub_schurs->n_subs;i++) {
6743: KSPDestroy(&sub_schurs->change[i]);
6744: }
6745: PetscFree(sub_schurs->change);
6746: }
6747: }
6748: if (pcbddc->switch_static) { /* need to save the local change */
6749: pcbddc->switch_static_change = localChangeOfBasisMatrix;
6750: } else {
6751: MatDestroy(&localChangeOfBasisMatrix);
6752: }
6753: /* determine if any process has changed the pressures locally */
6754: pcbddc->change_interior = pcbddc->benign_have_null;
6755: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6756: MatDestroy(&pcbddc->ConstraintMatrix);
6757: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6758: pcbddc->use_qr_single = qr_needed;
6759: }
6760: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6761: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6762: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6763: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6764: } else {
6765: Mat benign_global = NULL;
6766: if (pcbddc->benign_have_null) {
6767: Mat M;
6769: pcbddc->change_interior = PETSC_TRUE;
6770: VecCopy(matis->counter,pcis->vec1_N);
6771: VecReciprocal(pcis->vec1_N);
6772: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
6773: if (pcbddc->benign_change) {
6774: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6775: MatDiagonalScale(M,pcis->vec1_N,NULL);
6776: } else {
6777: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
6778: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
6779: }
6780: MatISSetLocalMat(benign_global,M);
6781: MatDestroy(&M);
6782: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
6783: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
6784: }
6785: if (pcbddc->user_ChangeOfBasisMatrix) {
6786: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6787: MatDestroy(&benign_global);
6788: } else if (pcbddc->benign_have_null) {
6789: pcbddc->ChangeOfBasisMatrix = benign_global;
6790: }
6791: }
6792: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6793: IS is_global;
6794: const PetscInt *gidxs;
6796: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6797: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6798: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
6799: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
6800: ISDestroy(&is_global);
6801: }
6802: }
6803: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
6804: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
6805: }
6807: if (!pcbddc->fake_change) {
6808: /* add pressure dofs to set of primal nodes for numbering purposes */
6809: for (i=0;i<pcbddc->benign_n;i++) {
6810: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
6811: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6812: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
6813: pcbddc->local_primal_size_cc++;
6814: pcbddc->local_primal_size++;
6815: }
6817: /* check if a new primal space has been introduced (also take into account benign trick) */
6818: pcbddc->new_primal_space_local = PETSC_TRUE;
6819: if (olocal_primal_size == pcbddc->local_primal_size) {
6820: PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6821: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6822: if (!pcbddc->new_primal_space_local) {
6823: PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
6824: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6825: }
6826: }
6827: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6828: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
6829: }
6830: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
6832: /* flush dbg viewer */
6833: if (pcbddc->dbg_flag) {
6834: PetscViewerFlush(pcbddc->dbg_viewer);
6835: }
6837: /* free workspace */
6838: PetscBTDestroy(&qr_needed_idx);
6839: PetscBTDestroy(&change_basis);
6840: if (!pcbddc->adaptive_selection) {
6841: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
6842: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
6843: } else {
6844: PetscFree5(pcbddc->adaptive_constraints_n,
6845: pcbddc->adaptive_constraints_idxs_ptr,
6846: pcbddc->adaptive_constraints_data_ptr,
6847: pcbddc->adaptive_constraints_idxs,
6848: pcbddc->adaptive_constraints_data);
6849: PetscFree(constraints_n);
6850: PetscFree(constraints_idxs_B);
6851: }
6852: return(0);
6853: }
6854: /* #undef PETSC_MISSING_LAPACK_GESVD */
6856: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6857: {
6858: ISLocalToGlobalMapping map;
6859: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
6860: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
6861: PetscInt i,N;
6862: PetscBool rcsr = PETSC_FALSE;
6863: PetscErrorCode ierr;
6866: if (pcbddc->recompute_topography) {
6867: pcbddc->graphanalyzed = PETSC_FALSE;
6868: /* Reset previously computed graph */
6869: PCBDDCGraphReset(pcbddc->mat_graph);
6870: /* Init local Graph struct */
6871: MatGetSize(pc->pmat,&N,NULL);
6872: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
6873: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
6875: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
6876: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
6877: }
6878: /* Check validity of the csr graph passed in by the user */
6879: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
6881: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6882: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6883: PetscInt *xadj,*adjncy;
6884: PetscInt nvtxs;
6885: PetscBool flg_row=PETSC_FALSE;
6887: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6888: if (flg_row) {
6889: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
6890: pcbddc->computed_rowadj = PETSC_TRUE;
6891: }
6892: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
6893: rcsr = PETSC_TRUE;
6894: }
6895: if (pcbddc->dbg_flag) {
6896: PetscViewerFlush(pcbddc->dbg_viewer);
6897: }
6899: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6900: PetscReal *lcoords;
6901: PetscInt n;
6902: MPI_Datatype dimrealtype;
6904: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
6905: MatGetLocalSize(matis->A,&n,NULL);
6906: MatISSetUpSF(pc->pmat);
6907: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
6908: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
6909: MPI_Type_commit(&dimrealtype);
6910: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6911: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
6912: MPI_Type_free(&dimrealtype);
6913: PetscFree(pcbddc->mat_graph->coords);
6915: pcbddc->mat_graph->coords = lcoords;
6916: pcbddc->mat_graph->cloc = PETSC_TRUE;
6917: pcbddc->mat_graph->cnloc = n;
6918: }
6919: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
6920: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);
6922: /* Setup of Graph */
6923: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6924: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
6926: /* attach info on disconnected subdomains if present */
6927: if (pcbddc->n_local_subs) {
6928: PetscInt *local_subs;
6930: PetscMalloc1(N,&local_subs);
6931: for (i=0;i<pcbddc->n_local_subs;i++) {
6932: const PetscInt *idxs;
6933: PetscInt nl,j;
6935: ISGetLocalSize(pcbddc->local_subs[i],&nl);
6936: ISGetIndices(pcbddc->local_subs[i],&idxs);
6937: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
6938: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
6939: }
6940: pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
6941: pcbddc->mat_graph->local_subs = local_subs;
6942: }
6943: }
6945: if (!pcbddc->graphanalyzed) {
6946: /* Graph's connected components analysis */
6947: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
6948: pcbddc->graphanalyzed = PETSC_TRUE;
6949: }
6950: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6951: return(0);
6952: }
6954: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
6955: {
6956: PetscInt i,j;
6957: PetscScalar *alphas;
6961: if (!n) return(0);
6962: PetscMalloc1(n,&alphas);
6963: VecNormalize(vecs[0],NULL);
6964: for (i=1;i<n;i++) {
6965: VecMDot(vecs[i],i,vecs,alphas);
6966: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
6967: VecMAXPY(vecs[i],i,alphas,vecs);
6968: VecNormalize(vecs[i],NULL);
6969: }
6970: PetscFree(alphas);
6971: return(0);
6972: }
6974: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
6975: {
6976: Mat A;
6977: PetscInt n_neighs,*neighs,*n_shared,**shared;
6978: PetscMPIInt size,rank,color;
6979: PetscInt *xadj,*adjncy;
6980: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
6981: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
6982: PetscInt void_procs,*procs_candidates = NULL;
6983: PetscInt xadj_count,*count;
6984: PetscBool ismatis,use_vwgt=PETSC_FALSE;
6985: PetscSubcomm psubcomm;
6986: MPI_Comm subcomm;
6991: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
6992: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
6995: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
6997: if (have_void) *have_void = PETSC_FALSE;
6998: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
6999: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7000: MatISGetLocalMat(mat,&A);
7001: MatGetLocalSize(A,&n,NULL);
7002: im_active = !!n;
7003: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7004: void_procs = size - active_procs;
7005: /* get ranks of of non-active processes in mat communicator */
7006: if (void_procs) {
7007: PetscInt ncand;
7009: if (have_void) *have_void = PETSC_TRUE;
7010: PetscMalloc1(size,&procs_candidates);
7011: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7012: for (i=0,ncand=0;i<size;i++) {
7013: if (!procs_candidates[i]) {
7014: procs_candidates[ncand++] = i;
7015: }
7016: }
7017: /* force n_subdomains to be not greater that the number of non-active processes */
7018: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7019: }
7021: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7022: number of subdomains requested 1 -> send to master or first candidate in voids */
7023: MatGetSize(mat,&N,NULL);
7024: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7025: PetscInt issize,isidx,dest;
7026: if (*n_subdomains == 1) dest = 0;
7027: else dest = rank;
7028: if (im_active) {
7029: issize = 1;
7030: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7031: isidx = procs_candidates[dest];
7032: } else {
7033: isidx = dest;
7034: }
7035: } else {
7036: issize = 0;
7037: isidx = -1;
7038: }
7039: if (*n_subdomains != 1) *n_subdomains = active_procs;
7040: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7041: PetscFree(procs_candidates);
7042: return(0);
7043: }
7044: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7045: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7046: threshold = PetscMax(threshold,2);
7048: /* Get info on mapping */
7049: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7051: /* build local CSR graph of subdomains' connectivity */
7052: PetscMalloc1(2,&xadj);
7053: xadj[0] = 0;
7054: xadj[1] = PetscMax(n_neighs-1,0);
7055: PetscMalloc1(xadj[1],&adjncy);
7056: PetscMalloc1(xadj[1],&adjncy_wgt);
7057: PetscCalloc1(n,&count);
7058: for (i=1;i<n_neighs;i++)
7059: for (j=0;j<n_shared[i];j++)
7060: count[shared[i][j]] += 1;
7062: xadj_count = 0;
7063: for (i=1;i<n_neighs;i++) {
7064: for (j=0;j<n_shared[i];j++) {
7065: if (count[shared[i][j]] < threshold) {
7066: adjncy[xadj_count] = neighs[i];
7067: adjncy_wgt[xadj_count] = n_shared[i];
7068: xadj_count++;
7069: break;
7070: }
7071: }
7072: }
7073: xadj[1] = xadj_count;
7074: PetscFree(count);
7075: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7076: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7078: PetscMalloc1(1,&ranks_send_to_idx);
7080: /* Restrict work on active processes only */
7081: PetscMPIIntCast(im_active,&color);
7082: if (void_procs) {
7083: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7084: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7085: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7086: subcomm = PetscSubcommChild(psubcomm);
7087: } else {
7088: psubcomm = NULL;
7089: subcomm = PetscObjectComm((PetscObject)mat);
7090: }
7092: v_wgt = NULL;
7093: if (!color) {
7094: PetscFree(xadj);
7095: PetscFree(adjncy);
7096: PetscFree(adjncy_wgt);
7097: } else {
7098: Mat subdomain_adj;
7099: IS new_ranks,new_ranks_contig;
7100: MatPartitioning partitioner;
7101: PetscInt rstart=0,rend=0;
7102: PetscInt *is_indices,*oldranks;
7103: PetscMPIInt size;
7104: PetscBool aggregate;
7106: MPI_Comm_size(subcomm,&size);
7107: if (void_procs) {
7108: PetscInt prank = rank;
7109: PetscMalloc1(size,&oldranks);
7110: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7111: for (i=0;i<xadj[1];i++) {
7112: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7113: }
7114: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7115: } else {
7116: oldranks = NULL;
7117: }
7118: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7119: if (aggregate) { /* TODO: all this part could be made more efficient */
7120: PetscInt lrows,row,ncols,*cols;
7121: PetscMPIInt nrank;
7122: PetscScalar *vals;
7124: MPI_Comm_rank(subcomm,&nrank);
7125: lrows = 0;
7126: if (nrank<redprocs) {
7127: lrows = size/redprocs;
7128: if (nrank<size%redprocs) lrows++;
7129: }
7130: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7131: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7132: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7133: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7134: row = nrank;
7135: ncols = xadj[1]-xadj[0];
7136: cols = adjncy;
7137: PetscMalloc1(ncols,&vals);
7138: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7139: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7140: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7141: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7142: PetscFree(xadj);
7143: PetscFree(adjncy);
7144: PetscFree(adjncy_wgt);
7145: PetscFree(vals);
7146: if (use_vwgt) {
7147: Vec v;
7148: const PetscScalar *array;
7149: PetscInt nl;
7151: MatCreateVecs(subdomain_adj,&v,NULL);
7152: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7153: VecAssemblyBegin(v);
7154: VecAssemblyEnd(v);
7155: VecGetLocalSize(v,&nl);
7156: VecGetArrayRead(v,&array);
7157: PetscMalloc1(nl,&v_wgt);
7158: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7159: VecRestoreArrayRead(v,&array);
7160: VecDestroy(&v);
7161: }
7162: } else {
7163: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7164: if (use_vwgt) {
7165: PetscMalloc1(1,&v_wgt);
7166: v_wgt[0] = n;
7167: }
7168: }
7169: /* MatView(subdomain_adj,0); */
7171: /* Partition */
7172: MatPartitioningCreate(subcomm,&partitioner);
7173: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7174: if (v_wgt) {
7175: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7176: }
7177: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7178: MatPartitioningSetNParts(partitioner,*n_subdomains);
7179: MatPartitioningSetFromOptions(partitioner);
7180: MatPartitioningApply(partitioner,&new_ranks);
7181: /* MatPartitioningView(partitioner,0); */
7183: /* renumber new_ranks to avoid "holes" in new set of processors */
7184: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7185: ISDestroy(&new_ranks);
7186: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7187: if (!aggregate) {
7188: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7189: #if defined(PETSC_USE_DEBUG)
7190: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7191: #endif
7192: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7193: } else if (oldranks) {
7194: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7195: } else {
7196: ranks_send_to_idx[0] = is_indices[0];
7197: }
7198: } else {
7199: PetscInt idx = 0;
7200: PetscMPIInt tag;
7201: MPI_Request *reqs;
7203: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7204: PetscMalloc1(rend-rstart,&reqs);
7205: for (i=rstart;i<rend;i++) {
7206: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7207: }
7208: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7209: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7210: PetscFree(reqs);
7211: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7212: #if defined(PETSC_USE_DEBUG)
7213: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7214: #endif
7215: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7216: } else if (oldranks) {
7217: ranks_send_to_idx[0] = oldranks[idx];
7218: } else {
7219: ranks_send_to_idx[0] = idx;
7220: }
7221: }
7222: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7223: /* clean up */
7224: PetscFree(oldranks);
7225: ISDestroy(&new_ranks_contig);
7226: MatDestroy(&subdomain_adj);
7227: MatPartitioningDestroy(&partitioner);
7228: }
7229: PetscSubcommDestroy(&psubcomm);
7230: PetscFree(procs_candidates);
7232: /* assemble parallel IS for sends */
7233: i = 1;
7234: if (!color) i=0;
7235: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7236: return(0);
7237: }
/* Private integer tags for the type of a local matrix. MATDENSE_PRIVATE is the value
   stored as first entry of the index send buffer by PCBDDCMatISSubassemble. */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE,
  MATBAIJ_PRIVATE,
  MATSBAIJ_PRIVATE
} MatTypePrivate;
7241: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7242: {
7243: Mat local_mat;
7244: IS is_sends_internal;
7245: PetscInt rows,cols,new_local_rows;
7246: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7247: PetscBool ismatis,isdense,newisdense,destroy_mat;
7248: ISLocalToGlobalMapping l2gmap;
7249: PetscInt* l2gmap_indices;
7250: const PetscInt* is_indices;
7251: MatType new_local_type;
7252: /* buffers */
7253: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7254: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7255: PetscInt *recv_buffer_idxs_local;
7256: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7257: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7258: /* MPI */
7259: MPI_Comm comm,comm_n;
7260: PetscSubcomm subcomm;
7261: PetscMPIInt n_sends,n_recvs,size;
7262: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7263: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7264: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7265: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7266: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7267: PetscErrorCode ierr;
7271: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7272: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7279: if (nvecs) {
7280: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7282: }
7283: /* further checks */
7284: MatISGetLocalMat(mat,&local_mat);
7285: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7286: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7287: MatGetSize(local_mat,&rows,&cols);
7288: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7289: if (reuse && *mat_n) {
7290: PetscInt mrows,mcols,mnrows,mncols;
7292: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7293: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7294: MatGetSize(mat,&mrows,&mcols);
7295: MatGetSize(*mat_n,&mnrows,&mncols);
7296: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7297: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7298: }
7299: MatGetBlockSize(local_mat,&bs);
7302: /* prepare IS for sending if not provided */
7303: if (!is_sends) {
7304: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7305: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7306: } else {
7307: PetscObjectReference((PetscObject)is_sends);
7308: is_sends_internal = is_sends;
7309: }
7311: /* get comm */
7312: PetscObjectGetComm((PetscObject)mat,&comm);
7314: /* compute number of sends */
7315: ISGetLocalSize(is_sends_internal,&i);
7316: PetscMPIIntCast(i,&n_sends);
7318: /* compute number of receives */
7319: MPI_Comm_size(comm,&size);
7320: PetscMalloc1(size,&iflags);
7321: PetscMemzero(iflags,size*sizeof(*iflags));
7322: ISGetIndices(is_sends_internal,&is_indices);
7323: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7324: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7325: PetscFree(iflags);
7327: /* restrict comm if requested */
7328: subcomm = 0;
7329: destroy_mat = PETSC_FALSE;
7330: if (restrict_comm) {
7331: PetscMPIInt color,subcommsize;
7333: color = 0;
7334: if (restrict_full) {
7335: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7336: } else {
7337: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7338: }
7339: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7340: subcommsize = size - subcommsize;
7341: /* check if reuse has been requested */
7342: if (reuse) {
7343: if (*mat_n) {
7344: PetscMPIInt subcommsize2;
7345: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7346: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7347: comm_n = PetscObjectComm((PetscObject)*mat_n);
7348: } else {
7349: comm_n = PETSC_COMM_SELF;
7350: }
7351: } else { /* MAT_INITIAL_MATRIX */
7352: PetscMPIInt rank;
7354: MPI_Comm_rank(comm,&rank);
7355: PetscSubcommCreate(comm,&subcomm);
7356: PetscSubcommSetNumber(subcomm,2);
7357: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7358: comm_n = PetscSubcommChild(subcomm);
7359: }
7360: /* flag to destroy *mat_n if not significative */
7361: if (color) destroy_mat = PETSC_TRUE;
7362: } else {
7363: comm_n = comm;
7364: }
7366: /* prepare send/receive buffers */
7367: PetscMalloc1(size,&ilengths_idxs);
7368: PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));
7369: PetscMalloc1(size,&ilengths_vals);
7370: PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));
7371: if (nis) {
7372: PetscCalloc1(size,&ilengths_idxs_is);
7373: }
7375: /* Get data from local matrices */
7376: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7377: /* TODO: See below some guidelines on how to prepare the local buffers */
7378: /*
7379: send_buffer_vals should contain the raw values of the local matrix
7380: send_buffer_idxs should contain:
7381: - MatType_PRIVATE type
7382: - PetscInt size_of_l2gmap
7383: - PetscInt global_row_indices[size_of_l2gmap]
7384: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7385: */
7386: else {
7387: MatDenseGetArray(local_mat,&send_buffer_vals);
7388: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7389: PetscMalloc1(i+2,&send_buffer_idxs);
7390: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7391: send_buffer_idxs[1] = i;
7392: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7393: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7394: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7395: PetscMPIIntCast(i,&len);
7396: for (i=0;i<n_sends;i++) {
7397: ilengths_vals[is_indices[i]] = len*len;
7398: ilengths_idxs[is_indices[i]] = len+2;
7399: }
7400: }
7401: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7402: /* additional is (if any) */
7403: if (nis) {
7404: PetscMPIInt psum;
7405: PetscInt j;
7406: for (j=0,psum=0;j<nis;j++) {
7407: PetscInt plen;
7408: ISGetLocalSize(isarray[j],&plen);
7409: PetscMPIIntCast(plen,&len);
7410: psum += len+1; /* indices + lenght */
7411: }
7412: PetscMalloc1(psum,&send_buffer_idxs_is);
7413: for (j=0,psum=0;j<nis;j++) {
7414: PetscInt plen;
7415: const PetscInt *is_array_idxs;
7416: ISGetLocalSize(isarray[j],&plen);
7417: send_buffer_idxs_is[psum] = plen;
7418: ISGetIndices(isarray[j],&is_array_idxs);
7419: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7420: ISRestoreIndices(isarray[j],&is_array_idxs);
7421: psum += plen+1; /* indices + lenght */
7422: }
7423: for (i=0;i<n_sends;i++) {
7424: ilengths_idxs_is[is_indices[i]] = psum;
7425: }
7426: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7427: }
7428: MatISRestoreLocalMat(mat,&local_mat);
7430: buf_size_idxs = 0;
7431: buf_size_vals = 0;
7432: buf_size_idxs_is = 0;
7433: buf_size_vecs = 0;
7434: for (i=0;i<n_recvs;i++) {
7435: buf_size_idxs += (PetscInt)olengths_idxs[i];
7436: buf_size_vals += (PetscInt)olengths_vals[i];
7437: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7438: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7439: }
7440: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7441: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7442: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7443: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7445: /* get new tags for clean communications */
7446: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7447: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7448: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7449: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7451: /* allocate for requests */
7452: PetscMalloc1(n_sends,&send_req_idxs);
7453: PetscMalloc1(n_sends,&send_req_vals);
7454: PetscMalloc1(n_sends,&send_req_idxs_is);
7455: PetscMalloc1(n_sends,&send_req_vecs);
7456: PetscMalloc1(n_recvs,&recv_req_idxs);
7457: PetscMalloc1(n_recvs,&recv_req_vals);
7458: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7459: PetscMalloc1(n_recvs,&recv_req_vecs);
7461: /* communications */
7462: ptr_idxs = recv_buffer_idxs;
7463: ptr_vals = recv_buffer_vals;
7464: ptr_idxs_is = recv_buffer_idxs_is;
7465: ptr_vecs = recv_buffer_vecs;
7466: for (i=0;i<n_recvs;i++) {
7467: source_dest = onodes[i];
7468: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7469: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7470: ptr_idxs += olengths_idxs[i];
7471: ptr_vals += olengths_vals[i];
7472: if (nis) {
7473: source_dest = onodes_is[i];
7474: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7475: ptr_idxs_is += olengths_idxs_is[i];
7476: }
7477: if (nvecs) {
7478: source_dest = onodes[i];
7479: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7480: ptr_vecs += olengths_idxs[i]-2;
7481: }
7482: }
7483: for (i=0;i<n_sends;i++) {
7484: PetscMPIIntCast(is_indices[i],&source_dest);
7485: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7486: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7487: if (nis) {
7488: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7489: }
7490: if (nvecs) {
7491: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7492: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7493: }
7494: }
7495: ISRestoreIndices(is_sends_internal,&is_indices);
7496: ISDestroy(&is_sends_internal);
7498: /* assemble new l2g map */
7499: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7500: ptr_idxs = recv_buffer_idxs;
7501: new_local_rows = 0;
7502: for (i=0;i<n_recvs;i++) {
7503: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7504: ptr_idxs += olengths_idxs[i];
7505: }
7506: PetscMalloc1(new_local_rows,&l2gmap_indices);
7507: ptr_idxs = recv_buffer_idxs;
7508: new_local_rows = 0;
7509: for (i=0;i<n_recvs;i++) {
7510: PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7511: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7512: ptr_idxs += olengths_idxs[i];
7513: }
7514: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7515: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7516: PetscFree(l2gmap_indices);
7518: /* infer new local matrix type from received local matrices type */
7519: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7520: /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7521: if (n_recvs) {
7522: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7523: ptr_idxs = recv_buffer_idxs;
7524: for (i=0;i<n_recvs;i++) {
7525: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7526: new_local_type_private = MATAIJ_PRIVATE;
7527: break;
7528: }
7529: ptr_idxs += olengths_idxs[i];
7530: }
7531: switch (new_local_type_private) {
7532: case MATDENSE_PRIVATE:
7533: new_local_type = MATSEQAIJ;
7534: bs = 1;
7535: break;
7536: case MATAIJ_PRIVATE:
7537: new_local_type = MATSEQAIJ;
7538: bs = 1;
7539: break;
7540: case MATBAIJ_PRIVATE:
7541: new_local_type = MATSEQBAIJ;
7542: break;
7543: case MATSBAIJ_PRIVATE:
7544: new_local_type = MATSEQSBAIJ;
7545: break;
7546: default:
7547: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7548: break;
7549: }
7550: } else { /* by default, new_local_type is seqaij */
7551: new_local_type = MATSEQAIJ;
7552: bs = 1;
7553: }
7555: /* create MATIS object if needed */
7556: if (!reuse) {
7557: MatGetSize(mat,&rows,&cols);
7558: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7559: } else {
7560: /* it also destroys the local matrices */
7561: if (*mat_n) {
7562: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7563: } else { /* this is a fake object */
7564: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7565: }
7566: }
7567: MatISGetLocalMat(*mat_n,&local_mat);
7568: MatSetType(local_mat,new_local_type);
7570: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7572: /* Global to local map of received indices */
7573: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7574: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7575: ISLocalToGlobalMappingDestroy(&l2gmap);
7577: /* restore attributes -> type of incoming data and its size */
7578: buf_size_idxs = 0;
7579: for (i=0;i<n_recvs;i++) {
7580: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7581: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7582: buf_size_idxs += (PetscInt)olengths_idxs[i];
7583: }
7584: PetscFree(recv_buffer_idxs);
7586: /* set preallocation */
7587: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7588: if (!newisdense) {
7589: PetscInt *new_local_nnz=0;
7591: ptr_idxs = recv_buffer_idxs_local;
7592: if (n_recvs) {
7593: PetscCalloc1(new_local_rows,&new_local_nnz);
7594: }
7595: for (i=0;i<n_recvs;i++) {
7596: PetscInt j;
7597: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7598: for (j=0;j<*(ptr_idxs+1);j++) {
7599: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7600: }
7601: } else {
7602: /* TODO */
7603: }
7604: ptr_idxs += olengths_idxs[i];
7605: }
7606: if (new_local_nnz) {
7607: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7608: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7609: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7610: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7611: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7612: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7613: } else {
7614: MatSetUp(local_mat);
7615: }
7616: PetscFree(new_local_nnz);
7617: } else {
7618: MatSetUp(local_mat);
7619: }
7621: /* set values */
7622: ptr_vals = recv_buffer_vals;
7623: ptr_idxs = recv_buffer_idxs_local;
7624: for (i=0;i<n_recvs;i++) {
7625: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7626: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7627: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7628: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7629: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7630: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7631: } else {
7632: /* TODO */
7633: }
7634: ptr_idxs += olengths_idxs[i];
7635: ptr_vals += olengths_vals[i];
7636: }
7637: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7638: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7639: MatISRestoreLocalMat(*mat_n,&local_mat);
7640: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7641: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7642: PetscFree(recv_buffer_vals);
7644: #if 0
7645: if (!restrict_comm) { /* check */
7646: Vec lvec,rvec;
7647: PetscReal infty_error;
7649: MatCreateVecs(mat,&rvec,&lvec);
7650: VecSetRandom(rvec,NULL);
7651: MatMult(mat,rvec,lvec);
7652: VecScale(lvec,-1.0);
7653: MatMultAdd(*mat_n,rvec,lvec,lvec);
7654: VecNorm(lvec,NORM_INFINITY,&infty_error);
7655: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7656: VecDestroy(&rvec);
7657: VecDestroy(&lvec);
7658: }
7659: #endif
7661: /* assemble new additional is (if any) */
7662: if (nis) {
7663: PetscInt **temp_idxs,*count_is,j,psum;
7665: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7666: PetscCalloc1(nis,&count_is);
7667: ptr_idxs = recv_buffer_idxs_is;
7668: psum = 0;
7669: for (i=0;i<n_recvs;i++) {
7670: for (j=0;j<nis;j++) {
7671: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7672: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7673: psum += plen;
7674: ptr_idxs += plen+1; /* shift pointer to received data */
7675: }
7676: }
7677: PetscMalloc1(nis,&temp_idxs);
7678: PetscMalloc1(psum,&temp_idxs[0]);
7679: for (i=1;i<nis;i++) {
7680: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7681: }
7682: PetscMemzero(count_is,nis*sizeof(PetscInt));
7683: ptr_idxs = recv_buffer_idxs_is;
7684: for (i=0;i<n_recvs;i++) {
7685: for (j=0;j<nis;j++) {
7686: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7687: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7688: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7689: ptr_idxs += plen+1; /* shift pointer to received data */
7690: }
7691: }
7692: for (i=0;i<nis;i++) {
7693: ISDestroy(&isarray[i]);
7694: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7695: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7696: }
7697: PetscFree(count_is);
7698: PetscFree(temp_idxs[0]);
7699: PetscFree(temp_idxs);
7700: }
7701: /* free workspace */
7702: PetscFree(recv_buffer_idxs_is);
7703: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7704: PetscFree(send_buffer_idxs);
7705: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7706: if (isdense) {
7707: MatISGetLocalMat(mat,&local_mat);
7708: MatDenseRestoreArray(local_mat,&send_buffer_vals);
7709: MatISRestoreLocalMat(mat,&local_mat);
7710: } else {
7711: /* PetscFree(send_buffer_vals); */
7712: }
7713: if (nis) {
7714: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7715: PetscFree(send_buffer_idxs_is);
7716: }
7718: if (nvecs) {
7719: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7720: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
7721: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7722: VecDestroy(&nnsp_vec[0]);
7723: VecCreate(comm_n,&nnsp_vec[0]);
7724: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7725: VecSetType(nnsp_vec[0],VECSTANDARD);
7726: /* set values */
7727: ptr_vals = recv_buffer_vecs;
7728: ptr_idxs = recv_buffer_idxs_local;
7729: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7730: for (i=0;i<n_recvs;i++) {
7731: PetscInt j;
7732: for (j=0;j<*(ptr_idxs+1);j++) {
7733: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7734: }
7735: ptr_idxs += olengths_idxs[i];
7736: ptr_vals += olengths_idxs[i]-2;
7737: }
7738: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7739: VecAssemblyBegin(nnsp_vec[0]);
7740: VecAssemblyEnd(nnsp_vec[0]);
7741: }
7743: PetscFree(recv_buffer_vecs);
7744: PetscFree(recv_buffer_idxs_local);
7745: PetscFree(recv_req_idxs);
7746: PetscFree(recv_req_vals);
7747: PetscFree(recv_req_vecs);
7748: PetscFree(recv_req_idxs_is);
7749: PetscFree(send_req_idxs);
7750: PetscFree(send_req_vals);
7751: PetscFree(send_req_vecs);
7752: PetscFree(send_req_idxs_is);
7753: PetscFree(ilengths_vals);
7754: PetscFree(ilengths_idxs);
7755: PetscFree(olengths_vals);
7756: PetscFree(olengths_idxs);
7757: PetscFree(onodes);
7758: if (nis) {
7759: PetscFree(ilengths_idxs_is);
7760: PetscFree(olengths_idxs_is);
7761: PetscFree(onodes_is);
7762: }
7763: PetscSubcommDestroy(&subcomm);
7764: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
7765: MatDestroy(mat_n);
7766: for (i=0;i<nis;i++) {
7767: ISDestroy(&isarray[i]);
7768: }
7769: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7770: VecDestroy(&nnsp_vec[0]);
7771: }
7772: *mat_n = NULL;
7773: }
7774: return(0);
7775: }
7777: /* temporary hack into ksp private data structure */
7778: #include <petsc/private/kspimpl.h>
7780: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
7781: {
7782: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7783: PC_IS *pcis = (PC_IS*)pc->data;
7784: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
7785: Mat coarsedivudotp = NULL;
7786: Mat coarseG,t_coarse_mat_is;
7787: MatNullSpace CoarseNullSpace = NULL;
7788: ISLocalToGlobalMapping coarse_islg;
7789: IS coarse_is,*isarray;
7790: PetscInt i,im_active=-1,active_procs=-1;
7791: PetscInt nis,nisdofs,nisneu,nisvert;
7792: PetscInt coarse_eqs_per_proc;
7793: PC pc_temp;
7794: PCType coarse_pc_type;
7795: KSPType coarse_ksp_type;
7796: PetscBool multilevel_requested,multilevel_allowed;
7797: PetscBool coarse_reuse;
7798: PetscInt ncoarse,nedcfield;
7799: PetscBool compute_vecs = PETSC_FALSE;
7800: PetscScalar *array;
7801: MatReuse coarse_mat_reuse;
7802: PetscBool restr, full_restr, have_void;
7803: PetscMPIInt size;
7804: PetscErrorCode ierr;
7807: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
7808: /* Assign global numbering to coarse dofs */
7809: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7810: PetscInt ocoarse_size;
7811: compute_vecs = PETSC_TRUE;
7813: pcbddc->new_primal_space = PETSC_TRUE;
7814: ocoarse_size = pcbddc->coarse_size;
7815: PetscFree(pcbddc->global_primal_indices);
7816: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
7817: /* see if we can avoid some work */
7818: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7819: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7820: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7821: KSPReset(pcbddc->coarse_ksp);
7822: coarse_reuse = PETSC_FALSE;
7823: } else { /* we can safely reuse already computed coarse matrix */
7824: coarse_reuse = PETSC_TRUE;
7825: }
7826: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7827: coarse_reuse = PETSC_FALSE;
7828: }
7829: /* reset any subassembling information */
7830: if (!coarse_reuse || pcbddc->recompute_topography) {
7831: ISDestroy(&pcbddc->coarse_subassembling);
7832: }
7833: } else { /* primal space is unchanged, so we can reuse coarse matrix */
7834: coarse_reuse = PETSC_TRUE;
7835: }
7836: /* assemble coarse matrix */
7837: if (coarse_reuse && pcbddc->coarse_ksp) {
7838: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
7839: PetscObjectReference((PetscObject)coarse_mat);
7840: coarse_mat_reuse = MAT_REUSE_MATRIX;
7841: } else {
7842: coarse_mat = NULL;
7843: coarse_mat_reuse = MAT_INITIAL_MATRIX;
7844: }
7846: /* creates temporary l2gmap and IS for coarse indexes */
7847: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
7848: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
7850: /* creates temporary MATIS object for coarse matrix */
7851: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
7852: MatDenseGetArray(coarse_submat_dense,&array);
7853: PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
7854: MatDenseRestoreArray(coarse_submat_dense,&array);
7855: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
7856: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
7857: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7858: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
7859: MatDestroy(&coarse_submat_dense);
7861: /* count "active" (i.e. with positive local size) and "void" processes */
7862: im_active = !!(pcis->n);
7863: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7865: /* determine number of processes participating in coarse solver and compute subassembling pattern */
7866: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7867: /* full_restr : just use the receivers from the subassembling pattern */
7868: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
7869: coarse_mat_is = NULL;
7870: multilevel_allowed = PETSC_FALSE;
7871: multilevel_requested = PETSC_FALSE;
7872: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
7873: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7874: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7875: if (multilevel_requested) {
7876: ncoarse = active_procs/pcbddc->coarsening_ratio;
7877: restr = PETSC_FALSE;
7878: full_restr = PETSC_FALSE;
7879: } else {
7880: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
7881: restr = PETSC_TRUE;
7882: full_restr = PETSC_TRUE;
7883: }
7884: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7885: ncoarse = PetscMax(1,ncoarse);
7886: if (!pcbddc->coarse_subassembling) {
7887: if (pcbddc->coarsening_ratio > 1) {
7888: if (multilevel_requested) {
7889: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7890: } else {
7891: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
7892: }
7893: } else {
7894: PetscMPIInt rank;
7895: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
7896: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7897: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
7898: }
7899: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7900: PetscInt psum;
7901: if (pcbddc->coarse_ksp) psum = 1;
7902: else psum = 0;
7903: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
7904: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7905: }
7906: /* determine if we can go multilevel */
7907: if (multilevel_requested) {
7908: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7909: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
7910: }
7911: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
7913: /* dump subassembling pattern */
7914: if (pcbddc->dbg_flag && multilevel_allowed) {
7915: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
7916: }
7917: /* compute dofs splitting and neumann boundaries for coarse dofs */
7918: nedcfield = -1;
7919: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */
7920: PetscInt *tidxs,*tidxs2,nout,tsize,i;
7921: const PetscInt *idxs;
7922: ISLocalToGlobalMapping tmap;
7924: /* create map between primal indices (in local representative ordering) and local primal numbering */
7925: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
7926: /* allocate space for temporary storage */
7927: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
7928: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
7929: /* allocate for IS array */
7930: nisdofs = pcbddc->n_ISForDofsLocal;
7931: if (pcbddc->nedclocal) {
7932: if (pcbddc->nedfield > -1) {
7933: nedcfield = pcbddc->nedfield;
7934: } else {
7935: nedcfield = 0;
7936: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
7937: nisdofs = 1;
7938: }
7939: }
7940: nisneu = !!pcbddc->NeumannBoundariesLocal;
7941: nisvert = 0; /* nisvert is not used */
7942: nis = nisdofs + nisneu + nisvert;
7943: PetscMalloc1(nis,&isarray);
7944: /* dofs splitting */
7945: for (i=0;i<nisdofs;i++) {
7946: /* ISView(pcbddc->ISForDofsLocal[i],0); */
7947: if (nedcfield != i) {
7948: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
7949: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
7950: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7951: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
7952: } else {
7953: ISGetLocalSize(pcbddc->nedclocal,&tsize);
7954: ISGetIndices(pcbddc->nedclocal,&idxs);
7955: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7956: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
7957: ISRestoreIndices(pcbddc->nedclocal,&idxs);
7958: }
7959: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7960: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
7961: /* ISView(isarray[i],0); */
7962: }
7963: /* neumann boundaries */
7964: if (pcbddc->NeumannBoundariesLocal) {
7965: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
7966: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
7967: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7968: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
7969: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
7970: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
7971: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
7972: /* ISView(isarray[nisdofs],0); */
7973: }
7974: /* free memory */
7975: PetscFree(tidxs);
7976: PetscFree(tidxs2);
7977: ISLocalToGlobalMappingDestroy(&tmap);
7978: } else {
7979: nis = 0;
7980: nisdofs = 0;
7981: nisneu = 0;
7982: nisvert = 0;
7983: isarray = NULL;
7984: }
7985: /* destroy no longer needed map */
7986: ISLocalToGlobalMappingDestroy(&coarse_islg);
7988: /* subassemble */
7989: if (multilevel_allowed) {
7990: Vec vp[1];
7991: PetscInt nvecs = 0;
7992: PetscBool reuse,reuser;
7994: if (coarse_mat) reuse = PETSC_TRUE;
7995: else reuse = PETSC_FALSE;
7996: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7997: vp[0] = NULL;
7998: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7999: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8000: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8001: VecSetType(vp[0],VECSTANDARD);
8002: nvecs = 1;
8004: if (pcbddc->divudotp) {
8005: Mat B,loc_divudotp;
8006: Vec v,p;
8007: IS dummy;
8008: PetscInt np;
8010: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8011: MatGetSize(loc_divudotp,&np,NULL);
8012: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8013: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8014: MatCreateVecs(B,&v,&p);
8015: VecSet(p,1.);
8016: MatMultTranspose(B,p,v);
8017: VecDestroy(&p);
8018: MatDestroy(&B);
8019: VecGetArray(vp[0],&array);
8020: VecPlaceArray(pcbddc->vec1_P,array);
8021: VecRestoreArray(vp[0],&array);
8022: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8023: VecResetArray(pcbddc->vec1_P);
8024: ISDestroy(&dummy);
8025: VecDestroy(&v);
8026: }
8027: }
8028: if (reuser) {
8029: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8030: } else {
8031: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8032: }
8033: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8034: PetscScalar *arraym,*arrayv;
8035: PetscInt nl;
8036: VecGetLocalSize(vp[0],&nl);
8037: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8038: MatDenseGetArray(coarsedivudotp,&arraym);
8039: VecGetArray(vp[0],&arrayv);
8040: PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8041: VecRestoreArray(vp[0],&arrayv);
8042: MatDenseRestoreArray(coarsedivudotp,&arraym);
8043: VecDestroy(&vp[0]);
8044: } else {
8045: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8046: }
8047: } else {
8048: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8049: }
8050: if (coarse_mat_is || coarse_mat) {
8051: if (!multilevel_allowed) {
8052: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8053: } else {
8054: Mat A;
8056: /* if this matrix is present, it means we are not reusing the coarse matrix */
8057: if (coarse_mat_is) {
8058: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8059: PetscObjectReference((PetscObject)coarse_mat_is);
8060: coarse_mat = coarse_mat_is;
8061: }
8062: /* be sure we don't have MatSeqDENSE as local mat */
8063: MatISGetLocalMat(coarse_mat,&A);
8064: MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8065: }
8066: }
8067: MatDestroy(&t_coarse_mat_is);
8068: MatDestroy(&coarse_mat_is);
8070: /* create local to global scatters for coarse problem */
8071: if (compute_vecs) {
8072: PetscInt lrows;
8073: VecDestroy(&pcbddc->coarse_vec);
8074: if (coarse_mat) {
8075: MatGetLocalSize(coarse_mat,&lrows,NULL);
8076: } else {
8077: lrows = 0;
8078: }
8079: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8080: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8081: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8082: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8083: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8084: }
8085: ISDestroy(&coarse_is);
8087: /* set defaults for coarse KSP and PC */
8088: if (multilevel_allowed) {
8089: coarse_ksp_type = KSPRICHARDSON;
8090: coarse_pc_type = PCBDDC;
8091: } else {
8092: coarse_ksp_type = KSPPREONLY;
8093: coarse_pc_type = PCREDUNDANT;
8094: }
8096: /* print some info if requested */
8097: if (pcbddc->dbg_flag) {
8098: if (!multilevel_allowed) {
8099: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8100: if (multilevel_requested) {
8101: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8102: } else if (pcbddc->max_levels) {
8103: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8104: }
8105: PetscViewerFlush(pcbddc->dbg_viewer);
8106: }
8107: }
8109: /* communicate coarse discrete gradient */
8110: coarseG = NULL;
8111: if (pcbddc->nedcG && multilevel_allowed) {
8112: MPI_Comm ccomm;
8113: if (coarse_mat) {
8114: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8115: } else {
8116: ccomm = MPI_COMM_NULL;
8117: }
8118: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8119: }
8121: /* create the coarse KSP object only once with defaults */
8122: if (coarse_mat) {
8123: PetscBool isredundant,isnn,isbddc;
8124: PetscViewer dbg_viewer = NULL;
8126: if (pcbddc->dbg_flag) {
8127: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8128: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8129: }
8130: if (!pcbddc->coarse_ksp) {
8131: char prefix[256],str_level[16];
8132: size_t len;
8134: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8135: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8136: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8137: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8138: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8139: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8140: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8141: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8142: /* TODO is this logic correct? should check for coarse_mat type */
8143: PCSetType(pc_temp,coarse_pc_type);
8144: /* prefix */
8145: PetscStrcpy(prefix,"");
8146: PetscStrcpy(str_level,"");
8147: if (!pcbddc->current_level) {
8148: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8149: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8150: } else {
8151: PetscStrlen(((PetscObject)pc)->prefix,&len);
8152: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8153: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8154: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8155: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8156: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8157: PetscStrlcat(prefix,str_level,sizeof(prefix));
8158: }
8159: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8160: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8161: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8162: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8163: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8164: /* allow user customization */
8165: KSPSetFromOptions(pcbddc->coarse_ksp);
8166: /* get some info after set from options */
8167: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8168: /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8169: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8170: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8171: if (multilevel_allowed && !isbddc && !isnn) {
8172: isbddc = PETSC_TRUE;
8173: PCSetType(pc_temp,PCBDDC);
8174: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8175: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8176: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8177: }
8178: }
8179: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8180: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8181: if (nisdofs) {
8182: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8183: for (i=0;i<nisdofs;i++) {
8184: ISDestroy(&isarray[i]);
8185: }
8186: }
8187: if (nisneu) {
8188: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8189: ISDestroy(&isarray[nisdofs]);
8190: }
8191: if (nisvert) {
8192: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8193: ISDestroy(&isarray[nis-1]);
8194: }
8195: if (coarseG) {
8196: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8197: }
8199: /* get some info after set from options */
8200: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8201: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8202: if (isbddc && !multilevel_allowed) {
8203: PCSetType(pc_temp,coarse_pc_type);
8204: isbddc = PETSC_FALSE;
8205: }
8206: /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8207: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8208: if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8209: PCSetType(pc_temp,PCBDDC);
8210: isbddc = PETSC_TRUE;
8211: }
8212: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8213: if (isredundant) {
8214: KSP inner_ksp;
8215: PC inner_pc;
8217: PCRedundantGetKSP(pc_temp,&inner_ksp);
8218: KSPGetPC(inner_ksp,&inner_pc);
8219: }
8221: /* parameters which miss an API */
8222: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8223: if (isbddc) {
8224: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8226: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8227: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8228: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8229: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8230: if (pcbddc_coarse->benign_saddle_point) {
8231: Mat coarsedivudotp_is;
8232: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8233: IS row,col;
8234: const PetscInt *gidxs;
8235: PetscInt n,st,M,N;
8237: MatGetSize(coarsedivudotp,&n,NULL);
8238: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8239: st = st-n;
8240: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8241: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8242: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8243: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8244: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8245: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8246: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8247: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8248: ISGetSize(row,&M);
8249: MatGetSize(coarse_mat,&N,NULL);
8250: ISDestroy(&row);
8251: ISDestroy(&col);
8252: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8253: MatSetType(coarsedivudotp_is,MATIS);
8254: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8255: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8256: ISLocalToGlobalMappingDestroy(&rl2g);
8257: ISLocalToGlobalMappingDestroy(&cl2g);
8258: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8259: MatDestroy(&coarsedivudotp);
8260: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8261: MatDestroy(&coarsedivudotp_is);
8262: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8263: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8264: }
8265: }
8267: /* propagate symmetry info of coarse matrix */
8268: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8269: if (pc->pmat->symmetric_set) {
8270: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8271: }
8272: if (pc->pmat->hermitian_set) {
8273: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8274: }
8275: if (pc->pmat->spd_set) {
8276: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8277: }
8278: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8279: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8280: }
8281: /* set operators */
8282: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8283: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8284: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8285: if (pcbddc->dbg_flag) {
8286: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8287: }
8288: }
8289: MatDestroy(&coarseG);
8290: PetscFree(isarray);
8291: #if 0
8292: {
8293: PetscViewer viewer;
8294: char filename[256];
8295: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8296: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8297: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8298: MatView(coarse_mat,viewer);
8299: PetscViewerPopFormat(viewer);
8300: PetscViewerDestroy(&viewer);
8301: }
8302: #endif
8304: if (pcbddc->coarse_ksp) {
8305: Vec crhs,csol;
8307: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8308: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8309: if (!csol) {
8310: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8311: }
8312: if (!crhs) {
8313: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8314: }
8315: }
8316: MatDestroy(&coarsedivudotp);
8318: /* compute null space for coarse solver if the benign trick has been requested */
8319: if (pcbddc->benign_null) {
8321: VecSet(pcbddc->vec1_P,0.);
8322: for (i=0;i<pcbddc->benign_n;i++) {
8323: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8324: }
8325: VecAssemblyBegin(pcbddc->vec1_P);
8326: VecAssemblyEnd(pcbddc->vec1_P);
8327: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8328: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8329: if (coarse_mat) {
8330: Vec nullv;
8331: PetscScalar *array,*array2;
8332: PetscInt nl;
8334: MatCreateVecs(coarse_mat,&nullv,NULL);
8335: VecGetLocalSize(nullv,&nl);
8336: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8337: VecGetArray(nullv,&array2);
8338: PetscMemcpy(array2,array,nl*sizeof(*array));
8339: VecRestoreArray(nullv,&array2);
8340: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8341: VecNormalize(nullv,NULL);
8342: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8343: VecDestroy(&nullv);
8344: }
8345: }
8346: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8348: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8349: if (pcbddc->coarse_ksp) {
8350: PetscBool ispreonly;
8352: if (CoarseNullSpace) {
8353: PetscBool isnull;
8354: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8355: if (isnull) {
8356: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8357: }
8358: /* TODO: add local nullspaces (if any) */
8359: }
8360: /* setup coarse ksp */
8361: KSPSetUp(pcbddc->coarse_ksp);
8362: /* Check coarse problem if in debug mode or if solving with an iterative method */
8363: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8364: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8365: KSP check_ksp;
8366: KSPType check_ksp_type;
8367: PC check_pc;
8368: Vec check_vec,coarse_vec;
8369: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8370: PetscInt its;
8371: PetscBool compute_eigs;
8372: PetscReal *eigs_r,*eigs_c;
8373: PetscInt neigs;
8374: const char *prefix;
8376: /* Create ksp object suitable for estimation of extreme eigenvalues */
8377: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8378: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8379: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8380: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8381: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8382: /* prevent from setup unneeded object */
8383: KSPGetPC(check_ksp,&check_pc);
8384: PCSetType(check_pc,PCNONE);
8385: if (ispreonly) {
8386: check_ksp_type = KSPPREONLY;
8387: compute_eigs = PETSC_FALSE;
8388: } else {
8389: check_ksp_type = KSPGMRES;
8390: compute_eigs = PETSC_TRUE;
8391: }
8392: KSPSetType(check_ksp,check_ksp_type);
8393: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8394: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8395: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8396: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8397: KSPSetOptionsPrefix(check_ksp,prefix);
8398: KSPAppendOptionsPrefix(check_ksp,"check_");
8399: KSPSetFromOptions(check_ksp);
8400: KSPSetUp(check_ksp);
8401: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8402: KSPSetPC(check_ksp,check_pc);
8403: /* create random vec */
8404: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8405: VecSetRandom(check_vec,NULL);
8406: MatMult(coarse_mat,check_vec,coarse_vec);
8407: /* solve coarse problem */
8408: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8409: /* set eigenvalue estimation if preonly has not been requested */
8410: if (compute_eigs) {
8411: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8412: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8413: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8414: if (neigs) {
8415: lambda_max = eigs_r[neigs-1];
8416: lambda_min = eigs_r[0];
8417: if (pcbddc->use_coarse_estimates) {
8418: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8419: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8420: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8421: }
8422: }
8423: }
8424: }
8426: /* check coarse problem residual error */
8427: if (pcbddc->dbg_flag) {
8428: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8429: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8430: VecAXPY(check_vec,-1.0,coarse_vec);
8431: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8432: MatMult(coarse_mat,check_vec,coarse_vec);
8433: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8434: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8435: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8436: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8437: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8438: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8439: if (CoarseNullSpace) {
8440: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8441: }
8442: if (compute_eigs) {
8443: PetscReal lambda_max_s,lambda_min_s;
8444: KSPConvergedReason reason;
8445: KSPGetType(check_ksp,&check_ksp_type);
8446: KSPGetIterationNumber(check_ksp,&its);
8447: KSPGetConvergedReason(check_ksp,&reason);
8448: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8449: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8450: for (i=0;i<neigs;i++) {
8451: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8452: }
8453: }
8454: PetscViewerFlush(dbg_viewer);
8455: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8456: }
8457: VecDestroy(&check_vec);
8458: VecDestroy(&coarse_vec);
8459: KSPDestroy(&check_ksp);
8460: if (compute_eigs) {
8461: PetscFree(eigs_r);
8462: PetscFree(eigs_c);
8463: }
8464: }
8465: }
8466: MatNullSpaceDestroy(&CoarseNullSpace);
8467: /* print additional info */
8468: if (pcbddc->dbg_flag) {
8469: /* waits until all processes reaches this point */
8470: PetscBarrier((PetscObject)pc);
8471: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8472: PetscViewerFlush(pcbddc->dbg_viewer);
8473: }
8475: /* free memory */
8476: MatDestroy(&coarse_mat);
8477: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8478: return(0);
8479: }
8481: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8482: {
8483: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8484: PC_IS* pcis = (PC_IS*)pc->data;
8485: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8486: IS subset,subset_mult,subset_n;
8487: PetscInt local_size,coarse_size=0;
8488: PetscInt *local_primal_indices=NULL;
8489: const PetscInt *t_local_primal_indices;
8493: /* Compute global number of coarse dofs */
8494: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8495: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8496: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8497: ISDestroy(&subset_n);
8498: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8499: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8500: ISDestroy(&subset);
8501: ISDestroy(&subset_mult);
8502: ISGetLocalSize(subset_n,&local_size);
8503: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8504: PetscMalloc1(local_size,&local_primal_indices);
8505: ISGetIndices(subset_n,&t_local_primal_indices);
8506: PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8507: ISRestoreIndices(subset_n,&t_local_primal_indices);
8508: ISDestroy(&subset_n);
8510: /* check numbering */
8511: if (pcbddc->dbg_flag) {
8512: PetscScalar coarsesum,*array,*array2;
8513: PetscInt i;
8514: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8516: PetscViewerFlush(pcbddc->dbg_viewer);
8517: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8518: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8519: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8520: /* counter */
8521: VecSet(pcis->vec1_global,0.0);
8522: VecSet(pcis->vec1_N,1.0);
8523: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8524: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8525: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8526: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8527: VecSet(pcis->vec1_N,0.0);
8528: for (i=0;i<pcbddc->local_primal_size;i++) {
8529: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8530: }
8531: VecAssemblyBegin(pcis->vec1_N);
8532: VecAssemblyEnd(pcis->vec1_N);
8533: VecSet(pcis->vec1_global,0.0);
8534: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8535: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8536: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8537: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8538: VecGetArray(pcis->vec1_N,&array);
8539: VecGetArray(pcis->vec2_N,&array2);
8540: for (i=0;i<pcis->n;i++) {
8541: if (array[i] != 0.0 && array[i] != array2[i]) {
8542: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8543: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8544: set_error = PETSC_TRUE;
8545: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8546: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8547: }
8548: }
8549: VecRestoreArray(pcis->vec2_N,&array2);
8550: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8551: PetscViewerFlush(pcbddc->dbg_viewer);
8552: for (i=0;i<pcis->n;i++) {
8553: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8554: }
8555: VecRestoreArray(pcis->vec1_N,&array);
8556: VecSet(pcis->vec1_global,0.0);
8557: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8558: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8559: VecSum(pcis->vec1_global,&coarsesum);
8560: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8561: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8562: PetscInt *gidxs;
8564: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8565: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8566: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8567: PetscViewerFlush(pcbddc->dbg_viewer);
8568: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8569: for (i=0;i<pcbddc->local_primal_size;i++) {
8570: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8571: }
8572: PetscViewerFlush(pcbddc->dbg_viewer);
8573: PetscFree(gidxs);
8574: }
8575: PetscViewerFlush(pcbddc->dbg_viewer);
8576: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8577: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8578: }
8580: /* get back data */
8581: *coarse_size_n = coarse_size;
8582: *local_primal_indices_n = local_primal_indices;
8583: return(0);
8584: }
8586: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8587: {
8588: IS localis_t;
8589: PetscInt i,lsize,*idxs,n;
8590: PetscScalar *vals;
8594: /* get indices in local ordering exploiting local to global map */
8595: ISGetLocalSize(globalis,&lsize);
8596: PetscMalloc1(lsize,&vals);
8597: for (i=0;i<lsize;i++) vals[i] = 1.0;
8598: ISGetIndices(globalis,(const PetscInt**)&idxs);
8599: VecSet(gwork,0.0);
8600: VecSet(lwork,0.0);
8601: if (idxs) { /* multilevel guard */
8602: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8603: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8604: }
8605: VecAssemblyBegin(gwork);
8606: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8607: PetscFree(vals);
8608: VecAssemblyEnd(gwork);
8609: /* now compute set in local ordering */
8610: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8611: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8612: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8613: VecGetSize(lwork,&n);
8614: for (i=0,lsize=0;i<n;i++) {
8615: if (PetscRealPart(vals[i]) > 0.5) {
8616: lsize++;
8617: }
8618: }
8619: PetscMalloc1(lsize,&idxs);
8620: for (i=0,lsize=0;i<n;i++) {
8621: if (PetscRealPart(vals[i]) > 0.5) {
8622: idxs[lsize++] = i;
8623: }
8624: }
8625: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8626: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8627: *localis = localis_t;
8628: return(0);
8629: }
8631: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8632: {
8633: PC_IS *pcis=(PC_IS*)pc->data;
8634: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8635: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
8636: Mat S_j;
8637: PetscInt *used_xadj,*used_adjncy;
8638: PetscBool free_used_adj;
8639: PetscErrorCode ierr;
8642: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8643: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8644: free_used_adj = PETSC_FALSE;
8645: if (pcbddc->sub_schurs_layers == -1) {
8646: used_xadj = NULL;
8647: used_adjncy = NULL;
8648: } else {
8649: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8650: used_xadj = pcbddc->mat_graph->xadj;
8651: used_adjncy = pcbddc->mat_graph->adjncy;
8652: } else if (pcbddc->computed_rowadj) {
8653: used_xadj = pcbddc->mat_graph->xadj;
8654: used_adjncy = pcbddc->mat_graph->adjncy;
8655: } else {
8656: PetscBool flg_row=PETSC_FALSE;
8657: const PetscInt *xadj,*adjncy;
8658: PetscInt nvtxs;
8660: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8661: if (flg_row) {
8662: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8663: PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8664: PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8665: free_used_adj = PETSC_TRUE;
8666: } else {
8667: pcbddc->sub_schurs_layers = -1;
8668: used_xadj = NULL;
8669: used_adjncy = NULL;
8670: }
8671: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8672: }
8673: }
8675: /* setup sub_schurs data */
8676: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8677: if (!sub_schurs->schur_explicit) {
8678: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8679: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8680: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8681: } else {
8682: Mat change = NULL;
8683: Vec scaling = NULL;
8684: IS change_primal = NULL, iP;
8685: PetscInt benign_n;
8686: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8687: PetscBool isseqaij,need_change = PETSC_FALSE;
8688: PetscBool discrete_harmonic = PETSC_FALSE;
8690: if (!pcbddc->use_vertices && reuse_solvers) {
8691: PetscInt n_vertices;
8693: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8694: reuse_solvers = (PetscBool)!n_vertices;
8695: }
8696: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8697: if (!isseqaij) {
8698: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8699: if (matis->A == pcbddc->local_mat) {
8700: MatDestroy(&pcbddc->local_mat);
8701: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
8702: } else {
8703: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
8704: }
8705: }
8706: if (!pcbddc->benign_change_explicit) {
8707: benign_n = pcbddc->benign_n;
8708: } else {
8709: benign_n = 0;
8710: }
8711: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8712: We need a global reduction to avoid possible deadlocks.
8713: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8714: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8715: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8716: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8717: need_change = (PetscBool)(!need_change);
8718: }
8719: /* If the user defines additional constraints, we import them here.
8720: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
8721: if (need_change) {
8722: PC_IS *pcisf;
8723: PC_BDDC *pcbddcf;
8724: PC pcf;
8726: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
8727: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
8728: PCSetOperators(pcf,pc->mat,pc->pmat);
8729: PCSetType(pcf,PCBDDC);
8731: /* hacks */
8732: pcisf = (PC_IS*)pcf->data;
8733: pcisf->is_B_local = pcis->is_B_local;
8734: pcisf->vec1_N = pcis->vec1_N;
8735: pcisf->BtoNmap = pcis->BtoNmap;
8736: pcisf->n = pcis->n;
8737: pcisf->n_B = pcis->n_B;
8738: pcbddcf = (PC_BDDC*)pcf->data;
8739: PetscFree(pcbddcf->mat_graph);
8740: pcbddcf->mat_graph = pcbddc->mat_graph;
8741: pcbddcf->use_faces = PETSC_TRUE;
8742: pcbddcf->use_change_of_basis = PETSC_TRUE;
8743: pcbddcf->use_change_on_faces = PETSC_TRUE;
8744: pcbddcf->use_qr_single = PETSC_TRUE;
8745: pcbddcf->fake_change = PETSC_TRUE;
8747: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
8748: PCBDDCConstraintsSetUp(pcf);
8749: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
8750: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
8751: change = pcbddcf->ConstraintMatrix;
8752: pcbddcf->ConstraintMatrix = NULL;
8754: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
8755: PetscFree(pcbddcf->sub_schurs);
8756: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
8757: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
8758: PetscFree(pcbddcf->primal_indices_local_idxs);
8759: PetscFree(pcbddcf->onearnullvecs_state);
8760: PetscFree(pcf->data);
8761: pcf->ops->destroy = NULL;
8762: pcf->ops->reset = NULL;
8763: PCDestroy(&pcf);
8764: }
8765: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
8767: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
8768: if (iP) {
8769: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
8770: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
8771: PetscOptionsEnd();
8772: }
8773: if (discrete_harmonic) {
8774: Mat A;
8775: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
8776: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
8777: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
8778: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8779: MatDestroy(&A);
8780: } else {
8781: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
8782: }
8783: MatDestroy(&change);
8784: ISDestroy(&change_primal);
8785: }
8786: MatDestroy(&S_j);
8788: /* free adjacency */
8789: if (free_used_adj) {
8790: PetscFree2(used_xadj,used_adjncy);
8791: }
8792: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8793: return(0);
8794: }
8796: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8797: {
8798: PC_IS *pcis=(PC_IS*)pc->data;
8799: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8800: PCBDDCGraph graph;
8801: PetscErrorCode ierr;
8804: /* attach interface graph for determining subsets */
8805: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8806: IS verticesIS,verticescomm;
8807: PetscInt vsize,*idxs;
8809: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8810: ISGetSize(verticesIS,&vsize);
8811: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
8812: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
8813: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
8814: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
8815: PCBDDCGraphCreate(&graph);
8816: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
8817: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
8818: ISDestroy(&verticescomm);
8819: PCBDDCGraphComputeConnectedComponents(graph);
8820: } else {
8821: graph = pcbddc->mat_graph;
8822: }
8823: /* print some info */
8824: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8825: IS vertices;
8826: PetscInt nv,nedges,nfaces;
8827: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
8828: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8829: ISGetSize(vertices,&nv);
8830: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8831: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
8832: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
8833: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
8834: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
8835: PetscViewerFlush(pcbddc->dbg_viewer);
8836: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
8837: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
8838: }
8840: /* sub_schurs init */
8841: if (!pcbddc->sub_schurs) {
8842: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
8843: }
8844: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
8846: /* free graph struct */
8847: if (pcbddc->sub_schurs_rebuild) {
8848: PCBDDCGraphDestroy(&graph);
8849: }
8850: return(0);
8851: }
/*
   PCBDDCCheckOperator - Consistency check that prints, per process, an error measure for the
   BDDC interface preconditioner.

   Input Parameter:
.  pc - the preconditioner; pc->data is read both as PC_IS and as PC_BDDC

   What the visible code does (only when pcbddc->n_vertices == pcbddc->local_primal_size,
   otherwise nothing is done):
     1. builds a scaling vector for the primal dofs (reciprocal of the summed ones);
     2. wraps the local blocks A_II, A_IB, A_BI, A_BB in a Schur complement S_j using pcbddc->ksp_D;
     3. fills pcis->vec1_N with random values, overwrites the primal reference nodes with values
        scattered back from a random coarse vector, and restricts it to the interface (vec2_B);
     4. applies S_j, sums and scales the result on the primal nodes, and stores it in vec1_B;
     5. calls PCBDDCApplyInterfacePreconditioner() and prints the infinity norm of
        (vec1_B - saved interface vector), plus one line per benign_p0 entry.

   Notes:
   zerodiag is initialized to NULL and never reassigned in this function, so every
   "if (zerodiag)" branch below is skipped as written.
   Output goes through PetscPrintf() on PETSC_COMM_SELF.
   ierr is declared but no return code is checked in this listing.
   Every visible return path returns 0.
*/
8853: PetscErrorCode PCBDDCCheckOperator(PC pc)
8854: {
8855: PC_IS *pcis=(PC_IS*)pc->data;
8856: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8857: PetscErrorCode ierr;
/* the whole check is guarded: it runs only if the local primal space has as many dofs as vertices */
8860: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8861: IS zerodiag = NULL;
8862: Mat S_j,B0_B=NULL;
8863: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
8864: PetscScalar *p0_check,*array,*array2;
8865: PetscReal norm;
8866: PetscInt i;
8868: /* B0 and B0_B: restriction of benign_B0 (first benign_n rows) to the interface columns is_B_local;
   not executed as written since zerodiag is NULL */
8869: if (zerodiag) {
8870: IS dummy;
8872: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
8873: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
8874: MatCreateVecs(B0_B,NULL,&dummy_vec);
8875: ISDestroy(&dummy);
8876: }
8877: /* I need a primal vector to scale primal nodes since BDDC sums contributions */
/* vec_scale_P = 1 / (ones summed into coarse_vec and scattered back to the local primal layout) */
8878: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
8879: VecSet(pcbddc->vec1_P,1.0);
/* NOTE(review): coarse_vec is not zeroed in this function before this ADD_VALUES scatter (the later
   ADD_VALUES scatter is preceded by VecSet(coarse_vec,0.)) -- confirm it is zero on entry */
8880: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8881: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8882: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8883: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
8884: VecReciprocal(vec_scale_P);
8885: /* S_j: Schur complement of the local blocks, with pcbddc->ksp_D set as its solver */
8886: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8887: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8889: /* mimic vector in \widetilde{W}_\Gamma */
8890: VecSetRandom(pcis->vec1_N,NULL);
8891: /* continuous in primal space: primal values come from a single random coarse vector */
8892: VecSetRandom(pcbddc->coarse_vec,NULL);
8893: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8894: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8895: VecGetArray(pcbddc->vec1_P,&array);
/* p0_check keeps a copy of the last benign_n entries of the local primal vector */
8896: PetscCalloc1(pcbddc->benign_n,&p0_check);
8897: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
/* overwrite the primal reference nodes of the random local vector with the primal values */
8898: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8899: VecRestoreArray(pcbddc->vec1_P,&array);
8900: VecAssemblyBegin(pcis->vec1_N);
8901: VecAssemblyEnd(pcis->vec1_N);
/* restrict to the interface (vec2_B) and save a copy for the final comparison */
8902: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8903: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
8904: VecDuplicate(pcis->vec2_B,&vec_check_B);
8905: VecCopy(pcis->vec2_B,vec_check_B);
8907: /* assemble rhs for coarse problem */
8908: /* \widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8909: /* local with Schur: vec1_B = S_j * vec2_B */
8910: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
/* would add B0_B^T p0_check to vec1_B; skipped as written since zerodiag is NULL */
8911: if (zerodiag) {
8912: VecGetArray(dummy_vec,&array);
8913: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
8914: VecRestoreArray(dummy_vec,&array);
8915: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
8916: }
8917: /* sum on primal nodes the local contributions */
/* vec1_B -> vec1_N, gather the primal reference node entries into vec1_P,
   then ADD into a zeroed coarse_vec and scatter the sums back into vec1_P */
8918: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8919: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
8920: VecGetArray(pcis->vec1_N,&array);
8921: VecGetArray(pcbddc->vec1_P,&array2);
8922: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8923: VecRestoreArray(pcbddc->vec1_P,&array2);
8924: VecRestoreArray(pcis->vec1_N,&array);
8925: VecSet(pcbddc->coarse_vec,0.);
8926: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8927: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
8928: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
8929: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
/* NOTE(review): the array is obtained before the in-place VecPointwiseMult below and is read after it
   by VecSetValues; presumably this relies on the pointer aliasing vec1_P's storage so that the
   scaled values are the ones inserted -- confirm */
8930: VecGetArray(pcbddc->vec1_P,&array);
8931: /* scale primal nodes (BDDC sums contributions) */
8932: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
8933: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
8934: VecRestoreArray(pcbddc->vec1_P,&array);
8935: VecAssemblyBegin(pcis->vec1_N);
8936: VecAssemblyEnd(pcis->vec1_N);
/* bring the primal-corrected vector back to the interface layout (vec1_B) */
8937: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8938: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
8939: /* global: \widetilde{B0}_B w_\Gamma (stored in benign_p0); skipped as written since zerodiag is NULL */
8940: if (zerodiag) {
8941: MatMult(B0_B,pcis->vec2_B,dummy_vec);
8942: VecGetArray(dummy_vec,&array);
8943: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
8944: VecRestoreArray(dummy_vec,&array);
8945: }
8946: /* BDDC: zero vec1_D, then apply the interface preconditioner
   (presumably it reads its input from and writes its output to the pcis work vectors -- confirm) */
8947: VecSet(pcis->vec1_D,0.);
8948: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
/* keep a copy of vec1_B in vec2_B, then measure || vec1_B - vec_check_B ||_inf */
8950: VecCopy(pcis->vec1_B,pcis->vec2_B);
8951: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
8952: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
8953: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
/* one line per benign_p0 entry: absolute difference from the saved p0_check value */
8954: for (i=0;i<pcbddc->benign_n;i++) {
8955: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
8956: }
/* release everything created in this scope (dummy_vec and B0_B are still NULL when zerodiag is NULL) */
8957: PetscFree(p0_check);
8958: VecDestroy(&vec_scale_P);
8959: VecDestroy(&vec_check_B);
8960: VecDestroy(&dummy_vec);
8961: MatDestroy(&S_j);
8962: MatDestroy(&B0_B);
8963: }
8964: return(0);
8965: }
8967: #include <../src/mat/impls/aij/mpi/mpiaij.h>
/*
   MatMPIAIJRestrict - Builds, on the communicator ccomm, a matrix *B that shares the local
   diagonal/off-diagonal blocks of a row-redistributed copy of A.

   Input Parameters:
+  A     - the matrix to restrict; its data is accessed through Mat_MPIAIJ after MatCreateSubMatrix()
           (assumes A is of MPIAIJ type -- TODO confirm against callers)
-  ccomm - the target communicator, or MPI_COMM_NULL on processes that do not take part

   Output Parameter:
.  B - the new matrix on ccomm; it is written only when ccomm != MPI_COMM_NULL

   Notes:
   Every process of A's communicator calls MatCreateSubMatrix(); processes with
   ccomm == MPI_COMM_NULL request an empty row range (rst = ren = 0).
   *B is assembled by hand: its row layout is replaced by the layout built on ccomm, the blocks
   a->A and a->B and the local vector a->lvec of the temporary submatrix are shared by reference,
   colmap/garray are copied, and a new scatter context is created.
   No return code is checked in this listing; every visible return path returns 0.
*/
8968: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8969: {
8970: Mat At;
8971: IS rows;
8972: PetscInt rst,ren;
8974: PetscLayout rmap;
/* row range owned by this process in the new distribution; stays empty outside ccomm */
8977: rst = ren = 0;
8978: if (ccomm != MPI_COMM_NULL) {
/* layout over ccomm for A's global number of rows, block size 1 */
8979: PetscLayoutCreate(ccomm,&rmap);
8980: PetscLayoutSetSize(rmap,A->rmap->N);
8981: PetscLayoutSetBlockSize(rmap,1);
8982: PetscLayoutSetUp(rmap);
8983: PetscLayoutGetRange(rmap,&rst,&ren);
8984: }
/* extract rows [rst,ren) with all columns (NULL column IS); the IS lives on A's communicator */
8985: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
8986: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
8987: ISDestroy(&rows);
8989: if (ccomm != MPI_COMM_NULL) {
8990: Mat_MPIAIJ *a,*b;
8991: IS from,to;
8992: Vec gvec;
8993: PetscInt lsize;
/* create the shell of *B on ccomm with ren-rst local rows and At's global column count */
8995: MatCreate(ccomm,B);
8996: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
8997: MatSetType(*B,MATAIJ);
/* drop the row layout created for *B; the one built above (rmap) is installed further down */
8998: PetscLayoutDestroy(&((*B)->rmap));
8999: PetscLayoutSetUp((*B)->cmap);
/* NOTE(review): both casts assume the MPIAIJ implementation struct -- confirm MATAIJ resolves to it on ccomm */
9000: a = (Mat_MPIAIJ*)At->data;
9001: b = (Mat_MPIAIJ*)(*B)->data;
9002: MPI_Comm_size(ccomm,&b->size);
9003: MPI_Comm_rank(ccomm,&b->rank);
/* share the two local blocks of At with *B; the extra references keep them valid after At is destroyed */
9004: PetscObjectReference((PetscObject)a->A);
9005: PetscObjectReference((PetscObject)a->B);
9006: b->A = a->A;
9007: b->B = a->B;
/* copy the insertion flags and reset the row-access state */
9009: b->donotstash = a->donotstash;
9010: b->roworiented = a->roworiented;
9011: b->rowindices = 0;
9012: b->rowvalues = 0;
9013: b->getrowactive = PETSC_FALSE;
/* install the ccomm row layout and mark *B as assembled and preallocated */
9015: (*B)->rmap = rmap;
9016: (*B)->factortype = A->factortype;
9017: (*B)->assembled = PETSC_TRUE;
9018: (*B)->insertmode = NOT_SET_VALUES;
9019: (*B)->preallocated = PETSC_TRUE;
/* duplicate the column map if At has one (table copy or plain array of At->cmap->N entries) */
9021: if (a->colmap) {
9022: #if defined(PETSC_USE_CTABLE)
9023: PetscTableCreateCopy(a->colmap,&b->colmap);
9024: #else
9025: PetscMalloc1(At->cmap->N,&b->colmap);
9026: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9027: PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9028: #endif
9029: } else b->colmap = 0;
/* duplicate garray; len is the local column count of the off-diagonal block (one extra entry is allocated) */
9030: if (a->garray) {
9031: PetscInt len;
9032: len = a->B->cmap->n;
9033: PetscMalloc1(len+1,&b->garray);
9034: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9035: if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9036: } else b->garray = 0;
/* share the local work vector of At */
9038: PetscObjectReference((PetscObject)a->lvec);
9039: b->lvec = a->lvec;
9040: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9042: /* cannot use VecScatterCopy: build a new scatter from a vector of *B (entries listed in garray) into lvec */
9043: VecGetLocalSize(b->lvec,&lsize);
/* the index set borrows b->garray (PETSC_USE_POINTER) and is destroyed right after the scatter is created */
9044: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9045: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9046: MatCreateVecs(*B,&gvec,NULL);
9047: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9048: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9049: ISDestroy(&from);
9050: ISDestroy(&to);
9051: VecDestroy(&gvec);
9052: }
/* the temporary submatrix is released on every process */
9053: MatDestroy(&At);
9054: return(0);
9055: }