Actual source code: bddcprivate.c
petsc-3.11.4 2019-09-28
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); /* forward declaration of a file-local (static) helper */
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: #if !defined(PETSC_USE_COMPLEX)
18: PetscScalar *uwork,*data,*U, ds = 0.;
19: PetscReal *sing;
20: PetscBLASInt bM,bN,lwork,lierr,di = 1;
21: PetscInt ulw,i,nr,nc,n;
25: #if defined(PETSC_MISSING_LAPACK_GESVD)
26: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available");
27: #else
28: MatGetSize(A,&nr,&nc);
29: if (!nr || !nc) return(0);
31: /* workspace */
32: if (!work) {
33: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
34: PetscMalloc1(ulw,&uwork);
35: } else {
36: ulw = lw;
37: uwork = work;
38: }
39: n = PetscMin(nr,nc);
40: if (!rwork) {
41: PetscMalloc1(n,&sing);
42: } else {
43: sing = rwork;
44: }
46: /* SVD */
47: PetscMalloc1(nr*nr,&U);
48: PetscBLASIntCast(nr,&bM);
49: PetscBLASIntCast(nc,&bN);
50: PetscBLASIntCast(ulw,&lwork);
51: MatDenseGetArray(A,&data);
52: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
53: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
54: PetscFPTrapPop();
55: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
56: MatDenseRestoreArray(A,&data);
57: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
58: if (!rwork) {
59: PetscFree(sing);
60: }
61: if (!work) {
62: PetscFree(uwork);
63: }
64: /* create B */
65: if (!range) {
66: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
67: MatDenseGetArray(*B,&data);
68: PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));
69: } else {
70: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
71: MatDenseGetArray(*B,&data);
72: PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));
73: }
74: MatDenseRestoreArray(*B,&data);
75: PetscFree(U);
76: #endif
77: #else /* PETSC_USE_COMPLEX */
79: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
80: #endif
81: return(0);
82: }
84: /* TODO REMOVE: debug-only state, compiled only when PRINT_GDET is defined.
   Both counters go into the "Gdet_l<lev>_r<rank>_cc<inc>.m" file name built in
   PCBDDCComputeNedelecChangeEdge; inc is incremented after each dump. */
85: #if defined(PRINT_GDET)
86: static int inc = 0;
87: static int lev = 0;
88: #endif
90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
91: {
93: Mat GE,GEd;
94: PetscInt rsize,csize,esize;
95: PetscScalar *ptr;
98: ISGetSize(edge,&esize);
99: if (!esize) return(0);
100: ISGetSize(extrow,&rsize);
101: ISGetSize(extcol,&csize);
103: /* gradients */
104: ptr = work + 5*esize;
105: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108: MatDestroy(&GE);
110: /* constants */
111: ptr += rsize*csize;
112: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115: MatDestroy(&GE);
116: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117: MatDestroy(&GEd);
119: if (corners) {
120: Mat GEc;
121: PetscScalar *vals,v;
123: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
124: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
125: MatDenseGetArray(GEd,&vals);
126: /* v = PetscAbsScalar(vals[0]) */;
127: v = 1.;
128: cvals[0] = vals[0]/v;
129: cvals[1] = vals[1]/v;
130: MatDenseRestoreArray(GEd,&vals);
131: MatScale(*GKins,1./v);
132: #if defined(PRINT_GDET)
133: {
134: PetscViewer viewer;
135: char filename[256];
136: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
137: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
138: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
139: PetscObjectSetName((PetscObject)GEc,"GEc");
140: MatView(GEc,viewer);
141: PetscObjectSetName((PetscObject)(*GKins),"GK");
142: MatView(*GKins,viewer);
143: PetscObjectSetName((PetscObject)GEd,"Gproj");
144: MatView(GEd,viewer);
145: PetscViewerDestroy(&viewer);
146: }
147: #endif
148: MatDestroy(&GEd);
149: MatDestroy(&GEc);
150: }
152: return(0);
153: }
155: PetscErrorCode PCBDDCNedelecSupport(PC pc)
156: {
157: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
158: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
159: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
160: Vec tvec;
161: PetscSF sfv;
162: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
163: MPI_Comm comm;
164: IS lned,primals,allprimals,nedfieldlocal;
165: IS *eedges,*extrows,*extcols,*alleedges;
166: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
167: PetscScalar *vals,*work;
168: PetscReal *rwork;
169: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
170: PetscInt ne,nv,Lv,order,n,field;
171: PetscInt n_neigh,*neigh,*n_shared,**shared;
172: PetscInt i,j,extmem,cum,maxsize,nee;
173: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
174: PetscInt *sfvleaves,*sfvroots;
175: PetscInt *corners,*cedges;
176: PetscInt *ecount,**eneighs,*vcount,**vneighs;
177: #if defined(PETSC_USE_DEBUG)
178: PetscInt *emarks;
179: #endif
180: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
181: PetscErrorCode ierr;
184: /* If the discrete gradient is defined for a subset of dofs and global is true,
185: it assumes G is given in global ordering for all the dofs.
186: Otherwise, the ordering is global for the Nedelec field */
187: order = pcbddc->nedorder;
188: conforming = pcbddc->conforming;
189: field = pcbddc->nedfield;
190: global = pcbddc->nedglobal;
191: setprimal = PETSC_FALSE;
192: print = PETSC_FALSE;
193: singular = PETSC_FALSE;
195: /* Command line customization */
196: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
197: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
198: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
199: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
200: /* print debug info TODO: to be removed */
201: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
202: PetscOptionsEnd();
204: /* Return if there are no edges in the decomposition and the problem is not singular */
205: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
206: ISLocalToGlobalMappingGetSize(al2g,&n);
207: PetscObjectGetComm((PetscObject)pc,&comm);
208: if (!singular) {
209: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
210: lrc[0] = PETSC_FALSE;
211: for (i=0;i<n;i++) {
212: if (PetscRealPart(vals[i]) > 2.) {
213: lrc[0] = PETSC_TRUE;
214: break;
215: }
216: }
217: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
218: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
219: if (!lrc[1]) return(0);
220: }
222: /* Get Nedelec field */
223: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
224: if (pcbddc->n_ISForDofsLocal && field >= 0) {
225: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
226: nedfieldlocal = pcbddc->ISForDofsLocal[field];
227: ISGetLocalSize(nedfieldlocal,&ne);
228: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
229: ne = n;
230: nedfieldlocal = NULL;
231: global = PETSC_TRUE;
232: } else if (field == PETSC_DECIDE) {
233: PetscInt rst,ren,*idx;
235: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
236: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
237: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
238: for (i=rst;i<ren;i++) {
239: PetscInt nc;
241: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
242: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
243: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
244: }
245: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
246: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
247: PetscMalloc1(n,&idx);
248: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
249: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
250: } else {
251: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
252: }
254: /* Sanity checks */
255: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
256: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
257: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
259: /* Just set primal dofs and return */
260: if (setprimal) {
261: IS enedfieldlocal;
262: PetscInt *eidxs;
264: PetscMalloc1(ne,&eidxs);
265: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
266: if (nedfieldlocal) {
267: ISGetIndices(nedfieldlocal,&idxs);
268: for (i=0,cum=0;i<ne;i++) {
269: if (PetscRealPart(vals[idxs[i]]) > 2.) {
270: eidxs[cum++] = idxs[i];
271: }
272: }
273: ISRestoreIndices(nedfieldlocal,&idxs);
274: } else {
275: for (i=0,cum=0;i<ne;i++) {
276: if (PetscRealPart(vals[i]) > 2.) {
277: eidxs[cum++] = i;
278: }
279: }
280: }
281: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
282: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
283: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
284: PetscFree(eidxs);
285: ISDestroy(&nedfieldlocal);
286: ISDestroy(&enedfieldlocal);
287: return(0);
288: }
290: /* Compute some l2g maps */
291: if (nedfieldlocal) {
292: IS is;
294: /* need to map from the local Nedelec field to local numbering */
295: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
296: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
297: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
298: ISLocalToGlobalMappingCreateIS(is,&al2g);
299: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
300: if (global) {
301: PetscObjectReference((PetscObject)al2g);
302: el2g = al2g;
303: } else {
304: IS gis;
306: ISRenumber(is,NULL,NULL,&gis);
307: ISLocalToGlobalMappingCreateIS(gis,&el2g);
308: ISDestroy(&gis);
309: }
310: ISDestroy(&is);
311: } else {
312: /* restore default */
313: pcbddc->nedfield = -1;
314: /* one ref for the destruction of al2g, one for el2g */
315: PetscObjectReference((PetscObject)al2g);
316: PetscObjectReference((PetscObject)al2g);
317: el2g = al2g;
318: fl2g = NULL;
319: }
321: /* Start communication to drop connections for interior edges (for cc analysis only) */
322: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));
323: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));
324: if (nedfieldlocal) {
325: ISGetIndices(nedfieldlocal,&idxs);
326: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
327: ISRestoreIndices(nedfieldlocal,&idxs);
328: } else {
329: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
330: }
331: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
332: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
334: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
335: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
336: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
337: if (global) {
338: PetscInt rst;
340: MatGetOwnershipRange(G,&rst,NULL);
341: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
342: if (matis->sf_rootdata[i] < 2) {
343: matis->sf_rootdata[cum++] = i + rst;
344: }
345: }
346: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
347: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
348: } else {
349: PetscInt *tbz;
351: PetscMalloc1(ne,&tbz);
352: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
353: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
354: ISGetIndices(nedfieldlocal,&idxs);
355: for (i=0,cum=0;i<ne;i++)
356: if (matis->sf_leafdata[idxs[i]] == 1)
357: tbz[cum++] = i;
358: ISRestoreIndices(nedfieldlocal,&idxs);
359: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
360: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
361: PetscFree(tbz);
362: }
363: } else { /* we need the entire G to infer the nullspace */
364: PetscObjectReference((PetscObject)pcbddc->discretegradient);
365: G = pcbddc->discretegradient;
366: }
368: /* Extract subdomain relevant rows of G */
369: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
370: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
371: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
372: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
373: ISDestroy(&lned);
374: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
375: MatDestroy(&lGall);
376: MatISGetLocalMat(lGis,&lG);
378: /* SF for nodal dofs communications */
379: MatGetLocalSize(G,NULL,&Lv);
380: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
381: PetscObjectReference((PetscObject)vl2g);
382: ISLocalToGlobalMappingGetSize(vl2g,&nv);
383: PetscSFCreate(comm,&sfv);
384: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
385: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
386: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
387: i = singular ? 2 : 1;
388: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
390: /* Destroy temporary G created in MATIS format and modified G */
391: PetscObjectReference((PetscObject)lG);
392: MatDestroy(&lGis);
393: MatDestroy(&G);
395: if (print) {
396: PetscObjectSetName((PetscObject)lG,"initial_lG");
397: MatView(lG,NULL);
398: }
400: /* Save lG for values insertion in change of basis */
401: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
403: /* Analyze the edge-nodes connections (duplicate lG) */
404: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
405: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
406: PetscBTCreate(nv,&btv);
407: PetscBTCreate(ne,&bte);
408: PetscBTCreate(ne,&btb);
409: PetscBTCreate(ne,&btbd);
410: PetscBTCreate(nv,&btvcand);
411: /* need to import the boundary specification to ensure the
412: proper detection of coarse edges' endpoints */
413: if (pcbddc->DirichletBoundariesLocal) {
414: IS is;
416: if (fl2g) {
417: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
418: } else {
419: is = pcbddc->DirichletBoundariesLocal;
420: }
421: ISGetLocalSize(is,&cum);
422: ISGetIndices(is,&idxs);
423: for (i=0;i<cum;i++) {
424: if (idxs[i] >= 0) {
425: PetscBTSet(btb,idxs[i]);
426: PetscBTSet(btbd,idxs[i]);
427: }
428: }
429: ISRestoreIndices(is,&idxs);
430: if (fl2g) {
431: ISDestroy(&is);
432: }
433: }
434: if (pcbddc->NeumannBoundariesLocal) {
435: IS is;
437: if (fl2g) {
438: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
439: } else {
440: is = pcbddc->NeumannBoundariesLocal;
441: }
442: ISGetLocalSize(is,&cum);
443: ISGetIndices(is,&idxs);
444: for (i=0;i<cum;i++) {
445: if (idxs[i] >= 0) {
446: PetscBTSet(btb,idxs[i]);
447: }
448: }
449: ISRestoreIndices(is,&idxs);
450: if (fl2g) {
451: ISDestroy(&is);
452: }
453: }
455: /* Count neighs per dof */
456: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
457: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
459: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
460: for proper detection of coarse edges' endpoints */
461: PetscBTCreate(ne,&btee);
462: for (i=0;i<ne;i++) {
463: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
464: PetscBTSet(btee,i);
465: }
466: }
467: PetscMalloc1(ne,&marks);
468: if (!conforming) {
469: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
470: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
471: }
472: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
473: MatSeqAIJGetArray(lGe,&vals);
474: cum = 0;
475: for (i=0;i<ne;i++) {
476: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
477: if (!PetscBTLookup(btee,i)) {
478: marks[cum++] = i;
479: continue;
480: }
481: /* set badly connected edge dofs as primal */
482: if (!conforming) {
483: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
484: marks[cum++] = i;
485: PetscBTSet(bte,i);
486: for (j=ii[i];j<ii[i+1];j++) {
487: PetscBTSet(btv,jj[j]);
488: }
489: } else {
490: /* every edge dofs should be connected trough a certain number of nodal dofs
491: to other edge dofs belonging to coarse edges
492: - at most 2 endpoints
493: - order-1 interior nodal dofs
494: - no undefined nodal dofs (nconn < order)
495: */
496: PetscInt ends = 0,ints = 0, undef = 0;
497: for (j=ii[i];j<ii[i+1];j++) {
498: PetscInt v = jj[j],k;
499: PetscInt nconn = iit[v+1]-iit[v];
500: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
501: if (nconn > order) ends++;
502: else if (nconn == order) ints++;
503: else undef++;
504: }
505: if (undef || ends > 2 || ints != order -1) {
506: marks[cum++] = i;
507: PetscBTSet(bte,i);
508: for (j=ii[i];j<ii[i+1];j++) {
509: PetscBTSet(btv,jj[j]);
510: }
511: }
512: }
513: }
514: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
515: if (!order && ii[i+1] != ii[i]) {
516: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
517: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
518: }
519: }
520: PetscBTDestroy(&btee);
521: MatSeqAIJRestoreArray(lGe,&vals);
522: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
523: if (!conforming) {
524: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
525: MatDestroy(&lGt);
526: }
527: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
529: /* identify splitpoints and corner candidates */
530: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
531: if (print) {
532: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
533: MatView(lGe,NULL);
534: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
535: MatView(lGt,NULL);
536: }
537: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
538: MatSeqAIJGetArray(lGt,&vals);
539: for (i=0;i<nv;i++) {
540: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
541: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
542: if (!order) { /* variable order */
543: PetscReal vorder = 0.;
545: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
546: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
547: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
548: ord = 1;
549: }
550: #if defined(PETSC_USE_DEBUG)
551: if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
552: #endif
553: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
554: if (PetscBTLookup(btbd,jj[j])) {
555: bdir = PETSC_TRUE;
556: break;
557: }
558: if (vc != ecount[jj[j]]) {
559: sneighs = PETSC_FALSE;
560: } else {
561: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
562: for (k=0;k<vc;k++) {
563: if (vn[k] != en[k]) {
564: sneighs = PETSC_FALSE;
565: break;
566: }
567: }
568: }
569: }
570: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
571: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
572: PetscBTSet(btv,i);
573: } else if (test == ord) {
574: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
575: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
576: PetscBTSet(btv,i);
577: } else {
578: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
579: PetscBTSet(btvcand,i);
580: }
581: }
582: }
583: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
584: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
585: PetscBTDestroy(&btbd);
587: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
588: if (order != 1) {
589: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
590: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
591: for (i=0;i<nv;i++) {
592: if (PetscBTLookup(btvcand,i)) {
593: PetscBool found = PETSC_FALSE;
594: for (j=ii[i];j<ii[i+1] && !found;j++) {
595: PetscInt k,e = jj[j];
596: if (PetscBTLookup(bte,e)) continue;
597: for (k=iit[e];k<iit[e+1];k++) {
598: PetscInt v = jjt[k];
599: if (v != i && PetscBTLookup(btvcand,v)) {
600: found = PETSC_TRUE;
601: break;
602: }
603: }
604: }
605: if (!found) {
606: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
607: PetscBTClear(btvcand,i);
608: } else {
609: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
610: }
611: }
612: }
613: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
614: }
615: MatSeqAIJRestoreArray(lGt,&vals);
616: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
617: MatDestroy(&lGe);
619: /* Get the local G^T explicitly */
620: MatDestroy(&lGt);
621: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
622: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
624: /* Mark interior nodal dofs */
625: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
626: PetscBTCreate(nv,&btvi);
627: for (i=1;i<n_neigh;i++) {
628: for (j=0;j<n_shared[i];j++) {
629: PetscBTSet(btvi,shared[i][j]);
630: }
631: }
632: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
634: /* communicate corners and splitpoints */
635: PetscMalloc1(nv,&vmarks);
636: PetscMemzero(sfvleaves,nv*sizeof(PetscInt));
637: PetscMemzero(sfvroots,Lv*sizeof(PetscInt));
638: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
640: if (print) {
641: IS tbz;
643: cum = 0;
644: for (i=0;i<nv;i++)
645: if (sfvleaves[i])
646: vmarks[cum++] = i;
648: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
649: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
650: ISView(tbz,NULL);
651: ISDestroy(&tbz);
652: }
654: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
655: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
656: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
657: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
659: /* Zero rows of lGt corresponding to identified corners
660: and interior nodal dofs */
661: cum = 0;
662: for (i=0;i<nv;i++) {
663: if (sfvleaves[i]) {
664: vmarks[cum++] = i;
665: PetscBTSet(btv,i);
666: }
667: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
668: }
669: PetscBTDestroy(&btvi);
670: if (print) {
671: IS tbz;
673: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
674: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
675: ISView(tbz,NULL);
676: ISDestroy(&tbz);
677: }
678: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
679: PetscFree(vmarks);
680: PetscSFDestroy(&sfv);
681: PetscFree2(sfvleaves,sfvroots);
683: /* Recompute G */
684: MatDestroy(&lG);
685: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
686: if (print) {
687: PetscObjectSetName((PetscObject)lG,"used_lG");
688: MatView(lG,NULL);
689: PetscObjectSetName((PetscObject)lGt,"used_lGt");
690: MatView(lGt,NULL);
691: }
693: /* Get primal dofs (if any) */
694: cum = 0;
695: for (i=0;i<ne;i++) {
696: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
697: }
698: if (fl2g) {
699: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
700: }
701: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
702: if (print) {
703: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
704: ISView(primals,NULL);
705: }
706: PetscBTDestroy(&bte);
707: /* TODO: what if the user passed in some of them ? */
708: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
709: ISDestroy(&primals);
711: /* Compute edge connectivity */
712: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
713: MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);
714: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
715: if (fl2g) {
716: PetscBT btf;
717: PetscInt *iia,*jja,*iiu,*jju;
718: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
720: /* create CSR for all local dofs */
721: PetscMalloc1(n+1,&iia);
722: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
723: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
724: iiu = pcbddc->mat_graph->xadj;
725: jju = pcbddc->mat_graph->adjncy;
726: } else if (pcbddc->use_local_adj) {
727: rest = PETSC_TRUE;
728: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
729: } else {
730: free = PETSC_TRUE;
731: PetscMalloc2(n+1,&iiu,n,&jju);
732: iiu[0] = 0;
733: for (i=0;i<n;i++) {
734: iiu[i+1] = i+1;
735: jju[i] = -1;
736: }
737: }
739: /* import sizes of CSR */
740: iia[0] = 0;
741: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
743: /* overwrite entries corresponding to the Nedelec field */
744: PetscBTCreate(n,&btf);
745: ISGetIndices(nedfieldlocal,&idxs);
746: for (i=0;i<ne;i++) {
747: PetscBTSet(btf,idxs[i]);
748: iia[idxs[i]+1] = ii[i+1]-ii[i];
749: }
751: /* iia in CSR */
752: for (i=0;i<n;i++) iia[i+1] += iia[i];
754: /* jja in CSR */
755: PetscMalloc1(iia[n],&jja);
756: for (i=0;i<n;i++)
757: if (!PetscBTLookup(btf,i))
758: for (j=0;j<iiu[i+1]-iiu[i];j++)
759: jja[iia[i]+j] = jju[iiu[i]+j];
761: /* map edge dofs connectivity */
762: if (jj) {
763: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
764: for (i=0;i<ne;i++) {
765: PetscInt e = idxs[i];
766: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
767: }
768: }
769: ISRestoreIndices(nedfieldlocal,&idxs);
770: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
771: if (rest) {
772: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
773: }
774: if (free) {
775: PetscFree2(iiu,jju);
776: }
777: PetscBTDestroy(&btf);
778: } else {
779: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
780: }
782: /* Analyze interface for edge dofs */
783: PCBDDCAnalyzeInterface(pc);
784: pcbddc->mat_graph->twodim = PETSC_FALSE;
786: /* Get coarse edges in the edge space */
787: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
788: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
790: if (fl2g) {
791: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
792: PetscMalloc1(nee,&eedges);
793: for (i=0;i<nee;i++) {
794: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
795: }
796: } else {
797: eedges = alleedges;
798: primals = allprimals;
799: }
801: /* Mark fine edge dofs with their coarse edge id */
802: PetscMemzero(marks,ne*sizeof(PetscInt));
803: ISGetLocalSize(primals,&cum);
804: ISGetIndices(primals,&idxs);
805: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
806: ISRestoreIndices(primals,&idxs);
807: if (print) {
808: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
809: ISView(primals,NULL);
810: }
812: maxsize = 0;
813: for (i=0;i<nee;i++) {
814: PetscInt size,mark = i+1;
816: ISGetLocalSize(eedges[i],&size);
817: ISGetIndices(eedges[i],&idxs);
818: for (j=0;j<size;j++) marks[idxs[j]] = mark;
819: ISRestoreIndices(eedges[i],&idxs);
820: maxsize = PetscMax(maxsize,size);
821: }
823: /* Find coarse edge endpoints */
824: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
825: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
826: for (i=0;i<nee;i++) {
827: PetscInt mark = i+1,size;
829: ISGetLocalSize(eedges[i],&size);
830: if (!size && nedfieldlocal) continue;
831: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
832: ISGetIndices(eedges[i],&idxs);
833: if (print) {
834: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
835: ISView(eedges[i],NULL);
836: }
837: for (j=0;j<size;j++) {
838: PetscInt k, ee = idxs[j];
839: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
840: for (k=ii[ee];k<ii[ee+1];k++) {
841: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
842: if (PetscBTLookup(btv,jj[k])) {
843: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
844: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
845: PetscInt k2;
846: PetscBool corner = PETSC_FALSE;
847: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
848: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
849: /* it's a corner if either is connected with an edge dof belonging to a different cc or
850: if the edge dof lie on the natural part of the boundary */
851: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
852: corner = PETSC_TRUE;
853: break;
854: }
855: }
856: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
857: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
858: PetscBTSet(btv,jj[k]);
859: } else {
860: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
861: }
862: }
863: }
864: }
865: ISRestoreIndices(eedges[i],&idxs);
866: }
867: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
868: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
869: PetscBTDestroy(&btb);
871: /* Reset marked primal dofs */
872: ISGetLocalSize(primals,&cum);
873: ISGetIndices(primals,&idxs);
874: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
875: ISRestoreIndices(primals,&idxs);
877: /* Now use the initial lG */
878: MatDestroy(&lG);
879: MatDestroy(&lGt);
880: lG = lGinit;
881: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
883: /* Compute extended cols indices */
884: PetscBTCreate(nv,&btvc);
885: PetscBTCreate(nee,&bter);
886: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
887: MatSeqAIJGetMaxRowNonzeros(lG,&i);
888: i *= maxsize;
889: PetscCalloc1(nee,&extcols);
890: PetscMalloc2(i,&extrow,i,&gidxs);
891: eerr = PETSC_FALSE;
892: for (i=0;i<nee;i++) {
893: PetscInt size,found = 0;
895: cum = 0;
896: ISGetLocalSize(eedges[i],&size);
897: if (!size && nedfieldlocal) continue;
898: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
899: ISGetIndices(eedges[i],&idxs);
900: PetscBTMemzero(nv,btvc);
901: for (j=0;j<size;j++) {
902: PetscInt k,ee = idxs[j];
903: for (k=ii[ee];k<ii[ee+1];k++) {
904: PetscInt vv = jj[k];
905: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
906: else if (!PetscBTLookupSet(btvc,vv)) found++;
907: }
908: }
909: ISRestoreIndices(eedges[i],&idxs);
910: PetscSortRemoveDupsInt(&cum,extrow);
911: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
912: PetscSortIntWithArray(cum,gidxs,extrow);
913: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
914: /* it may happen that endpoints are not defined at this point
915: if it is the case, mark this edge for a second pass */
916: if (cum != size -1 || found != 2) {
917: PetscBTSet(bter,i);
918: if (print) {
919: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
920: ISView(eedges[i],NULL);
921: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
922: ISView(extcols[i],NULL);
923: }
924: eerr = PETSC_TRUE;
925: }
926: }
927: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
928: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
929: if (done) {
930: PetscInt *newprimals;
932: PetscMalloc1(ne,&newprimals);
933: ISGetLocalSize(primals,&cum);
934: ISGetIndices(primals,&idxs);
935: PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));
936: ISRestoreIndices(primals,&idxs);
937: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
938: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
939: for (i=0;i<nee;i++) {
940: PetscBool has_candidates = PETSC_FALSE;
941: if (PetscBTLookup(bter,i)) {
942: PetscInt size,mark = i+1;
944: ISGetLocalSize(eedges[i],&size);
945: ISGetIndices(eedges[i],&idxs);
946: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
947: for (j=0;j<size;j++) {
948: PetscInt k,ee = idxs[j];
949: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
950: for (k=ii[ee];k<ii[ee+1];k++) {
951: /* set all candidates located on the edge as corners */
952: if (PetscBTLookup(btvcand,jj[k])) {
953: PetscInt k2,vv = jj[k];
954: has_candidates = PETSC_TRUE;
955: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
956: PetscBTSet(btv,vv);
957: /* set all edge dofs connected to candidate as primals */
958: for (k2=iit[vv];k2<iit[vv+1];k2++) {
959: if (marks[jjt[k2]] == mark) {
960: PetscInt k3,ee2 = jjt[k2];
961: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
962: newprimals[cum++] = ee2;
963: /* finally set the new corners */
964: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
965: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
966: PetscBTSet(btv,jj[k3]);
967: }
968: }
969: }
970: } else {
971: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
972: }
973: }
974: }
975: if (!has_candidates) { /* circular edge */
976: PetscInt k, ee = idxs[0],*tmarks;
978: PetscCalloc1(ne,&tmarks);
979: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
980: for (k=ii[ee];k<ii[ee+1];k++) {
981: PetscInt k2;
982: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
983: PetscBTSet(btv,jj[k]);
984: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
985: }
986: for (j=0;j<size;j++) {
987: if (tmarks[idxs[j]] > 1) {
988: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
989: newprimals[cum++] = idxs[j];
990: }
991: }
992: PetscFree(tmarks);
993: }
994: ISRestoreIndices(eedges[i],&idxs);
995: }
996: ISDestroy(&extcols[i]);
997: }
998: PetscFree(extcols);
999: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1000: PetscSortRemoveDupsInt(&cum,newprimals);
1001: if (fl2g) {
1002: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1003: ISDestroy(&primals);
1004: for (i=0;i<nee;i++) {
1005: ISDestroy(&eedges[i]);
1006: }
1007: PetscFree(eedges);
1008: }
1009: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1010: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1011: PetscFree(newprimals);
1012: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1013: ISDestroy(&primals);
1014: PCBDDCAnalyzeInterface(pc);
1015: pcbddc->mat_graph->twodim = PETSC_FALSE;
1016: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1017: if (fl2g) {
1018: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1019: PetscMalloc1(nee,&eedges);
1020: for (i=0;i<nee;i++) {
1021: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1022: }
1023: } else {
1024: eedges = alleedges;
1025: primals = allprimals;
1026: }
1027: PetscCalloc1(nee,&extcols);
1029: /* Mark again */
1030: PetscMemzero(marks,ne*sizeof(PetscInt));
1031: for (i=0;i<nee;i++) {
1032: PetscInt size,mark = i+1;
1034: ISGetLocalSize(eedges[i],&size);
1035: ISGetIndices(eedges[i],&idxs);
1036: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1037: ISRestoreIndices(eedges[i],&idxs);
1038: }
1039: if (print) {
1040: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1041: ISView(primals,NULL);
1042: }
1044: /* Recompute extended cols */
1045: eerr = PETSC_FALSE;
1046: for (i=0;i<nee;i++) {
1047: PetscInt size;
1049: cum = 0;
1050: ISGetLocalSize(eedges[i],&size);
1051: if (!size && nedfieldlocal) continue;
1052: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1053: ISGetIndices(eedges[i],&idxs);
1054: for (j=0;j<size;j++) {
1055: PetscInt k,ee = idxs[j];
1056: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1057: }
1058: ISRestoreIndices(eedges[i],&idxs);
1059: PetscSortRemoveDupsInt(&cum,extrow);
1060: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1061: PetscSortIntWithArray(cum,gidxs,extrow);
1062: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1063: if (cum != size -1) {
1064: if (print) {
1065: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1066: ISView(eedges[i],NULL);
1067: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1068: ISView(extcols[i],NULL);
1069: }
1070: eerr = PETSC_TRUE;
1071: }
1072: }
1073: }
1074: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1075: PetscFree2(extrow,gidxs);
1076: PetscBTDestroy(&bter);
1077: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1078: /* an error should not occur at this point */
1079: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1081: /* Check the number of endpoints */
1082: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1083: PetscMalloc1(2*nee,&corners);
1084: PetscMalloc1(nee,&cedges);
1085: for (i=0;i<nee;i++) {
1086: PetscInt size, found = 0, gc[2];
1088: /* init with defaults */
1089: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1090: ISGetLocalSize(eedges[i],&size);
1091: if (!size && nedfieldlocal) continue;
1092: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1093: ISGetIndices(eedges[i],&idxs);
1094: PetscBTMemzero(nv,btvc);
1095: for (j=0;j<size;j++) {
1096: PetscInt k,ee = idxs[j];
1097: for (k=ii[ee];k<ii[ee+1];k++) {
1098: PetscInt vv = jj[k];
1099: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1100: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1101: corners[i*2+found++] = vv;
1102: }
1103: }
1104: }
1105: if (found != 2) {
1106: PetscInt e;
1107: if (fl2g) {
1108: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1109: } else {
1110: e = idxs[0];
1111: }
1112: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1113: }
1115: /* get primal dof index on this coarse edge */
1116: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1117: if (gc[0] > gc[1]) {
1118: PetscInt swap = corners[2*i];
1119: corners[2*i] = corners[2*i+1];
1120: corners[2*i+1] = swap;
1121: }
1122: cedges[i] = idxs[size-1];
1123: ISRestoreIndices(eedges[i],&idxs);
1124: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1125: }
1126: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1127: PetscBTDestroy(&btvc);
1129: #if defined(PETSC_USE_DEBUG)
1130: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1131: not interfere with neighbouring coarse edges */
1132: PetscMalloc1(nee+1,&emarks);
1133: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1134: for (i=0;i<nv;i++) {
1135: PetscInt emax = 0,eemax = 0;
1137: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1138: PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));
1139: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1140: for (j=1;j<nee+1;j++) {
1141: if (emax < emarks[j]) {
1142: emax = emarks[j];
1143: eemax = j;
1144: }
1145: }
1146: /* not relevant for edges */
1147: if (!eemax) continue;
1149: for (j=ii[i];j<ii[i+1];j++) {
1150: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1151: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1152: }
1153: }
1154: }
1155: PetscFree(emarks);
1156: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1157: #endif
1159: /* Compute extended rows indices for edge blocks of the change of basis */
1160: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1161: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1162: extmem *= maxsize;
1163: PetscMalloc1(extmem*nee,&extrow);
1164: PetscMalloc1(nee,&extrows);
1165: PetscCalloc1(nee,&extrowcum);
1166: for (i=0;i<nv;i++) {
1167: PetscInt mark = 0,size,start;
1169: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1170: for (j=ii[i];j<ii[i+1];j++)
1171: if (marks[jj[j]] && !mark)
1172: mark = marks[jj[j]];
1174: /* not relevant */
1175: if (!mark) continue;
1177: /* import extended row */
1178: mark--;
1179: start = mark*extmem+extrowcum[mark];
1180: size = ii[i+1]-ii[i];
1181: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1182: PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));
1183: extrowcum[mark] += size;
1184: }
1185: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1186: MatDestroy(&lGt);
1187: PetscFree(marks);
1189: /* Compress extrows */
1190: cum = 0;
1191: for (i=0;i<nee;i++) {
1192: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1193: PetscSortRemoveDupsInt(&size,start);
1194: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1195: cum = PetscMax(cum,size);
1196: }
1197: PetscFree(extrowcum);
1198: PetscBTDestroy(&btv);
1199: PetscBTDestroy(&btvcand);
1201: /* Workspace for lapack inner calls and VecSetValues */
1202: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1204: /* Create change of basis matrix (preallocation can be improved) */
1205: MatCreate(comm,&T);
1206: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1207: pc->pmat->rmap->N,pc->pmat->rmap->N);
1208: MatSetType(T,MATAIJ);
1209: MatSeqAIJSetPreallocation(T,10,NULL);
1210: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1211: MatSetLocalToGlobalMapping(T,al2g,al2g);
1212: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1213: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1214: ISLocalToGlobalMappingDestroy(&al2g);
1216: /* Defaults to identity */
1217: MatCreateVecs(pc->pmat,&tvec,NULL);
1218: VecSet(tvec,1.0);
1219: MatDiagonalSet(T,tvec,INSERT_VALUES);
1220: VecDestroy(&tvec);
1222: /* Create discrete gradient for the coarser level if needed */
1223: MatDestroy(&pcbddc->nedcG);
1224: ISDestroy(&pcbddc->nedclocal);
1225: if (pcbddc->current_level < pcbddc->max_levels) {
1226: ISLocalToGlobalMapping cel2g,cvl2g;
1227: IS wis,gwis;
1228: PetscInt cnv,cne;
1230: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1231: if (fl2g) {
1232: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1233: } else {
1234: PetscObjectReference((PetscObject)wis);
1235: pcbddc->nedclocal = wis;
1236: }
1237: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1238: ISDestroy(&wis);
1239: ISRenumber(gwis,NULL,&cne,&wis);
1240: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1241: ISDestroy(&wis);
1242: ISDestroy(&gwis);
1244: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1245: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1246: ISDestroy(&wis);
1247: ISRenumber(gwis,NULL,&cnv,&wis);
1248: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1249: ISDestroy(&wis);
1250: ISDestroy(&gwis);
1252: MatCreate(comm,&pcbddc->nedcG);
1253: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1254: MatSetType(pcbddc->nedcG,MATAIJ);
1255: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1256: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1257: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1258: ISLocalToGlobalMappingDestroy(&cel2g);
1259: ISLocalToGlobalMappingDestroy(&cvl2g);
1260: }
1261: ISLocalToGlobalMappingDestroy(&vl2g);
1263: #if defined(PRINT_GDET)
1264: inc = 0;
1265: lev = pcbddc->current_level;
1266: #endif
1268: /* Insert values in the change of basis matrix */
1269: for (i=0;i<nee;i++) {
1270: Mat Gins = NULL, GKins = NULL;
1271: IS cornersis = NULL;
1272: PetscScalar cvals[2];
1274: if (pcbddc->nedcG) {
1275: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1276: }
1277: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1278: if (Gins && GKins) {
1279: PetscScalar *data;
1280: const PetscInt *rows,*cols;
1281: PetscInt nrh,nch,nrc,ncc;
1283: ISGetIndices(eedges[i],&cols);
1284: /* H1 */
1285: ISGetIndices(extrows[i],&rows);
1286: MatGetSize(Gins,&nrh,&nch);
1287: MatDenseGetArray(Gins,&data);
1288: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1289: MatDenseRestoreArray(Gins,&data);
1290: ISRestoreIndices(extrows[i],&rows);
1291: /* complement */
1292: MatGetSize(GKins,&nrc,&ncc);
1293: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1294: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1295: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1296: MatDenseGetArray(GKins,&data);
1297: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1298: MatDenseRestoreArray(GKins,&data);
1300: /* coarse discrete gradient */
1301: if (pcbddc->nedcG) {
1302: PetscInt cols[2];
1304: cols[0] = 2*i;
1305: cols[1] = 2*i+1;
1306: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1307: }
1308: ISRestoreIndices(eedges[i],&cols);
1309: }
1310: ISDestroy(&extrows[i]);
1311: ISDestroy(&extcols[i]);
1312: ISDestroy(&cornersis);
1313: MatDestroy(&Gins);
1314: MatDestroy(&GKins);
1315: }
1316: ISLocalToGlobalMappingDestroy(&el2g);
1318: /* Start assembling */
1319: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1320: if (pcbddc->nedcG) {
1321: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1322: }
1324: /* Free */
1325: if (fl2g) {
1326: ISDestroy(&primals);
1327: for (i=0;i<nee;i++) {
1328: ISDestroy(&eedges[i]);
1329: }
1330: PetscFree(eedges);
1331: }
1333: /* hack mat_graph with primal dofs on the coarse edges */
1334: {
1335: PCBDDCGraph graph = pcbddc->mat_graph;
1336: PetscInt *oqueue = graph->queue;
1337: PetscInt *ocptr = graph->cptr;
1338: PetscInt ncc,*idxs;
1340: /* find first primal edge */
1341: if (pcbddc->nedclocal) {
1342: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1343: } else {
1344: if (fl2g) {
1345: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1346: }
1347: idxs = cedges;
1348: }
1349: cum = 0;
1350: while (cum < nee && cedges[cum] < 0) cum++;
1352: /* adapt connected components */
1353: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1354: graph->cptr[0] = 0;
1355: for (i=0,ncc=0;i<graph->ncc;i++) {
1356: PetscInt lc = ocptr[i+1]-ocptr[i];
1357: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1358: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1359: graph->queue[graph->cptr[ncc]] = cedges[cum];
1360: ncc++;
1361: lc--;
1362: cum++;
1363: while (cum < nee && cedges[cum] < 0) cum++;
1364: }
1365: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1366: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1367: ncc++;
1368: }
1369: graph->ncc = ncc;
1370: if (pcbddc->nedclocal) {
1371: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1372: }
1373: PetscFree2(ocptr,oqueue);
1374: }
1375: ISLocalToGlobalMappingDestroy(&fl2g);
1376: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1377: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1378: MatDestroy(&conn);
1380: ISDestroy(&nedfieldlocal);
1381: PetscFree(extrow);
1382: PetscFree2(work,rwork);
1383: PetscFree(corners);
1384: PetscFree(cedges);
1385: PetscFree(extrows);
1386: PetscFree(extcols);
1387: MatDestroy(&lG);
1389: /* Complete assembling */
1390: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1391: if (pcbddc->nedcG) {
1392: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1393: #if 0
1394: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1395: MatView(pcbddc->nedcG,NULL);
1396: #endif
1397: }
1399: /* set change of basis */
1400: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1401: MatDestroy(&T);
1403: return(0);
1404: }
1406: /* the near-null space of BDDC carries information on quadrature weights,
1407: and these can be collinear -> so cheat with MatNullSpaceCreate
1408: and create a suitable set of basis vectors first */
1409: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1410: {
1412: PetscInt i;
1415: for (i=0;i<nvecs;i++) {
1416: PetscInt first,last;
1418: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1419: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1420: if (i>=first && i < last) {
1421: PetscScalar *data;
1422: VecGetArray(quad_vecs[i],&data);
1423: if (!has_const) {
1424: data[i-first] = 1.;
1425: } else {
1426: data[2*i-first] = 1./PetscSqrtReal(2.);
1427: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1428: }
1429: VecRestoreArray(quad_vecs[i],&data);
1430: }
1431: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1432: }
1433: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1434: for (i=0;i<nvecs;i++) { /* reset vectors */
1435: PetscInt first,last;
1436: VecLockReadPop(quad_vecs[i]);
1437: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1438: if (i>=first && i < last) {
1439: PetscScalar *data;
1440: VecGetArray(quad_vecs[i],&data);
1441: if (!has_const) {
1442: data[i-first] = 0.;
1443: } else {
1444: data[2*i-first] = 0.;
1445: data[2*i-first+1] = 0.;
1446: }
1447: VecRestoreArray(quad_vecs[i],&data);
1448: }
1449: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1450: VecLockReadPush(quad_vecs[i]);
1451: }
1452: return(0);
1453: }
1455: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1456: {
1457: Mat loc_divudotp;
1458: Vec p,v,vins,quad_vec,*quad_vecs;
1459: ISLocalToGlobalMapping map;
1460: PetscScalar *vals;
1461: const PetscScalar *array;
1462: PetscInt i,maxneighs,maxsize,*gidxs;
1463: PetscInt n_neigh,*neigh,*n_shared,**shared;
1464: PetscMPIInt rank;
1465: PetscErrorCode ierr;
1468: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1469: MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1470: if (!maxneighs) {
1471: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1472: *nnsp = NULL;
1473: return(0);
1474: }
1475: maxsize = 0;
1476: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1477: PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1478: /* create vectors to hold quadrature weights */
1479: MatCreateVecs(A,&quad_vec,NULL);
1480: if (!transpose) {
1481: MatGetLocalToGlobalMapping(A,&map,NULL);
1482: } else {
1483: MatGetLocalToGlobalMapping(A,NULL,&map);
1484: }
1485: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1486: VecDestroy(&quad_vec);
1487: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1488: for (i=0;i<maxneighs;i++) {
1489: VecLockReadPop(quad_vecs[i]);
1490: }
1492: /* compute local quad vec */
1493: MatISGetLocalMat(divudotp,&loc_divudotp);
1494: if (!transpose) {
1495: MatCreateVecs(loc_divudotp,&v,&p);
1496: } else {
1497: MatCreateVecs(loc_divudotp,&p,&v);
1498: }
1499: VecSet(p,1.);
1500: if (!transpose) {
1501: MatMultTranspose(loc_divudotp,p,v);
1502: } else {
1503: MatMult(loc_divudotp,p,v);
1504: }
1505: if (vl2l) {
1506: Mat lA;
1507: VecScatter sc;
1509: MatISGetLocalMat(A,&lA);
1510: MatCreateVecs(lA,&vins,NULL);
1511: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1512: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1513: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1514: VecScatterDestroy(&sc);
1515: } else {
1516: vins = v;
1517: }
1518: VecGetArrayRead(vins,&array);
1519: VecDestroy(&p);
1521: /* insert in global quadrature vecs */
1522: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1523: for (i=0;i<n_neigh;i++) {
1524: const PetscInt *idxs;
1525: PetscInt idx,nn,j;
1527: idxs = shared[i];
1528: nn = n_shared[i];
1529: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1530: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1531: idx = -(idx+1);
1532: ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1533: VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1534: }
1535: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1536: VecRestoreArrayRead(vins,&array);
1537: if (vl2l) {
1538: VecDestroy(&vins);
1539: }
1540: VecDestroy(&v);
1541: PetscFree2(gidxs,vals);
1543: /* assemble near null space */
1544: for (i=0;i<maxneighs;i++) {
1545: VecAssemblyBegin(quad_vecs[i]);
1546: }
1547: for (i=0;i<maxneighs;i++) {
1548: VecAssemblyEnd(quad_vecs[i]);
1549: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1550: VecLockReadPush(quad_vecs[i]);
1551: }
1552: VecDestroyVecs(maxneighs,&quad_vecs);
1553: return(0);
1554: }
1556: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1557: {
1558: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1562: if (primalv) {
1563: if (pcbddc->user_primal_vertices_local) {
1564: IS list[2], newp;
1566: list[0] = primalv;
1567: list[1] = pcbddc->user_primal_vertices_local;
1568: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1569: ISSortRemoveDups(newp);
1570: ISDestroy(&list[1]);
1571: pcbddc->user_primal_vertices_local = newp;
1572: } else {
1573: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1574: }
1575: }
1576: return(0);
1577: }
1579: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1580: {
1581: PetscInt f, *comp = (PetscInt *)ctx;
1584: for (f=0;f<Nf;f++) out[f] = X[*comp];
1585: return(0);
1586: }
1588: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1589: {
1591: Vec local,global;
1592: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1593: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1594: PetscBool monolithic = PETSC_FALSE;
1597: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1598: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1599: PetscOptionsEnd();
1600: /* need to convert from global to local topology information and remove references to information in global ordering */
1601: MatCreateVecs(pc->pmat,&global,NULL);
1602: MatCreateVecs(matis->A,&local,NULL);
1603: if (monolithic) { /* just get block size to properly compute vertices */
1604: if (pcbddc->vertex_size == 1) {
1605: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1606: }
1607: goto boundary;
1608: }
1610: if (pcbddc->user_provided_isfordofs) {
1611: if (pcbddc->n_ISForDofs) {
1612: PetscInt i;
1614: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1615: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1616: PetscInt bs;
1618: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1619: ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1620: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1621: ISDestroy(&pcbddc->ISForDofs[i]);
1622: }
1623: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1624: pcbddc->n_ISForDofs = 0;
1625: PetscFree(pcbddc->ISForDofs);
1626: }
1627: } else {
1628: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1629: DM dm;
1631: MatGetDM(pc->pmat, &dm);
1632: if (!dm) {
1633: PCGetDM(pc, &dm);
1634: }
1635: if (dm) {
1636: IS *fields;
1637: PetscInt nf,i;
1639: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1640: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1641: for (i=0;i<nf;i++) {
1642: PetscInt bs;
1644: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1645: ISGetBlockSize(fields[i],&bs);
1646: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1647: ISDestroy(&fields[i]);
1648: }
1649: PetscFree(fields);
1650: pcbddc->n_ISForDofsLocal = nf;
1651: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1652: PetscContainer c;
1654: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1655: if (c) {
1656: MatISLocalFields lf;
1657: PetscContainerGetPointer(c,(void**)&lf);
1658: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1659: } else { /* fallback, create the default fields if bs > 1 */
1660: PetscInt i, n = matis->A->rmap->n;
1661: MatGetBlockSize(pc->pmat,&i);
1662: if (i > 1) {
1663: pcbddc->n_ISForDofsLocal = i;
1664: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1665: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1666: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1667: }
1668: }
1669: }
1670: }
1671: } else {
1672: PetscInt i;
1673: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1674: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1675: }
1676: }
1677: }
1679: boundary:
1680: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1681: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1682: } else if (pcbddc->DirichletBoundariesLocal) {
1683: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1684: }
1685: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1686: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1687: } else if (pcbddc->NeumannBoundariesLocal) {
1688: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1689: }
1690: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1691: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1692: }
1693: VecDestroy(&global);
1694: VecDestroy(&local);
1695: /* detect local disconnected subdomains if requested (use matis->A) */
1696: if (pcbddc->detect_disconnected) {
1697: IS primalv = NULL;
1698: PetscInt i;
1699: PetscBool filter = pcbddc->detect_disconnected_filter;
1701: for (i=0;i<pcbddc->n_local_subs;i++) {
1702: ISDestroy(&pcbddc->local_subs[i]);
1703: }
1704: PetscFree(pcbddc->local_subs);
1705: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1706: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1707: ISDestroy(&primalv);
1708: }
1709: /* early stage corner detection */
1710: {
1711: DM dm;
1713: MatGetDM(pc->pmat,&dm);
1714: if (!dm) {
1715: PCGetDM(pc,&dm);
1716: }
1717: if (dm) {
1718: PetscBool isda;
1720: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1721: if (isda) {
1722: ISLocalToGlobalMapping l2l;
1723: IS corners;
1724: Mat lA;
1725: PetscBool gl,lo;
1727: {
1728: Vec cvec;
1729: const PetscScalar *coords;
1730: PetscInt dof,n,cdim;
1731: PetscBool memc = PETSC_TRUE;
1733: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1734: DMGetCoordinates(dm,&cvec);
1735: VecGetLocalSize(cvec,&n);
1736: VecGetBlockSize(cvec,&cdim);
1737: n /= cdim;
1738: PetscFree(pcbddc->mat_graph->coords);
1739: PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1740: VecGetArrayRead(cvec,&coords);
1741: #if defined(PETSC_USE_COMPLEX)
1742: memc = PETSC_FALSE;
1743: #endif
1744: if (dof != 1) memc = PETSC_FALSE;
1745: if (memc) {
1746: PetscMemcpy(pcbddc->mat_graph->coords,coords,cdim*n*dof*sizeof(PetscReal));
1747: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1748: PetscReal *bcoords = pcbddc->mat_graph->coords;
1749: PetscInt i, b, d;
1751: for (i=0;i<n;i++) {
1752: for (b=0;b<dof;b++) {
1753: for (d=0;d<cdim;d++) {
1754: bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1755: }
1756: }
1757: }
1758: }
1759: VecRestoreArrayRead(cvec,&coords);
1760: pcbddc->mat_graph->cdim = cdim;
1761: pcbddc->mat_graph->cnloc = dof*n;
1762: pcbddc->mat_graph->cloc = PETSC_FALSE;
1763: }
1764: DMDAGetSubdomainCornersIS(dm,&corners);
1765: MatISGetLocalMat(pc->pmat,&lA);
1766: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1767: MatISRestoreLocalMat(pc->pmat,&lA);
1768: lo = (PetscBool)(l2l && corners);
1769: MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1770: if (gl) { /* From PETSc's DMDA */
1771: const PetscInt *idx;
1772: PetscInt dof,bs,*idxout,n;
1774: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1775: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1776: ISGetLocalSize(corners,&n);
1777: ISGetIndices(corners,&idx);
1778: if (bs == dof) {
1779: PetscMalloc1(n,&idxout);
1780: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1781: } else { /* the original DMDA local-to-local map have been modified */
1782: PetscInt i,d;
1784: PetscMalloc1(dof*n,&idxout);
1785: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1786: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1788: bs = 1;
1789: n *= dof;
1790: }
1791: ISRestoreIndices(corners,&idx);
1792: DMDARestoreSubdomainCornersIS(dm,&corners);
1793: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1794: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1795: ISDestroy(&corners);
1796: pcbddc->corner_selected = PETSC_TRUE;
1797: pcbddc->corner_selection = PETSC_TRUE;
1798: }
1799: if (corners) {
1800: DMDARestoreSubdomainCornersIS(dm,&corners);
1801: }
1802: }
1803: }
1804: }
1805: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1806: DM dm;
1808: MatGetDM(pc->pmat,&dm);
1809: if (!dm) {
1810: PCGetDM(pc,&dm);
1811: }
1812: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1813: Vec vcoords;
1814: PetscSection section;
1815: PetscReal *coords;
1816: PetscInt d,cdim,nl,nf,**ctxs;
1817: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1819: DMGetCoordinateDim(dm,&cdim);
1820: DMGetSection(dm,§ion);
1821: PetscSectionGetNumFields(section,&nf);
1822: DMCreateGlobalVector(dm,&vcoords);
1823: VecGetLocalSize(vcoords,&nl);
1824: PetscMalloc1(nl*cdim,&coords);
1825: PetscMalloc2(nf,&funcs,nf,&ctxs);
1826: PetscMalloc1(nf,&ctxs[0]);
1827: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1828: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1829: for (d=0;d<cdim;d++) {
1830: PetscInt i;
1831: const PetscScalar *v;
1833: for (i=0;i<nf;i++) ctxs[i][0] = d;
1834: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1835: VecGetArrayRead(vcoords,&v);
1836: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1837: VecRestoreArrayRead(vcoords,&v);
1838: }
1839: VecDestroy(&vcoords);
1840: PCSetCoordinates(pc,cdim,nl,coords);
1841: PetscFree(coords);
1842: PetscFree(ctxs[0]);
1843: PetscFree2(funcs,ctxs);
1844: }
1845: }
1846: return(0);
1847: }
1849: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1850: {
1851: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1852: PetscErrorCode ierr;
1853: IS nis;
1854: const PetscInt *idxs;
1855: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1856: PetscBool *ld;
1859: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1860: if (mop == MPI_LAND) {
1861: /* init rootdata with true */
1862: ld = (PetscBool*) matis->sf_rootdata;
1863: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1864: } else {
1865: PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));
1866: }
1867: PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));
1868: ISGetLocalSize(*is,&nd);
1869: ISGetIndices(*is,&idxs);
1870: ld = (PetscBool*) matis->sf_leafdata;
1871: for (i=0;i<nd;i++)
1872: if (-1 < idxs[i] && idxs[i] < n)
1873: ld[idxs[i]] = PETSC_TRUE;
1874: ISRestoreIndices(*is,&idxs);
1875: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1876: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1877: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1878: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1879: if (mop == MPI_LAND) {
1880: PetscMalloc1(nd,&nidxs);
1881: } else {
1882: PetscMalloc1(n,&nidxs);
1883: }
1884: for (i=0,nnd=0;i<n;i++)
1885: if (ld[i])
1886: nidxs[nnd++] = i;
1887: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1888: ISDestroy(is);
1889: *is = nis;
1890: return(0);
1891: }
1893: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1894: {
1895: PC_IS *pcis = (PC_IS*)(pc->data);
1896: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1897: PetscErrorCode ierr;
1900: if (!pcbddc->benign_have_null) {
1901: return(0);
1902: }
1903: if (pcbddc->ChangeOfBasisMatrix) {
1904: Vec swap;
1906: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1907: swap = pcbddc->work_change;
1908: pcbddc->work_change = r;
1909: r = swap;
1910: }
1911: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1912: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1913: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1914: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1915: VecSet(z,0.);
1916: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1917: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1918: if (pcbddc->ChangeOfBasisMatrix) {
1919: pcbddc->work_change = r;
1920: VecCopy(z,pcbddc->work_change);
1921: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1922: }
1923: return(0);
1924: }
1926: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1927: {
1928: PCBDDCBenignMatMult_ctx ctx;
1929: PetscErrorCode ierr;
1930: PetscBool apply_right,apply_left,reset_x;
1933: MatShellGetContext(A,&ctx);
1934: if (transpose) {
1935: apply_right = ctx->apply_left;
1936: apply_left = ctx->apply_right;
1937: } else {
1938: apply_right = ctx->apply_right;
1939: apply_left = ctx->apply_left;
1940: }
1941: reset_x = PETSC_FALSE;
1942: if (apply_right) {
1943: const PetscScalar *ax;
1944: PetscInt nl,i;
1946: VecGetLocalSize(x,&nl);
1947: VecGetArrayRead(x,&ax);
1948: PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));
1949: VecRestoreArrayRead(x,&ax);
1950: for (i=0;i<ctx->benign_n;i++) {
1951: PetscScalar sum,val;
1952: const PetscInt *idxs;
1953: PetscInt nz,j;
1954: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1955: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1956: sum = 0.;
1957: if (ctx->apply_p0) {
1958: val = ctx->work[idxs[nz-1]];
1959: for (j=0;j<nz-1;j++) {
1960: sum += ctx->work[idxs[j]];
1961: ctx->work[idxs[j]] += val;
1962: }
1963: } else {
1964: for (j=0;j<nz-1;j++) {
1965: sum += ctx->work[idxs[j]];
1966: }
1967: }
1968: ctx->work[idxs[nz-1]] -= sum;
1969: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1970: }
1971: VecPlaceArray(x,ctx->work);
1972: reset_x = PETSC_TRUE;
1973: }
1974: if (transpose) {
1975: MatMultTranspose(ctx->A,x,y);
1976: } else {
1977: MatMult(ctx->A,x,y);
1978: }
1979: if (reset_x) {
1980: VecResetArray(x);
1981: }
1982: if (apply_left) {
1983: PetscScalar *ay;
1984: PetscInt i;
1986: VecGetArray(y,&ay);
1987: for (i=0;i<ctx->benign_n;i++) {
1988: PetscScalar sum,val;
1989: const PetscInt *idxs;
1990: PetscInt nz,j;
1991: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1992: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1993: val = -ay[idxs[nz-1]];
1994: if (ctx->apply_p0) {
1995: sum = 0.;
1996: for (j=0;j<nz-1;j++) {
1997: sum += ay[idxs[j]];
1998: ay[idxs[j]] += val;
1999: }
2000: ay[idxs[nz-1]] += sum;
2001: } else {
2002: for (j=0;j<nz-1;j++) {
2003: ay[idxs[j]] += val;
2004: }
2005: ay[idxs[nz-1]] = 0.;
2006: }
2007: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2008: }
2009: VecRestoreArray(y,&ay);
2010: }
2011: return(0);
2012: }
2014: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2015: {
2019: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2020: return(0);
2021: }
2023: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2024: {
2028: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2029: return(0);
2030: }
2032: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2033: {
2034: PC_IS *pcis = (PC_IS*)pc->data;
2035: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2036: PCBDDCBenignMatMult_ctx ctx;
2037: PetscErrorCode ierr;
2040: if (!restore) {
2041: Mat A_IB,A_BI;
2042: PetscScalar *work;
2043: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2045: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2046: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2047: PetscMalloc1(pcis->n,&work);
2048: MatCreate(PETSC_COMM_SELF,&A_IB);
2049: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2050: MatSetType(A_IB,MATSHELL);
2051: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2052: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2053: PetscNew(&ctx);
2054: MatShellSetContext(A_IB,ctx);
2055: ctx->apply_left = PETSC_TRUE;
2056: ctx->apply_right = PETSC_FALSE;
2057: ctx->apply_p0 = PETSC_FALSE;
2058: ctx->benign_n = pcbddc->benign_n;
2059: if (reuse) {
2060: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2061: ctx->free = PETSC_FALSE;
2062: } else { /* TODO: could be optimized for successive solves */
2063: ISLocalToGlobalMapping N_to_D;
2064: PetscInt i;
2066: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2067: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2068: for (i=0;i<pcbddc->benign_n;i++) {
2069: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2070: }
2071: ISLocalToGlobalMappingDestroy(&N_to_D);
2072: ctx->free = PETSC_TRUE;
2073: }
2074: ctx->A = pcis->A_IB;
2075: ctx->work = work;
2076: MatSetUp(A_IB);
2077: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2078: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2079: pcis->A_IB = A_IB;
2081: /* A_BI as A_IB^T */
2082: MatCreateTranspose(A_IB,&A_BI);
2083: pcbddc->benign_original_mat = pcis->A_BI;
2084: pcis->A_BI = A_BI;
2085: } else {
2086: if (!pcbddc->benign_original_mat) {
2087: return(0);
2088: }
2089: MatShellGetContext(pcis->A_IB,&ctx);
2090: MatDestroy(&pcis->A_IB);
2091: pcis->A_IB = ctx->A;
2092: ctx->A = NULL;
2093: MatDestroy(&pcis->A_BI);
2094: pcis->A_BI = pcbddc->benign_original_mat;
2095: pcbddc->benign_original_mat = NULL;
2096: if (ctx->free) {
2097: PetscInt i;
2098: for (i=0;i<ctx->benign_n;i++) {
2099: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2100: }
2101: PetscFree(ctx->benign_zerodiag_subs);
2102: }
2103: PetscFree(ctx->work);
2104: PetscFree(ctx);
2105: }
2106: return(0);
2107: }
2109: /* used just in bddc debug mode */
2110: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2111: {
2112: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2113: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2114: Mat An;
2118: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2119: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2120: if (is1) {
2121: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2122: MatDestroy(&An);
2123: } else {
2124: *B = An;
2125: }
2126: return(0);
2127: }
2129: /* TODO: add reuse flag */
2130: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2131: {
2132: Mat Bt;
2133: PetscScalar *a,*bdata;
2134: const PetscInt *ii,*ij;
2135: PetscInt m,n,i,nnz,*bii,*bij;
2136: PetscBool flg_row;
2140: MatGetSize(A,&n,&m);
2141: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2142: MatSeqAIJGetArray(A,&a);
2143: nnz = n;
2144: for (i=0;i<ii[n];i++) {
2145: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2146: }
2147: PetscMalloc1(n+1,&bii);
2148: PetscMalloc1(nnz,&bij);
2149: PetscMalloc1(nnz,&bdata);
2150: nnz = 0;
2151: bii[0] = 0;
2152: for (i=0;i<n;i++) {
2153: PetscInt j;
2154: for (j=ii[i];j<ii[i+1];j++) {
2155: PetscScalar entry = a[j];
2156: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2157: bij[nnz] = ij[j];
2158: bdata[nnz] = entry;
2159: nnz++;
2160: }
2161: }
2162: bii[i+1] = nnz;
2163: }
2164: MatSeqAIJRestoreArray(A,&a);
2165: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2166: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2167: {
2168: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2169: b->free_a = PETSC_TRUE;
2170: b->free_ij = PETSC_TRUE;
2171: }
2172: if (*B == A) {
2173: MatDestroy(&A);
2174: }
2175: *B = Bt;
2176: return(0);
2177: }
2179: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2180: {
2181: Mat B = NULL;
2182: DM dm;
2183: IS is_dummy,*cc_n;
2184: ISLocalToGlobalMapping l2gmap_dummy;
2185: PCBDDCGraph graph;
2186: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2187: PetscInt i,n;
2188: PetscInt *xadj,*adjncy;
2189: PetscBool isplex = PETSC_FALSE;
2190: PetscErrorCode ierr;
2193: if (ncc) *ncc = 0;
2194: if (cc) *cc = NULL;
2195: if (primalv) *primalv = NULL;
2196: PCBDDCGraphCreate(&graph);
2197: MatGetDM(pc->pmat,&dm);
2198: if (!dm) {
2199: PCGetDM(pc,&dm);
2200: }
2201: if (dm) {
2202: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2203: }
2204: if (filter) isplex = PETSC_FALSE;
2206: if (isplex) { /* this code has been modified from plexpartition.c */
2207: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2208: PetscInt *adj = NULL;
2209: IS cellNumbering;
2210: const PetscInt *cellNum;
2211: PetscBool useCone, useClosure;
2212: PetscSection section;
2213: PetscSegBuffer adjBuffer;
2214: PetscSF sfPoint;
2218: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2219: DMGetPointSF(dm, &sfPoint);
2220: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2221: /* Build adjacency graph via a section/segbuffer */
2222: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2223: PetscSectionSetChart(section, pStart, pEnd);
2224: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2225: /* Always use FVM adjacency to create partitioner graph */
2226: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2227: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2228: DMPlexGetCellNumbering(dm, &cellNumbering);
2229: ISGetIndices(cellNumbering, &cellNum);
2230: for (n = 0, p = pStart; p < pEnd; p++) {
2231: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2232: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2233: adjSize = PETSC_DETERMINE;
2234: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2235: for (a = 0; a < adjSize; ++a) {
2236: const PetscInt point = adj[a];
2237: if (pStart <= point && point < pEnd) {
2238: PetscInt *PETSC_RESTRICT pBuf;
2239: PetscSectionAddDof(section, p, 1);
2240: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2241: *pBuf = point;
2242: }
2243: }
2244: n++;
2245: }
2246: DMSetBasicAdjacency(dm, useCone, useClosure);
2247: /* Derive CSR graph from section/segbuffer */
2248: PetscSectionSetUp(section);
2249: PetscSectionGetStorageSize(section, &size);
2250: PetscMalloc1(n+1, &xadj);
2251: for (idx = 0, p = pStart; p < pEnd; p++) {
2252: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2253: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2254: }
2255: xadj[n] = size;
2256: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2257: /* Clean up */
2258: PetscSegBufferDestroy(&adjBuffer);
2259: PetscSectionDestroy(§ion);
2260: PetscFree(adj);
2261: graph->xadj = xadj;
2262: graph->adjncy = adjncy;
2263: } else {
2264: Mat A;
2265: PetscBool isseqaij, flg_row;
2267: MatISGetLocalMat(pc->pmat,&A);
2268: if (!A->rmap->N || !A->cmap->N) {
2269: PCBDDCGraphDestroy(&graph);
2270: return(0);
2271: }
2272: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2273: if (!isseqaij && filter) {
2274: PetscBool isseqdense;
2276: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2277: if (!isseqdense) {
2278: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2279: } else { /* TODO: rectangular case and LDA */
2280: PetscScalar *array;
2281: PetscReal chop=1.e-6;
2283: MatDuplicate(A,MAT_COPY_VALUES,&B);
2284: MatDenseGetArray(B,&array);
2285: MatGetSize(B,&n,NULL);
2286: for (i=0;i<n;i++) {
2287: PetscInt j;
2288: for (j=i+1;j<n;j++) {
2289: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2290: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2291: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2292: }
2293: }
2294: MatDenseRestoreArray(B,&array);
2295: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2296: }
2297: } else {
2298: PetscObjectReference((PetscObject)A);
2299: B = A;
2300: }
2301: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2303: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2304: if (filter) {
2305: PetscScalar *data;
2306: PetscInt j,cum;
2308: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2309: MatSeqAIJGetArray(B,&data);
2310: cum = 0;
2311: for (i=0;i<n;i++) {
2312: PetscInt t;
2314: for (j=xadj[i];j<xadj[i+1];j++) {
2315: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2316: continue;
2317: }
2318: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2319: }
2320: t = xadj_filtered[i];
2321: xadj_filtered[i] = cum;
2322: cum += t;
2323: }
2324: MatSeqAIJRestoreArray(B,&data);
2325: graph->xadj = xadj_filtered;
2326: graph->adjncy = adjncy_filtered;
2327: } else {
2328: graph->xadj = xadj;
2329: graph->adjncy = adjncy;
2330: }
2331: }
2332: /* compute local connected components using PCBDDCGraph */
2333: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2334: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2335: ISDestroy(&is_dummy);
2336: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2337: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2338: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2339: PCBDDCGraphComputeConnectedComponents(graph);
2341: /* partial clean up */
2342: PetscFree2(xadj_filtered,adjncy_filtered);
2343: if (B) {
2344: PetscBool flg_row;
2345: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2346: MatDestroy(&B);
2347: }
2348: if (isplex) {
2349: PetscFree(xadj);
2350: PetscFree(adjncy);
2351: }
2353: /* get back data */
2354: if (isplex) {
2355: if (ncc) *ncc = graph->ncc;
2356: if (cc || primalv) {
2357: Mat A;
2358: PetscBT btv,btvt;
2359: PetscSection subSection;
2360: PetscInt *ids,cum,cump,*cids,*pids;
2362: DMPlexGetSubdomainSection(dm,&subSection);
2363: MatISGetLocalMat(pc->pmat,&A);
2364: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2365: PetscBTCreate(A->rmap->n,&btv);
2366: PetscBTCreate(A->rmap->n,&btvt);
2368: cids[0] = 0;
2369: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2370: PetscInt j;
2372: PetscBTMemzero(A->rmap->n,btvt);
2373: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2374: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2376: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2377: for (k = 0; k < 2*size; k += 2) {
2378: PetscInt s, pp, p = closure[k], off, dof, cdof;
2380: PetscSectionGetConstraintDof(subSection,p,&cdof);
2381: PetscSectionGetOffset(subSection,p,&off);
2382: PetscSectionGetDof(subSection,p,&dof);
2383: for (s = 0; s < dof-cdof; s++) {
2384: if (PetscBTLookupSet(btvt,off+s)) continue;
2385: if (!PetscBTLookup(btv,off+s)) {
2386: ids[cum++] = off+s;
2387: } else { /* cross-vertex */
2388: pids[cump++] = off+s;
2389: }
2390: }
2391: DMPlexGetTreeParent(dm,p,&pp,NULL);
2392: if (pp != p) {
2393: PetscSectionGetConstraintDof(subSection,pp,&cdof);
2394: PetscSectionGetOffset(subSection,pp,&off);
2395: PetscSectionGetDof(subSection,pp,&dof);
2396: for (s = 0; s < dof-cdof; s++) {
2397: if (PetscBTLookupSet(btvt,off+s)) continue;
2398: if (!PetscBTLookup(btv,off+s)) {
2399: ids[cum++] = off+s;
2400: } else { /* cross-vertex */
2401: pids[cump++] = off+s;
2402: }
2403: }
2404: }
2405: }
2406: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2407: }
2408: cids[i+1] = cum;
2409: /* mark dofs as already assigned */
2410: for (j = cids[i]; j < cids[i+1]; j++) {
2411: PetscBTSet(btv,ids[j]);
2412: }
2413: }
2414: if (cc) {
2415: PetscMalloc1(graph->ncc,&cc_n);
2416: for (i = 0; i < graph->ncc; i++) {
2417: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2418: }
2419: *cc = cc_n;
2420: }
2421: if (primalv) {
2422: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2423: }
2424: PetscFree3(ids,cids,pids);
2425: PetscBTDestroy(&btv);
2426: PetscBTDestroy(&btvt);
2427: }
2428: } else {
2429: if (ncc) *ncc = graph->ncc;
2430: if (cc) {
2431: PetscMalloc1(graph->ncc,&cc_n);
2432: for (i=0;i<graph->ncc;i++) {
2433: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2434: }
2435: *cc = cc_n;
2436: }
2437: }
2438: /* clean up graph */
2439: graph->xadj = 0;
2440: graph->adjncy = 0;
2441: PCBDDCGraphDestroy(&graph);
2442: return(0);
2443: }
2445: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2446: {
2447: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2448: PC_IS* pcis = (PC_IS*)(pc->data);
2449: IS dirIS = NULL;
2450: PetscInt i;
2454: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2455: if (zerodiag) {
2456: Mat A;
2457: Vec vec3_N;
2458: PetscScalar *vals;
2459: const PetscInt *idxs;
2460: PetscInt nz,*count;
2462: /* p0 */
2463: VecSet(pcis->vec1_N,0.);
2464: PetscMalloc1(pcis->n,&vals);
2465: ISGetLocalSize(zerodiag,&nz);
2466: ISGetIndices(zerodiag,&idxs);
2467: for (i=0;i<nz;i++) vals[i] = 1.;
2468: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2469: VecAssemblyBegin(pcis->vec1_N);
2470: VecAssemblyEnd(pcis->vec1_N);
2471: /* v_I */
2472: VecSetRandom(pcis->vec2_N,NULL);
2473: for (i=0;i<nz;i++) vals[i] = 0.;
2474: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2475: ISRestoreIndices(zerodiag,&idxs);
2476: ISGetIndices(pcis->is_B_local,&idxs);
2477: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2478: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2479: ISRestoreIndices(pcis->is_B_local,&idxs);
2480: if (dirIS) {
2481: PetscInt n;
2483: ISGetLocalSize(dirIS,&n);
2484: ISGetIndices(dirIS,&idxs);
2485: for (i=0;i<n;i++) vals[i] = 0.;
2486: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2487: ISRestoreIndices(dirIS,&idxs);
2488: }
2489: VecAssemblyBegin(pcis->vec2_N);
2490: VecAssemblyEnd(pcis->vec2_N);
2491: VecDuplicate(pcis->vec1_N,&vec3_N);
2492: VecSet(vec3_N,0.);
2493: MatISGetLocalMat(pc->pmat,&A);
2494: MatMult(A,pcis->vec1_N,vec3_N);
2495: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2496: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2497: PetscFree(vals);
2498: VecDestroy(&vec3_N);
2500: /* there should not be any pressure dofs lying on the interface */
2501: PetscCalloc1(pcis->n,&count);
2502: ISGetIndices(pcis->is_B_local,&idxs);
2503: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2504: ISRestoreIndices(pcis->is_B_local,&idxs);
2505: ISGetIndices(zerodiag,&idxs);
2506: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2507: ISRestoreIndices(zerodiag,&idxs);
2508: PetscFree(count);
2509: }
2510: ISDestroy(&dirIS);
2512: /* check PCBDDCBenignGetOrSetP0 */
2513: VecSetRandom(pcis->vec1_global,NULL);
2514: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2515: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2516: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2517: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2518: for (i=0;i<pcbddc->benign_n;i++) {
2519: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2520: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2521: }
2522: return(0);
2523: }
2525: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2526: {
2527: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2528: IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2529: PetscInt nz,n,benign_n,bsp = 1;
2530: PetscInt *interior_dofs,n_interior_dofs,nneu;
2531: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2535: if (reuse) goto project_b0;
2536: PetscSFDestroy(&pcbddc->benign_sf);
2537: MatDestroy(&pcbddc->benign_B0);
2538: for (n=0;n<pcbddc->benign_n;n++) {
2539: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2540: }
2541: PetscFree(pcbddc->benign_zerodiag_subs);
2542: has_null_pressures = PETSC_TRUE;
2543: have_null = PETSC_TRUE;
2544: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2545: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2546: Checks if all the pressure dofs in each subdomain have a zero diagonal
2547: If not, a change of basis on pressures is not needed
2548: since the local Schur complements are already SPD
2549: */
2550: if (pcbddc->n_ISForDofsLocal) {
2551: IS iP = NULL;
2552: PetscInt p,*pp;
2553: PetscBool flg;
2555: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2556: n = pcbddc->n_ISForDofsLocal;
2557: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2558: PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2559: PetscOptionsEnd();
2560: if (!flg) {
2561: n = 1;
2562: pp[0] = pcbddc->n_ISForDofsLocal-1;
2563: }
2565: bsp = 0;
2566: for (p=0;p<n;p++) {
2567: PetscInt bs;
2569: if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2570: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2571: bsp += bs;
2572: }
2573: PetscMalloc1(bsp,&bzerodiag);
2574: bsp = 0;
2575: for (p=0;p<n;p++) {
2576: const PetscInt *idxs;
2577: PetscInt b,bs,npl,*bidxs;
2579: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2580: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2581: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2582: PetscMalloc1(npl/bs,&bidxs);
2583: for (b=0;b<bs;b++) {
2584: PetscInt i;
2586: for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2587: ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2588: bsp++;
2589: }
2590: PetscFree(bidxs);
2591: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2592: }
2593: ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);
2595: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2596: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2597: if (iP) {
2598: IS newpressures;
2600: ISDifference(pressures,iP,&newpressures);
2601: ISDestroy(&pressures);
2602: pressures = newpressures;
2603: }
2604: ISSorted(pressures,&sorted);
2605: if (!sorted) {
2606: ISSort(pressures);
2607: }
2608: PetscFree(pp);
2609: }
2611: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2612: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2613: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2614: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2615: ISSorted(zerodiag,&sorted);
2616: if (!sorted) {
2617: ISSort(zerodiag);
2618: }
2619: PetscObjectReference((PetscObject)zerodiag);
2620: zerodiag_save = zerodiag;
2621: ISGetLocalSize(zerodiag,&nz);
2622: if (!nz) {
2623: if (n) have_null = PETSC_FALSE;
2624: has_null_pressures = PETSC_FALSE;
2625: ISDestroy(&zerodiag);
2626: }
2627: recompute_zerodiag = PETSC_FALSE;
2629: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2630: zerodiag_subs = NULL;
2631: benign_n = 0;
2632: n_interior_dofs = 0;
2633: interior_dofs = NULL;
2634: nneu = 0;
2635: if (pcbddc->NeumannBoundariesLocal) {
2636: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2637: }
2638: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2639: if (checkb) { /* need to compute interior nodes */
2640: PetscInt n,i,j;
2641: PetscInt n_neigh,*neigh,*n_shared,**shared;
2642: PetscInt *iwork;
2644: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2645: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2646: PetscCalloc1(n,&iwork);
2647: PetscMalloc1(n,&interior_dofs);
2648: for (i=1;i<n_neigh;i++)
2649: for (j=0;j<n_shared[i];j++)
2650: iwork[shared[i][j]] += 1;
2651: for (i=0;i<n;i++)
2652: if (!iwork[i])
2653: interior_dofs[n_interior_dofs++] = i;
2654: PetscFree(iwork);
2655: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2656: }
2657: if (has_null_pressures) {
2658: IS *subs;
2659: PetscInt nsubs,i,j,nl;
2660: const PetscInt *idxs;
2661: PetscScalar *array;
2662: Vec *work;
2663: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2665: subs = pcbddc->local_subs;
2666: nsubs = pcbddc->n_local_subs;
2667: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2668: if (checkb) {
2669: VecDuplicateVecs(matis->y,2,&work);
2670: ISGetLocalSize(zerodiag,&nl);
2671: ISGetIndices(zerodiag,&idxs);
2672: /* work[0] = 1_p */
2673: VecSet(work[0],0.);
2674: VecGetArray(work[0],&array);
2675: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2676: VecRestoreArray(work[0],&array);
2677: /* work[0] = 1_v */
2678: VecSet(work[1],1.);
2679: VecGetArray(work[1],&array);
2680: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2681: VecRestoreArray(work[1],&array);
2682: ISRestoreIndices(zerodiag,&idxs);
2683: }
2685: if (nsubs > 1 || bsp > 1) {
2686: IS *is;
2687: PetscInt b,totb;
2689: totb = bsp;
2690: is = bsp > 1 ? bzerodiag : &zerodiag;
2691: nsubs = PetscMax(nsubs,1);
2692: PetscCalloc1(nsubs*totb,&zerodiag_subs);
2693: for (b=0;b<totb;b++) {
2694: for (i=0;i<nsubs;i++) {
2695: ISLocalToGlobalMapping l2g;
2696: IS t_zerodiag_subs;
2697: PetscInt nl;
2699: if (subs) {
2700: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2701: } else {
2702: IS tis;
2704: MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2705: ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2706: ISLocalToGlobalMappingCreateIS(tis,&l2g);
2707: ISDestroy(&tis);
2708: }
2709: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2710: ISGetLocalSize(t_zerodiag_subs,&nl);
2711: if (nl) {
2712: PetscBool valid = PETSC_TRUE;
2714: if (checkb) {
2715: VecSet(matis->x,0);
2716: ISGetLocalSize(subs[i],&nl);
2717: ISGetIndices(subs[i],&idxs);
2718: VecGetArray(matis->x,&array);
2719: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2720: VecRestoreArray(matis->x,&array);
2721: ISRestoreIndices(subs[i],&idxs);
2722: VecPointwiseMult(matis->x,work[0],matis->x);
2723: MatMult(matis->A,matis->x,matis->y);
2724: VecPointwiseMult(matis->y,work[1],matis->y);
2725: VecGetArray(matis->y,&array);
2726: for (j=0;j<n_interior_dofs;j++) {
2727: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2728: valid = PETSC_FALSE;
2729: break;
2730: }
2731: }
2732: VecRestoreArray(matis->y,&array);
2733: }
2734: if (valid && nneu) {
2735: const PetscInt *idxs;
2736: PetscInt nzb;
2738: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2739: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2740: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2741: if (nzb) valid = PETSC_FALSE;
2742: }
2743: if (valid && pressures) {
2744: IS t_pressure_subs,tmp;
2745: PetscInt i1,i2;
2747: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2748: ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2749: ISGetLocalSize(tmp,&i1);
2750: ISGetLocalSize(t_zerodiag_subs,&i2);
2751: if (i2 != i1) valid = PETSC_FALSE;
2752: ISDestroy(&t_pressure_subs);
2753: ISDestroy(&tmp);
2754: }
2755: if (valid) {
2756: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2757: benign_n++;
2758: } else recompute_zerodiag = PETSC_TRUE;
2759: }
2760: ISDestroy(&t_zerodiag_subs);
2761: ISLocalToGlobalMappingDestroy(&l2g);
2762: }
2763: }
2764: } else { /* there's just one subdomain (or zero if they have not been detected */
2765: PetscBool valid = PETSC_TRUE;
2767: if (nneu) valid = PETSC_FALSE;
2768: if (valid && pressures) {
2769: ISEqual(pressures,zerodiag,&valid);
2770: }
2771: if (valid && checkb) {
2772: MatMult(matis->A,work[0],matis->x);
2773: VecPointwiseMult(matis->x,work[1],matis->x);
2774: VecGetArray(matis->x,&array);
2775: for (j=0;j<n_interior_dofs;j++) {
2776: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2777: valid = PETSC_FALSE;
2778: break;
2779: }
2780: }
2781: VecRestoreArray(matis->x,&array);
2782: }
2783: if (valid) {
2784: benign_n = 1;
2785: PetscMalloc1(benign_n,&zerodiag_subs);
2786: PetscObjectReference((PetscObject)zerodiag);
2787: zerodiag_subs[0] = zerodiag;
2788: }
2789: }
2790: if (checkb) {
2791: VecDestroyVecs(2,&work);
2792: }
2793: }
2794: PetscFree(interior_dofs);
2796: if (!benign_n) {
2797: PetscInt n;
2799: ISDestroy(&zerodiag);
2800: recompute_zerodiag = PETSC_FALSE;
2801: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2802: if (n) have_null = PETSC_FALSE;
2803: }
2805: /* final check for null pressures */
2806: if (zerodiag && pressures) {
2807: ISEqual(pressures,zerodiag,&have_null);
2808: }
2810: if (recompute_zerodiag) {
2811: ISDestroy(&zerodiag);
2812: if (benign_n == 1) {
2813: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2814: zerodiag = zerodiag_subs[0];
2815: } else {
2816: PetscInt i,nzn,*new_idxs;
2818: nzn = 0;
2819: for (i=0;i<benign_n;i++) {
2820: PetscInt ns;
2821: ISGetLocalSize(zerodiag_subs[i],&ns);
2822: nzn += ns;
2823: }
2824: PetscMalloc1(nzn,&new_idxs);
2825: nzn = 0;
2826: for (i=0;i<benign_n;i++) {
2827: PetscInt ns,*idxs;
2828: ISGetLocalSize(zerodiag_subs[i],&ns);
2829: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2830: PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));
2831: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2832: nzn += ns;
2833: }
2834: PetscSortInt(nzn,new_idxs);
2835: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2836: }
2837: have_null = PETSC_FALSE;
2838: }
2840: /* determines if the coarse solver will be singular or not */
2841: MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2843: /* Prepare matrix to compute no-net-flux */
2844: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2845: Mat A,loc_divudotp;
2846: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2847: IS row,col,isused = NULL;
2848: PetscInt M,N,n,st,n_isused;
2850: if (pressures) {
2851: isused = pressures;
2852: } else {
2853: isused = zerodiag_save;
2854: }
2855: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2856: MatISGetLocalMat(pc->pmat,&A);
2857: MatGetLocalSize(A,&n,NULL);
2858: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2859: n_isused = 0;
2860: if (isused) {
2861: ISGetLocalSize(isused,&n_isused);
2862: }
2863: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2864: st = st-n_isused;
2865: if (n) {
2866: const PetscInt *gidxs;
2868: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2869: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2870: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2871: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2872: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2873: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2874: } else {
2875: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2876: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2877: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2878: }
2879: MatGetSize(pc->pmat,NULL,&N);
2880: ISGetSize(row,&M);
2881: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2882: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2883: ISDestroy(&row);
2884: ISDestroy(&col);
2885: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2886: MatSetType(pcbddc->divudotp,MATIS);
2887: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2888: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2889: ISLocalToGlobalMappingDestroy(&rl2g);
2890: ISLocalToGlobalMappingDestroy(&cl2g);
2891: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2892: MatDestroy(&loc_divudotp);
2893: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2894: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2895: }
2896: ISDestroy(&zerodiag_save);
2897: ISDestroy(&pressures);
2898: if (bzerodiag) {
2899: PetscInt i;
2901: for (i=0;i<bsp;i++) {
2902: ISDestroy(&bzerodiag[i]);
2903: }
2904: PetscFree(bzerodiag);
2905: }
2906: pcbddc->benign_n = benign_n;
2907: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2909: /* determines if the problem has subdomains with 0 pressure block */
2910: have_null = (PetscBool)(!!pcbddc->benign_n);
2911: MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2913: project_b0:
2914: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2915: /* change of basis and p0 dofs */
2916: if (pcbddc->benign_n) {
2917: PetscInt i,s,*nnz;
2919: /* local change of basis for pressures */
2920: MatDestroy(&pcbddc->benign_change);
2921: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2922: MatSetType(pcbddc->benign_change,MATAIJ);
2923: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2924: PetscMalloc1(n,&nnz);
2925: for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2926: for (i=0;i<pcbddc->benign_n;i++) {
2927: const PetscInt *idxs;
2928: PetscInt nzs,j;
2930: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2931: ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2932: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2933: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2934: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2935: }
2936: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2937: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2938: PetscFree(nnz);
2939: /* set identity by default */
2940: for (i=0;i<n;i++) {
2941: MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2942: }
2943: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2944: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2945: /* set change on pressures */
2946: for (s=0;s<pcbddc->benign_n;s++) {
2947: PetscScalar *array;
2948: const PetscInt *idxs;
2949: PetscInt nzs;
2951: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2952: ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2953: for (i=0;i<nzs-1;i++) {
2954: PetscScalar vals[2];
2955: PetscInt cols[2];
2957: cols[0] = idxs[i];
2958: cols[1] = idxs[nzs-1];
2959: vals[0] = 1.;
2960: vals[1] = 1.;
2961: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2962: }
2963: PetscMalloc1(nzs,&array);
2964: for (i=0;i<nzs-1;i++) array[i] = -1.;
2965: array[nzs-1] = 1.;
2966: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2967: /* store local idxs for p0 */
2968: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2969: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2970: PetscFree(array);
2971: }
2972: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2973: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2975: /* project if needed */
2976: if (pcbddc->benign_change_explicit) {
2977: Mat M;
2979: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2980: MatDestroy(&pcbddc->local_mat);
2981: MatSeqAIJCompress(M,&pcbddc->local_mat);
2982: MatDestroy(&M);
2983: }
2984: /* store global idxs for p0 */
2985: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2986: }
2987: *zerodiaglocal = zerodiag;
2988: return(0);
2989: }
2991: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2992: {
2993: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2994: PetscScalar *array;
2998: if (!pcbddc->benign_sf) {
2999: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3000: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3001: }
3002: if (get) {
3003: VecGetArrayRead(v,(const PetscScalar**)&array);
3004: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3005: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3006: VecRestoreArrayRead(v,(const PetscScalar**)&array);
3007: } else {
3008: VecGetArray(v,&array);
3009: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3010: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3011: VecRestoreArray(v,&array);
3012: }
3013: return(0);
3014: }
3016: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3017: {
3018: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3022: /* TODO: add error checking
3023: - avoid nested pop (or push) calls.
3024: - cannot push before pop.
3025: - cannot call this if pcbddc->local_mat is NULL
3026: */
3027: if (!pcbddc->benign_n) {
3028: return(0);
3029: }
3030: if (pop) {
3031: if (pcbddc->benign_change_explicit) {
3032: IS is_p0;
3033: MatReuse reuse;
3035: /* extract B_0 */
3036: reuse = MAT_INITIAL_MATRIX;
3037: if (pcbddc->benign_B0) {
3038: reuse = MAT_REUSE_MATRIX;
3039: }
3040: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3041: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3042: /* remove rows and cols from local problem */
3043: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3044: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3045: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3046: ISDestroy(&is_p0);
3047: } else {
3048: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3049: PetscScalar *vals;
3050: PetscInt i,n,*idxs_ins;
3052: VecGetLocalSize(matis->y,&n);
3053: PetscMalloc2(n,&idxs_ins,n,&vals);
3054: if (!pcbddc->benign_B0) {
3055: PetscInt *nnz;
3056: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3057: MatSetType(pcbddc->benign_B0,MATAIJ);
3058: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3059: PetscMalloc1(pcbddc->benign_n,&nnz);
3060: for (i=0;i<pcbddc->benign_n;i++) {
3061: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3062: nnz[i] = n - nnz[i];
3063: }
3064: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3065: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3066: PetscFree(nnz);
3067: }
3069: for (i=0;i<pcbddc->benign_n;i++) {
3070: PetscScalar *array;
3071: PetscInt *idxs,j,nz,cum;
3073: VecSet(matis->x,0.);
3074: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3075: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3076: for (j=0;j<nz;j++) vals[j] = 1.;
3077: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3078: VecAssemblyBegin(matis->x);
3079: VecAssemblyEnd(matis->x);
3080: VecSet(matis->y,0.);
3081: MatMult(matis->A,matis->x,matis->y);
3082: VecGetArray(matis->y,&array);
3083: cum = 0;
3084: for (j=0;j<n;j++) {
3085: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3086: vals[cum] = array[j];
3087: idxs_ins[cum] = j;
3088: cum++;
3089: }
3090: }
3091: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3092: VecRestoreArray(matis->y,&array);
3093: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3094: }
3095: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3096: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3097: PetscFree2(idxs_ins,vals);
3098: }
3099: } else { /* push */
3100: if (pcbddc->benign_change_explicit) {
3101: PetscInt i;
3103: for (i=0;i<pcbddc->benign_n;i++) {
3104: PetscScalar *B0_vals;
3105: PetscInt *B0_cols,B0_ncol;
3107: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3108: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3109: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3110: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3111: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3112: }
3113: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3114: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3115: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3116: }
3117: return(0);
3118: }
3120: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3121: {
3122: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3123: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3124: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3125: PetscBLASInt *B_iwork,*B_ifail;
3126: PetscScalar *work,lwork;
3127: PetscScalar *St,*S,*eigv;
3128: PetscScalar *Sarray,*Starray;
3129: PetscReal *eigs,thresh,lthresh,uthresh;
3130: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3131: PetscBool allocated_S_St;
3132: #if defined(PETSC_USE_COMPLEX)
3133: PetscReal *rwork;
3134: #endif
3135: PetscErrorCode ierr;
3138: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3139: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3140: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3141: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3143: if (pcbddc->dbg_flag) {
3144: PetscViewerFlush(pcbddc->dbg_viewer);
3145: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3146: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3147: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3148: }
3150: if (pcbddc->dbg_flag) {
3151: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3152: }
3154: /* max size of subsets */
3155: mss = 0;
3156: for (i=0;i<sub_schurs->n_subs;i++) {
3157: PetscInt subset_size;
3159: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3160: mss = PetscMax(mss,subset_size);
3161: }
3163: /* min/max and threshold */
3164: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3165: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3166: nmax = PetscMax(nmin,nmax);
3167: allocated_S_St = PETSC_FALSE;
3168: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3169: allocated_S_St = PETSC_TRUE;
3170: }
3172: /* allocate lapack workspace */
3173: cum = cum2 = 0;
3174: maxneigs = 0;
3175: for (i=0;i<sub_schurs->n_subs;i++) {
3176: PetscInt n,subset_size;
3178: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3179: n = PetscMin(subset_size,nmax);
3180: cum += subset_size;
3181: cum2 += subset_size*n;
3182: maxneigs = PetscMax(maxneigs,n);
3183: }
3184: if (mss) {
3185: if (sub_schurs->is_symmetric) {
3186: PetscBLASInt B_itype = 1;
3187: PetscBLASInt B_N = mss;
3188: PetscReal zero = 0.0;
3189: PetscReal eps = 0.0; /* dlamch? */
3191: B_lwork = -1;
3192: S = NULL;
3193: St = NULL;
3194: eigs = NULL;
3195: eigv = NULL;
3196: B_iwork = NULL;
3197: B_ifail = NULL;
3198: #if defined(PETSC_USE_COMPLEX)
3199: rwork = NULL;
3200: #endif
3201: thresh = 1.0;
3202: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3203: #if defined(PETSC_USE_COMPLEX)
3204: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3205: #else
3206: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3207: #endif
3208: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3209: PetscFPTrapPop();
3210: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3211: } else {
3212: lwork = 0;
3213: }
3215: nv = 0;
3216: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3217: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3218: }
3219: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3220: if (allocated_S_St) {
3221: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3222: }
3223: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3224: #if defined(PETSC_USE_COMPLEX)
3225: PetscMalloc1(7*mss,&rwork);
3226: #endif
3227: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3228: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3229: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3230: nv+cum,&pcbddc->adaptive_constraints_idxs,
3231: nv+cum2,&pcbddc->adaptive_constraints_data);
3232: PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));
3234: maxneigs = 0;
3235: cum = cumarray = 0;
3236: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3237: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3238: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3239: const PetscInt *idxs;
3241: ISGetIndices(sub_schurs->is_vertices,&idxs);
3242: for (cum=0;cum<nv;cum++) {
3243: pcbddc->adaptive_constraints_n[cum] = 1;
3244: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3245: pcbddc->adaptive_constraints_data[cum] = 1.0;
3246: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3247: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3248: }
3249: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3250: }
3252: if (mss) { /* multilevel */
3253: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3254: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3255: }
3257: lthresh = pcbddc->adaptive_threshold[0];
3258: uthresh = pcbddc->adaptive_threshold[1];
3259: for (i=0;i<sub_schurs->n_subs;i++) {
3260: const PetscInt *idxs;
3261: PetscReal upper,lower;
3262: PetscInt j,subset_size,eigs_start = 0;
3263: PetscBLASInt B_N;
3264: PetscBool same_data = PETSC_FALSE;
3265: PetscBool scal = PETSC_FALSE;
3267: if (pcbddc->use_deluxe_scaling) {
3268: upper = PETSC_MAX_REAL;
3269: lower = uthresh;
3270: } else {
3271: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3272: upper = 1./uthresh;
3273: lower = 0.;
3274: }
3275: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3276: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3277: PetscBLASIntCast(subset_size,&B_N);
3278: /* this is experimental: we assume the dofs have been properly grouped to have
3279: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3280: if (!sub_schurs->is_posdef) {
3281: Mat T;
3283: for (j=0;j<subset_size;j++) {
3284: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3285: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3286: MatScale(T,-1.0);
3287: MatDestroy(&T);
3288: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3289: MatScale(T,-1.0);
3290: MatDestroy(&T);
3291: if (sub_schurs->change_primal_sub) {
3292: PetscInt nz,k;
3293: const PetscInt *idxs;
3295: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3296: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3297: for (k=0;k<nz;k++) {
3298: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3299: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3300: }
3301: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3302: }
3303: scal = PETSC_TRUE;
3304: break;
3305: }
3306: }
3307: }
3309: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3310: if (sub_schurs->is_symmetric) {
3311: PetscInt j,k;
3312: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
3313: PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
3314: PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
3315: }
3316: for (j=0;j<subset_size;j++) {
3317: for (k=j;k<subset_size;k++) {
3318: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3319: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3320: }
3321: }
3322: } else {
3323: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3324: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3325: }
3326: } else {
3327: S = Sarray + cumarray;
3328: St = Starray + cumarray;
3329: }
3330: /* see if we can save some work */
3331: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3332: PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
3333: }
3335: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3336: B_neigs = 0;
3337: } else {
3338: if (sub_schurs->is_symmetric) {
3339: PetscBLASInt B_itype = 1;
3340: PetscBLASInt B_IL, B_IU;
3341: PetscReal eps = -1.0; /* dlamch? */
3342: PetscInt nmin_s;
3343: PetscBool compute_range;
3345: B_neigs = 0;
3346: compute_range = (PetscBool)!same_data;
3347: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3349: if (pcbddc->dbg_flag) {
3350: PetscInt nc = 0;
3352: if (sub_schurs->change_primal_sub) {
3353: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3354: }
3355: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3356: }
3358: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3359: if (compute_range) {
3361: /* ask for eigenvalues larger than thresh */
3362: if (sub_schurs->is_posdef) {
3363: #if defined(PETSC_USE_COMPLEX)
3364: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3365: #else
3366: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3367: #endif
3368: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3369: } else { /* no theory so far, but it works nicely */
3370: PetscInt recipe = 0,recipe_m = 1;
3371: PetscReal bb[2];
3373: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3374: switch (recipe) {
3375: case 0:
3376: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3377: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3378: #if defined(PETSC_USE_COMPLEX)
3379: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3380: #else
3381: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3382: #endif
3383: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3384: break;
3385: case 1:
3386: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3387: #if defined(PETSC_USE_COMPLEX)
3388: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3389: #else
3390: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3391: #endif
3392: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3393: if (!scal) {
3394: PetscBLASInt B_neigs2 = 0;
3396: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3397: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3398: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3399: #if defined(PETSC_USE_COMPLEX)
3400: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3401: #else
3402: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3403: #endif
3404: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3405: B_neigs += B_neigs2;
3406: }
3407: break;
3408: case 2:
3409: if (scal) {
3410: bb[0] = PETSC_MIN_REAL;
3411: bb[1] = 0;
3412: #if defined(PETSC_USE_COMPLEX)
3413: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3414: #else
3415: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3416: #endif
3417: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3418: } else {
3419: PetscBLASInt B_neigs2 = 0;
3420: PetscBool import = PETSC_FALSE;
3422: lthresh = PetscMax(lthresh,0.0);
3423: if (lthresh > 0.0) {
3424: bb[0] = PETSC_MIN_REAL;
3425: bb[1] = lthresh*lthresh;
3427: import = PETSC_TRUE;
3428: #if defined(PETSC_USE_COMPLEX)
3429: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3430: #else
3431: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3432: #endif
3433: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3434: }
3435: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3436: bb[1] = PETSC_MAX_REAL;
3437: if (import) {
3438: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3439: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3440: }
3441: #if defined(PETSC_USE_COMPLEX)
3442: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3443: #else
3444: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3445: #endif
3446: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3447: B_neigs += B_neigs2;
3448: }
3449: break;
3450: case 3:
3451: if (scal) {
3452: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3453: } else {
3454: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3455: }
3456: if (!scal) {
3457: bb[0] = uthresh;
3458: bb[1] = PETSC_MAX_REAL;
3459: #if defined(PETSC_USE_COMPLEX)
3460: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3461: #else
3462: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3463: #endif
3464: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3465: }
3466: if (recipe_m > 0 && B_N - B_neigs > 0) {
3467: PetscBLASInt B_neigs2 = 0;
3469: B_IL = 1;
3470: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3471: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3472: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3473: #if defined(PETSC_USE_COMPLEX)
3474: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3475: #else
3476: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3477: #endif
3478: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3479: B_neigs += B_neigs2;
3480: }
3481: break;
3482: case 4:
3483: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3484: #if defined(PETSC_USE_COMPLEX)
3485: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3486: #else
3487: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3488: #endif
3489: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3490: {
3491: PetscBLASInt B_neigs2 = 0;
3493: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3494: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3495: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3496: #if defined(PETSC_USE_COMPLEX)
3497: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3498: #else
3499: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3500: #endif
3501: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3502: B_neigs += B_neigs2;
3503: }
3504: break;
3505: case 5: /* same as before: first compute all eigenvalues, then filter */
3506: #if defined(PETSC_USE_COMPLEX)
3507: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3508: #else
3509: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3510: #endif
3511: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3512: {
3513: PetscInt e,k,ne;
3514: for (e=0,ne=0;e<B_neigs;e++) {
3515: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3516: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3517: eigs[ne] = eigs[e];
3518: ne++;
3519: }
3520: }
3521: PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));
3522: B_neigs = ne;
3523: }
3524: break;
3525: default:
3526: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3527: break;
3528: }
3529: }
3530: } else if (!same_data) { /* this is just to see all the eigenvalues */
3531: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3532: B_IL = 1;
3533: #if defined(PETSC_USE_COMPLEX)
3534: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3535: #else
3536: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3537: #endif
3538: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3539: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3540: PetscInt k;
3541: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3542: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3543: PetscBLASIntCast(nmax,&B_neigs);
3544: nmin = nmax;
3545: PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));
3546: for (k=0;k<nmax;k++) {
3547: eigs[k] = 1./PETSC_SMALL;
3548: eigv[k*(subset_size+1)] = 1.0;
3549: }
3550: }
3551: PetscFPTrapPop();
3552: if (B_ierr) {
3553: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3554: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3555: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3556: }
3558: if (B_neigs > nmax) {
3559: if (pcbddc->dbg_flag) {
3560: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3561: }
3562: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3563: B_neigs = nmax;
3564: }
3566: nmin_s = PetscMin(nmin,B_N);
3567: if (B_neigs < nmin_s) {
3568: PetscBLASInt B_neigs2 = 0;
3570: if (pcbddc->use_deluxe_scaling) {
3571: if (scal) {
3572: B_IU = nmin_s;
3573: B_IL = B_neigs + 1;
3574: } else {
3575: B_IL = B_N - nmin_s + 1;
3576: B_IU = B_N - B_neigs;
3577: }
3578: } else {
3579: B_IL = B_neigs + 1;
3580: B_IU = nmin_s;
3581: }
3582: if (pcbddc->dbg_flag) {
3583: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3584: }
3585: if (sub_schurs->is_symmetric) {
3586: PetscInt j,k;
3587: for (j=0;j<subset_size;j++) {
3588: for (k=j;k<subset_size;k++) {
3589: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3590: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3591: }
3592: }
3593: } else {
3594: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3595: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
3596: }
3597: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3598: #if defined(PETSC_USE_COMPLEX)
3599: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3600: #else
3601: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3602: #endif
3603: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3604: PetscFPTrapPop();
3605: B_neigs += B_neigs2;
3606: }
3607: if (B_ierr) {
3608: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3609: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3610: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3611: }
3612: if (pcbddc->dbg_flag) {
3613: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3614: for (j=0;j<B_neigs;j++) {
3615: if (eigs[j] == 0.0) {
3616: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3617: } else {
3618: if (pcbddc->use_deluxe_scaling) {
3619: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3620: } else {
3621: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3622: }
3623: }
3624: }
3625: }
3626: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3627: }
3628: /* change the basis back to the original one */
3629: if (sub_schurs->change) {
3630: Mat change,phi,phit;
3632: if (pcbddc->dbg_flag > 2) {
3633: PetscInt ii;
3634: for (ii=0;ii<B_neigs;ii++) {
3635: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3636: for (j=0;j<B_N;j++) {
3637: #if defined(PETSC_USE_COMPLEX)
3638: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3639: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3640: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3641: #else
3642: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3643: #endif
3644: }
3645: }
3646: }
3647: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3648: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3649: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3650: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3651: MatDestroy(&phit);
3652: MatDestroy(&phi);
3653: }
3654: maxneigs = PetscMax(B_neigs,maxneigs);
3655: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3656: if (B_neigs) {
3657: PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));
3659: if (pcbddc->dbg_flag > 1) {
3660: PetscInt ii;
3661: for (ii=0;ii<B_neigs;ii++) {
3662: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3663: for (j=0;j<B_N;j++) {
3664: #if defined(PETSC_USE_COMPLEX)
3665: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3666: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3667: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3668: #else
3669: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3670: #endif
3671: }
3672: }
3673: }
3674: PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
3675: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3676: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3677: cum++;
3678: }
3679: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3680: /* shift for next computation */
3681: cumarray += subset_size*subset_size;
3682: }
3683: if (pcbddc->dbg_flag) {
3684: PetscViewerFlush(pcbddc->dbg_viewer);
3685: }
3687: if (mss) {
3688: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3689: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3690: /* destroy matrices (junk) */
3691: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3692: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3693: }
3694: if (allocated_S_St) {
3695: PetscFree2(S,St);
3696: }
3697: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3698: #if defined(PETSC_USE_COMPLEX)
3699: PetscFree(rwork);
3700: #endif
3701: if (pcbddc->dbg_flag) {
3702: PetscInt maxneigs_r;
3703: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3704: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3705: }
3706: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3707: return(0);
3708: }
3710: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3711: {
3712: PetscScalar *coarse_submat_vals;
3716: /* Setup local scatters R_to_B and (optionally) R_to_D */
3717: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3718: PCBDDCSetUpLocalScatters(pc);
3720: /* Setup local neumann solver ksp_R */
3721: /* PCBDDCSetUpLocalScatters should be called first! */
3722: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3724: /*
3725: Setup local correction and local part of coarse basis.
3726: Gives back the dense local part of the coarse matrix in column major ordering
3727: */
3728: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3730: /* Compute total number of coarse nodes and setup coarse solver */
3731: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3733: /* free */
3734: PetscFree(coarse_submat_vals);
3735: return(0);
3736: }
3738: PetscErrorCode PCBDDCResetCustomization(PC pc)
3739: {
3740: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3744: ISDestroy(&pcbddc->user_primal_vertices);
3745: ISDestroy(&pcbddc->user_primal_vertices_local);
3746: ISDestroy(&pcbddc->NeumannBoundaries);
3747: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3748: ISDestroy(&pcbddc->DirichletBoundaries);
3749: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3750: PetscFree(pcbddc->onearnullvecs_state);
3751: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3752: PCBDDCSetDofsSplitting(pc,0,NULL);
3753: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3754: return(0);
3755: }
3757: PetscErrorCode PCBDDCResetTopography(PC pc)
3758: {
3759: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3760: PetscInt i;
3764: MatDestroy(&pcbddc->nedcG);
3765: ISDestroy(&pcbddc->nedclocal);
3766: MatDestroy(&pcbddc->discretegradient);
3767: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3768: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3769: MatDestroy(&pcbddc->switch_static_change);
3770: VecDestroy(&pcbddc->work_change);
3771: MatDestroy(&pcbddc->ConstraintMatrix);
3772: MatDestroy(&pcbddc->divudotp);
3773: ISDestroy(&pcbddc->divudotp_vl2l);
3774: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3775: for (i=0;i<pcbddc->n_local_subs;i++) {
3776: ISDestroy(&pcbddc->local_subs[i]);
3777: }
3778: pcbddc->n_local_subs = 0;
3779: PetscFree(pcbddc->local_subs);
3780: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3781: pcbddc->graphanalyzed = PETSC_FALSE;
3782: pcbddc->recompute_topography = PETSC_TRUE;
3783: pcbddc->corner_selected = PETSC_FALSE;
3784: return(0);
3785: }
3787: PetscErrorCode PCBDDCResetSolvers(PC pc)
3788: {
3789: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3793: VecDestroy(&pcbddc->coarse_vec);
3794: if (pcbddc->coarse_phi_B) {
3795: PetscScalar *array;
3796: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3797: PetscFree(array);
3798: }
3799: MatDestroy(&pcbddc->coarse_phi_B);
3800: MatDestroy(&pcbddc->coarse_phi_D);
3801: MatDestroy(&pcbddc->coarse_psi_B);
3802: MatDestroy(&pcbddc->coarse_psi_D);
3803: VecDestroy(&pcbddc->vec1_P);
3804: VecDestroy(&pcbddc->vec1_C);
3805: MatDestroy(&pcbddc->local_auxmat2);
3806: MatDestroy(&pcbddc->local_auxmat1);
3807: VecDestroy(&pcbddc->vec1_R);
3808: VecDestroy(&pcbddc->vec2_R);
3809: ISDestroy(&pcbddc->is_R_local);
3810: VecScatterDestroy(&pcbddc->R_to_B);
3811: VecScatterDestroy(&pcbddc->R_to_D);
3812: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3813: KSPReset(pcbddc->ksp_D);
3814: KSPReset(pcbddc->ksp_R);
3815: KSPReset(pcbddc->coarse_ksp);
3816: MatDestroy(&pcbddc->local_mat);
3817: PetscFree(pcbddc->primal_indices_local_idxs);
3818: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3819: PetscFree(pcbddc->global_primal_indices);
3820: ISDestroy(&pcbddc->coarse_subassembling);
3821: MatDestroy(&pcbddc->benign_change);
3822: VecDestroy(&pcbddc->benign_vec);
3823: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3824: MatDestroy(&pcbddc->benign_B0);
3825: PetscSFDestroy(&pcbddc->benign_sf);
3826: if (pcbddc->benign_zerodiag_subs) {
3827: PetscInt i;
3828: for (i=0;i<pcbddc->benign_n;i++) {
3829: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3830: }
3831: PetscFree(pcbddc->benign_zerodiag_subs);
3832: }
3833: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3834: return(0);
3835: }
3837: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3838: {
3839: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3840: PC_IS *pcis = (PC_IS*)pc->data;
3841: VecType impVecType;
3842: PetscInt n_constraints,n_R,old_size;
3846: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3847: n_R = pcis->n - pcbddc->n_vertices;
3848: VecGetType(pcis->vec1_N,&impVecType);
3849: /* local work vectors (try to avoid unneeded work)*/
3850: /* R nodes */
3851: old_size = -1;
3852: if (pcbddc->vec1_R) {
3853: VecGetSize(pcbddc->vec1_R,&old_size);
3854: }
3855: if (n_R != old_size) {
3856: VecDestroy(&pcbddc->vec1_R);
3857: VecDestroy(&pcbddc->vec2_R);
3858: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3859: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3860: VecSetType(pcbddc->vec1_R,impVecType);
3861: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3862: }
3863: /* local primal dofs */
3864: old_size = -1;
3865: if (pcbddc->vec1_P) {
3866: VecGetSize(pcbddc->vec1_P,&old_size);
3867: }
3868: if (pcbddc->local_primal_size != old_size) {
3869: VecDestroy(&pcbddc->vec1_P);
3870: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3871: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3872: VecSetType(pcbddc->vec1_P,impVecType);
3873: }
3874: /* local explicit constraints */
3875: old_size = -1;
3876: if (pcbddc->vec1_C) {
3877: VecGetSize(pcbddc->vec1_C,&old_size);
3878: }
3879: if (n_constraints && n_constraints != old_size) {
3880: VecDestroy(&pcbddc->vec1_C);
3881: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3882: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3883: VecSetType(pcbddc->vec1_C,impVecType);
3884: }
3885: return(0);
3886: }
3888: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3889: {
3890: PetscErrorCode ierr;
3891: /* pointers to pcis and pcbddc */
3892: PC_IS* pcis = (PC_IS*)pc->data;
3893: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3894: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3895: /* submatrices of local problem */
3896: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3897: /* submatrices of local coarse problem */
3898: Mat S_VV,S_CV,S_VC,S_CC;
3899: /* working matrices */
3900: Mat C_CR;
3901: /* additional working stuff */
3902: PC pc_R;
3903: Mat F,Brhs = NULL;
3904: Vec dummy_vec;
3905: PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs;
3906: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3907: PetscScalar *work;
3908: PetscInt *idx_V_B;
3909: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3910: PetscInt i,n_R,n_D,n_B;
3912: /* some shortcuts to scalars */
3913: PetscScalar one=1.0,m_one=-1.0;
3916: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3917: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3919: /* Set Non-overlapping dimensions */
3920: n_vertices = pcbddc->n_vertices;
3921: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3922: n_B = pcis->n_B;
3923: n_D = pcis->n - n_B;
3924: n_R = pcis->n - n_vertices;
3926: /* vertices in boundary numbering */
3927: PetscMalloc1(n_vertices,&idx_V_B);
3928: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3929: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3931: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3932: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3933: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3934: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
3935: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3936: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
3937: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3938: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
3939: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3940: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
3942: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3943: KSPGetPC(pcbddc->ksp_R,&pc_R);
3944: PCSetUp(pc_R);
3945: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3946: PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
3947: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3948: lda_rhs = n_R;
3949: need_benign_correction = PETSC_FALSE;
3950: if (isLU || isILU || isCHOL) {
3951: PCFactorGetMatrix(pc_R,&F);
3952: } else if (sub_schurs && sub_schurs->reuse_solver) {
3953: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3954: MatFactorType type;
3956: F = reuse_solver->F;
3957: MatGetFactorType(F,&type);
3958: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3959: MatGetSize(F,&lda_rhs,NULL);
3960: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3961: } else {
3962: F = NULL;
3963: }
3965: /* determine if we can use a sparse right-hand side */
3966: sparserhs = PETSC_FALSE;
3967: if (F) {
3968: MatSolverType solver;
3970: MatFactorGetSolverType(F,&solver);
3971: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3972: }
3974: /* allocate workspace */
3975: n = 0;
3976: if (n_constraints) {
3977: n += lda_rhs*n_constraints;
3978: }
3979: if (n_vertices) {
3980: n = PetscMax(2*lda_rhs*n_vertices,n);
3981: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3982: }
3983: if (!pcbddc->symmetric_primal) {
3984: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3985: }
3986: PetscMalloc1(n,&work);
3988: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3989: dummy_vec = NULL;
3990: if (need_benign_correction && lda_rhs != n_R && F) {
3991: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3992: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3993: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3994: }
3996: /* Precompute stuffs needed for preprocessing and application of BDDC*/
3997: if (n_constraints) {
3998: Mat M3,C_B;
3999: IS is_aux;
4000: PetscScalar *array,*array2;
4002: MatDestroy(&pcbddc->local_auxmat1);
4003: MatDestroy(&pcbddc->local_auxmat2);
4005: /* Extract constraints on R nodes: C_{CR} */
4006: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4007: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4008: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4010: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4011: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4012: if (!sparserhs) {
4013: PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));
4014: for (i=0;i<n_constraints;i++) {
4015: const PetscScalar *row_cmat_values;
4016: const PetscInt *row_cmat_indices;
4017: PetscInt size_of_constraint,j;
4019: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4020: for (j=0;j<size_of_constraint;j++) {
4021: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4022: }
4023: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4024: }
4025: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4026: } else {
4027: Mat tC_CR;
4029: MatScale(C_CR,-1.0);
4030: if (lda_rhs != n_R) {
4031: PetscScalar *aa;
4032: PetscInt r,*ii,*jj;
4033: PetscBool done;
4035: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4036: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4037: MatSeqAIJGetArray(C_CR,&aa);
4038: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4039: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4040: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4041: } else {
4042: PetscObjectReference((PetscObject)C_CR);
4043: tC_CR = C_CR;
4044: }
4045: MatCreateTranspose(tC_CR,&Brhs);
4046: MatDestroy(&tC_CR);
4047: }
4048: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4049: if (F) {
4050: if (need_benign_correction) {
4051: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4053: /* rhs is already zero on interior dofs, no need to change the rhs */
4054: PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));
4055: }
4056: MatMatSolve(F,Brhs,local_auxmat2_R);
4057: if (need_benign_correction) {
4058: PetscScalar *marr;
4059: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4061: MatDenseGetArray(local_auxmat2_R,&marr);
4062: if (lda_rhs != n_R) {
4063: for (i=0;i<n_constraints;i++) {
4064: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4065: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4066: VecResetArray(dummy_vec);
4067: }
4068: } else {
4069: for (i=0;i<n_constraints;i++) {
4070: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4071: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4072: VecResetArray(pcbddc->vec1_R);
4073: }
4074: }
4075: MatDenseRestoreArray(local_auxmat2_R,&marr);
4076: }
4077: } else {
4078: PetscScalar *marr;
4080: MatDenseGetArray(local_auxmat2_R,&marr);
4081: for (i=0;i<n_constraints;i++) {
4082: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4083: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4084: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4085: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4086: VecResetArray(pcbddc->vec1_R);
4087: VecResetArray(pcbddc->vec2_R);
4088: }
4089: MatDenseRestoreArray(local_auxmat2_R,&marr);
4090: }
4091: if (sparserhs) {
4092: MatScale(C_CR,-1.0);
4093: }
4094: MatDestroy(&Brhs);
4095: if (!pcbddc->switch_static) {
4096: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4097: MatDenseGetArray(pcbddc->local_auxmat2,&array);
4098: MatDenseGetArray(local_auxmat2_R,&array2);
4099: for (i=0;i<n_constraints;i++) {
4100: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4101: VecPlaceArray(pcis->vec1_B,array+i*n_B);
4102: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4103: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4104: VecResetArray(pcis->vec1_B);
4105: VecResetArray(pcbddc->vec1_R);
4106: }
4107: MatDenseRestoreArray(local_auxmat2_R,&array2);
4108: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4109: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4110: } else {
4111: if (lda_rhs != n_R) {
4112: IS dummy;
4114: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4115: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4116: ISDestroy(&dummy);
4117: } else {
4118: PetscObjectReference((PetscObject)local_auxmat2_R);
4119: pcbddc->local_auxmat2 = local_auxmat2_R;
4120: }
4121: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4122: }
4123: ISDestroy(&is_aux);
4124: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
4125: MatScale(M3,m_one);
4126: if (isCHOL) {
4127: MatCholeskyFactor(M3,NULL,NULL);
4128: } else {
4129: MatLUFactor(M3,NULL,NULL,NULL);
4130: }
4131: MatSeqDenseInvertFactors_Private(M3);
4132: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4133: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4134: MatDestroy(&C_B);
4135: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4136: MatDestroy(&M3);
4137: }
4139: /* Get submatrices from subdomain matrix */
4140: if (n_vertices) {
4141: IS is_aux;
4142: PetscBool isseqaij;
4144: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4145: IS tis;
4147: ISDuplicate(pcbddc->is_R_local,&tis);
4148: ISSort(tis);
4149: ISComplement(tis,0,pcis->n,&is_aux);
4150: ISDestroy(&tis);
4151: } else {
4152: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4153: }
4154: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4155: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4156: PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
4157: if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */
4158: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4159: }
4160: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4161: ISDestroy(&is_aux);
4162: }
4164: /* Matrix of coarse basis functions (local) */
4165: if (pcbddc->coarse_phi_B) {
4166: PetscInt on_B,on_primal,on_D=n_D;
4167: if (pcbddc->coarse_phi_D) {
4168: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4169: }
4170: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4171: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4172: PetscScalar *marray;
4174: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4175: PetscFree(marray);
4176: MatDestroy(&pcbddc->coarse_phi_B);
4177: MatDestroy(&pcbddc->coarse_psi_B);
4178: MatDestroy(&pcbddc->coarse_phi_D);
4179: MatDestroy(&pcbddc->coarse_psi_D);
4180: }
4181: }
4183: if (!pcbddc->coarse_phi_B) {
4184: PetscScalar *marr;
4186: /* memory size */
4187: n = n_B*pcbddc->local_primal_size;
4188: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4189: if (!pcbddc->symmetric_primal) n *= 2;
4190: PetscCalloc1(n,&marr);
4191: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4192: marr += n_B*pcbddc->local_primal_size;
4193: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4194: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4195: marr += n_D*pcbddc->local_primal_size;
4196: }
4197: if (!pcbddc->symmetric_primal) {
4198: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4199: marr += n_B*pcbddc->local_primal_size;
4200: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4201: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4202: }
4203: } else {
4204: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4205: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4206: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4207: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4208: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4209: }
4210: }
4211: }
4213: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4214: p0_lidx_I = NULL;
4215: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4216: const PetscInt *idxs;
4218: ISGetIndices(pcis->is_I_local,&idxs);
4219: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4220: for (i=0;i<pcbddc->benign_n;i++) {
4221: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4222: }
4223: ISRestoreIndices(pcis->is_I_local,&idxs);
4224: }
4226: /* vertices */
4227: if (n_vertices) {
4228: PetscBool restoreavr = PETSC_FALSE;
4230: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4232: if (n_R) {
4233: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4234: PetscBLASInt B_N,B_one = 1;
4235: PetscScalar *x,*y;
4237: MatScale(A_RV,m_one);
4238: if (need_benign_correction) {
4239: ISLocalToGlobalMapping RtoN;
4240: IS is_p0;
4241: PetscInt *idxs_p0,n;
4243: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4244: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4245: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4246: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4247: ISLocalToGlobalMappingDestroy(&RtoN);
4248: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4249: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4250: ISDestroy(&is_p0);
4251: }
4253: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4254: if (!sparserhs || need_benign_correction) {
4255: if (lda_rhs == n_R) {
4256: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4257: } else {
4258: PetscScalar *av,*array;
4259: const PetscInt *xadj,*adjncy;
4260: PetscInt n;
4261: PetscBool flg_row;
4263: array = work+lda_rhs*n_vertices;
4264: PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));
4265: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4266: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4267: MatSeqAIJGetArray(A_RV,&av);
4268: for (i=0;i<n;i++) {
4269: PetscInt j;
4270: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4271: }
4272: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4273: MatDestroy(&A_RV);
4274: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4275: }
4276: if (need_benign_correction) {
4277: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4278: PetscScalar *marr;
4280: MatDenseGetArray(A_RV,&marr);
4281: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4283: | 0 0 0 | (V)
4284: L = | 0 0 -1 | (P-p0)
4285: | 0 0 -1 | (p0)
4287: */
4288: for (i=0;i<reuse_solver->benign_n;i++) {
4289: const PetscScalar *vals;
4290: const PetscInt *idxs,*idxs_zero;
4291: PetscInt n,j,nz;
4293: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4294: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4295: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4296: for (j=0;j<n;j++) {
4297: PetscScalar val = vals[j];
4298: PetscInt k,col = idxs[j];
4299: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4300: }
4301: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4302: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4303: }
4304: MatDenseRestoreArray(A_RV,&marr);
4305: }
4306: PetscObjectReference((PetscObject)A_RV);
4307: Brhs = A_RV;
4308: } else {
4309: Mat tA_RVT,A_RVT;
4311: if (!pcbddc->symmetric_primal) {
4312: /* A_RV already scaled by -1 */
4313: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4314: } else {
4315: restoreavr = PETSC_TRUE;
4316: MatScale(A_VR,-1.0);
4317: PetscObjectReference((PetscObject)A_VR);
4318: A_RVT = A_VR;
4319: }
4320: if (lda_rhs != n_R) {
4321: PetscScalar *aa;
4322: PetscInt r,*ii,*jj;
4323: PetscBool done;
4325: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4326: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4327: MatSeqAIJGetArray(A_RVT,&aa);
4328: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4329: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4330: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4331: } else {
4332: PetscObjectReference((PetscObject)A_RVT);
4333: tA_RVT = A_RVT;
4334: }
4335: MatCreateTranspose(tA_RVT,&Brhs);
4336: MatDestroy(&tA_RVT);
4337: MatDestroy(&A_RVT);
4338: }
4339: if (F) {
4340: /* need to correct the rhs */
4341: if (need_benign_correction) {
4342: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4343: PetscScalar *marr;
4345: MatDenseGetArray(Brhs,&marr);
4346: if (lda_rhs != n_R) {
4347: for (i=0;i<n_vertices;i++) {
4348: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4349: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4350: VecResetArray(dummy_vec);
4351: }
4352: } else {
4353: for (i=0;i<n_vertices;i++) {
4354: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4355: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4356: VecResetArray(pcbddc->vec1_R);
4357: }
4358: }
4359: MatDenseRestoreArray(Brhs,&marr);
4360: }
4361: MatMatSolve(F,Brhs,A_RRmA_RV);
4362: if (restoreavr) {
4363: MatScale(A_VR,-1.0);
4364: }
4365: /* need to correct the solution */
4366: if (need_benign_correction) {
4367: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4368: PetscScalar *marr;
4370: MatDenseGetArray(A_RRmA_RV,&marr);
4371: if (lda_rhs != n_R) {
4372: for (i=0;i<n_vertices;i++) {
4373: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4374: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4375: VecResetArray(dummy_vec);
4376: }
4377: } else {
4378: for (i=0;i<n_vertices;i++) {
4379: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4380: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4381: VecResetArray(pcbddc->vec1_R);
4382: }
4383: }
4384: MatDenseRestoreArray(A_RRmA_RV,&marr);
4385: }
4386: } else {
4387: MatDenseGetArray(Brhs,&y);
4388: for (i=0;i<n_vertices;i++) {
4389: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4390: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4391: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4392: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4393: VecResetArray(pcbddc->vec1_R);
4394: VecResetArray(pcbddc->vec2_R);
4395: }
4396: MatDenseRestoreArray(Brhs,&y);
4397: }
4398: MatDestroy(&A_RV);
4399: MatDestroy(&Brhs);
4400: /* S_VV and S_CV */
4401: if (n_constraints) {
4402: Mat B;
4404: PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
4405: for (i=0;i<n_vertices;i++) {
4406: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4407: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4408: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4409: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4410: VecResetArray(pcis->vec1_B);
4411: VecResetArray(pcbddc->vec1_R);
4412: }
4413: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4414: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
4415: MatDestroy(&B);
4416: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4417: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4418: MatScale(S_CV,m_one);
4419: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4420: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4421: MatDestroy(&B);
4422: }
4423: if (lda_rhs != n_R) {
4424: MatDestroy(&A_RRmA_RV);
4425: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4426: MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);
4427: }
4428: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4429: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4430: if (need_benign_correction) {
4431: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4432: PetscScalar *marr,*sums;
4434: PetscMalloc1(n_vertices,&sums);
4435: MatDenseGetArray(S_VVt,&marr);
4436: for (i=0;i<reuse_solver->benign_n;i++) {
4437: const PetscScalar *vals;
4438: const PetscInt *idxs,*idxs_zero;
4439: PetscInt n,j,nz;
4441: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4442: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4443: for (j=0;j<n_vertices;j++) {
4444: PetscInt k;
4445: sums[j] = 0.;
4446: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4447: }
4448: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4449: for (j=0;j<n;j++) {
4450: PetscScalar val = vals[j];
4451: PetscInt k;
4452: for (k=0;k<n_vertices;k++) {
4453: marr[idxs[j]+k*n_vertices] += val*sums[k];
4454: }
4455: }
4456: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4457: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4458: }
4459: PetscFree(sums);
4460: MatDenseRestoreArray(S_VVt,&marr);
4461: MatDestroy(&A_RV_bcorr);
4462: }
4463: MatDestroy(&A_RRmA_RV);
4464: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4465: MatDenseGetArray(A_VV,&x);
4466: MatDenseGetArray(S_VVt,&y);
4467: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4468: MatDenseRestoreArray(A_VV,&x);
4469: MatDenseRestoreArray(S_VVt,&y);
4470: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4471: MatDestroy(&S_VVt);
4472: } else {
4473: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4474: }
4475: MatDestroy(&A_VV);
4477: /* coarse basis functions */
4478: for (i=0;i<n_vertices;i++) {
4479: PetscScalar *y;
4481: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4482: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4483: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4484: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4485: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4486: y[n_B*i+idx_V_B[i]] = 1.0;
4487: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4488: VecResetArray(pcis->vec1_B);
4490: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4491: PetscInt j;
4493: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4494: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4495: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4496: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4497: VecResetArray(pcis->vec1_D);
4498: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4499: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4500: }
4501: VecResetArray(pcbddc->vec1_R);
4502: }
4503: /* if n_R == 0 the object is not destroyed */
4504: MatDestroy(&A_RV);
4505: }
4506: VecDestroy(&dummy_vec);
4508: if (n_constraints) {
4509: Mat B;
4511: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4512: MatScale(S_CC,m_one);
4513: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
4514: MatScale(S_CC,m_one);
4515: if (n_vertices) {
4516: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4517: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4518: } else {
4519: Mat S_VCt;
4521: if (lda_rhs != n_R) {
4522: MatDestroy(&B);
4523: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4524: MatSeqDenseSetLDA(B,lda_rhs);
4525: }
4526: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4527: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4528: MatDestroy(&S_VCt);
4529: }
4530: }
4531: MatDestroy(&B);
4532: /* coarse basis functions */
4533: for (i=0;i<n_constraints;i++) {
4534: PetscScalar *y;
4536: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4537: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4538: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4539: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4540: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4541: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4542: VecResetArray(pcis->vec1_B);
4543: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4544: PetscInt j;
4546: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4547: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4548: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4549: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4550: VecResetArray(pcis->vec1_D);
4551: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4552: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4553: }
4554: VecResetArray(pcbddc->vec1_R);
4555: }
4556: }
4557: if (n_constraints) {
4558: MatDestroy(&local_auxmat2_R);
4559: }
4560: PetscFree(p0_lidx_I);
4562: /* coarse matrix entries relative to B_0 */
4563: if (pcbddc->benign_n) {
4564: Mat B0_B,B0_BPHI;
4565: IS is_dummy;
4566: PetscScalar *data;
4567: PetscInt j;
4569: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4570: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4571: ISDestroy(&is_dummy);
4572: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4573: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4574: MatDenseGetArray(B0_BPHI,&data);
4575: for (j=0;j<pcbddc->benign_n;j++) {
4576: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4577: for (i=0;i<pcbddc->local_primal_size;i++) {
4578: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4579: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4580: }
4581: }
4582: MatDenseRestoreArray(B0_BPHI,&data);
4583: MatDestroy(&B0_B);
4584: MatDestroy(&B0_BPHI);
4585: }
4587: /* compute other basis functions for non-symmetric problems */
4588: if (!pcbddc->symmetric_primal) {
4589: Mat B_V=NULL,B_C=NULL;
4590: PetscScalar *marray;
4592: if (n_constraints) {
4593: Mat S_CCT,C_CRT;
4595: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4596: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4597: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4598: MatDestroy(&S_CCT);
4599: if (n_vertices) {
4600: Mat S_VCT;
4602: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4603: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4604: MatDestroy(&S_VCT);
4605: }
4606: MatDestroy(&C_CRT);
4607: } else {
4608: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4609: }
4610: if (n_vertices && n_R) {
4611: PetscScalar *av,*marray;
4612: const PetscInt *xadj,*adjncy;
4613: PetscInt n;
4614: PetscBool flg_row;
4616: /* B_V = B_V - A_VR^T */
4617: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4618: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4619: MatSeqAIJGetArray(A_VR,&av);
4620: MatDenseGetArray(B_V,&marray);
4621: for (i=0;i<n;i++) {
4622: PetscInt j;
4623: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4624: }
4625: MatDenseRestoreArray(B_V,&marray);
4626: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4627: MatDestroy(&A_VR);
4628: }
4630: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4631: if (n_vertices) {
4632: MatDenseGetArray(B_V,&marray);
4633: for (i=0;i<n_vertices;i++) {
4634: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4635: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4636: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4637: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4638: VecResetArray(pcbddc->vec1_R);
4639: VecResetArray(pcbddc->vec2_R);
4640: }
4641: MatDenseRestoreArray(B_V,&marray);
4642: }
4643: if (B_C) {
4644: MatDenseGetArray(B_C,&marray);
4645: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4646: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4647: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4648: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4649: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4650: VecResetArray(pcbddc->vec1_R);
4651: VecResetArray(pcbddc->vec2_R);
4652: }
4653: MatDenseRestoreArray(B_C,&marray);
4654: }
4655: /* coarse basis functions */
4656: for (i=0;i<pcbddc->local_primal_size;i++) {
4657: PetscScalar *y;
4659: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4660: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4661: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4662: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4663: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4664: if (i<n_vertices) {
4665: y[n_B*i+idx_V_B[i]] = 1.0;
4666: }
4667: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4668: VecResetArray(pcis->vec1_B);
4670: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4671: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4672: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4673: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4674: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4675: VecResetArray(pcis->vec1_D);
4676: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4677: }
4678: VecResetArray(pcbddc->vec1_R);
4679: }
4680: MatDestroy(&B_V);
4681: MatDestroy(&B_C);
4682: }
4684: /* free memory */
4685: PetscFree(idx_V_B);
4686: MatDestroy(&S_VV);
4687: MatDestroy(&S_CV);
4688: MatDestroy(&S_VC);
4689: MatDestroy(&S_CC);
4690: PetscFree(work);
4691: if (n_vertices) {
4692: MatDestroy(&A_VR);
4693: }
4694: if (n_constraints) {
4695: MatDestroy(&C_CR);
4696: }
4697: /* Checking coarse_sub_mat and coarse basis functios */
4698: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4699: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4700: if (pcbddc->dbg_flag) {
4701: Mat coarse_sub_mat;
4702: Mat AUXMAT,TM1,TM2,TM3,TM4;
4703: Mat coarse_phi_D,coarse_phi_B;
4704: Mat coarse_psi_D,coarse_psi_B;
4705: Mat A_II,A_BB,A_IB,A_BI;
4706: Mat C_B,CPHI;
4707: IS is_dummy;
4708: Vec mones;
4709: MatType checkmattype=MATSEQAIJ;
4710: PetscReal real_value;
4712: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4713: Mat A;
4714: PCBDDCBenignProject(pc,NULL,NULL,&A);
4715: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4716: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4717: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4718: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4719: MatDestroy(&A);
4720: } else {
4721: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4722: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4723: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4724: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4725: }
4726: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4727: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4728: if (!pcbddc->symmetric_primal) {
4729: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4730: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4731: }
4732: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4734: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4735: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4736: PetscViewerFlush(pcbddc->dbg_viewer);
4737: if (!pcbddc->symmetric_primal) {
4738: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4739: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4740: MatDestroy(&AUXMAT);
4741: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4742: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4743: MatDestroy(&AUXMAT);
4744: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4745: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4746: MatDestroy(&AUXMAT);
4747: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4748: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4749: MatDestroy(&AUXMAT);
4750: } else {
4751: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4752: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4753: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4754: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4755: MatDestroy(&AUXMAT);
4756: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4757: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4758: MatDestroy(&AUXMAT);
4759: }
4760: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4761: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4762: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4763: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4764: if (pcbddc->benign_n) {
4765: Mat B0_B,B0_BPHI;
4766: PetscScalar *data,*data2;
4767: PetscInt j;
4769: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4770: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4771: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4772: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4773: MatDenseGetArray(TM1,&data);
4774: MatDenseGetArray(B0_BPHI,&data2);
4775: for (j=0;j<pcbddc->benign_n;j++) {
4776: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4777: for (i=0;i<pcbddc->local_primal_size;i++) {
4778: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4779: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4780: }
4781: }
4782: MatDenseRestoreArray(TM1,&data);
4783: MatDenseRestoreArray(B0_BPHI,&data2);
4784: MatDestroy(&B0_B);
4785: ISDestroy(&is_dummy);
4786: MatDestroy(&B0_BPHI);
4787: }
4788: #if 0
4789: {
4790: PetscViewer viewer;
4791: char filename[256];
4792: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4793: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4794: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4795: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4796: MatView(coarse_sub_mat,viewer);
4797: PetscObjectSetName((PetscObject)TM1,"projected");
4798: MatView(TM1,viewer);
4799: if (pcbddc->coarse_phi_B) {
4800: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4801: MatView(pcbddc->coarse_phi_B,viewer);
4802: }
4803: if (pcbddc->coarse_phi_D) {
4804: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4805: MatView(pcbddc->coarse_phi_D,viewer);
4806: }
4807: if (pcbddc->coarse_psi_B) {
4808: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4809: MatView(pcbddc->coarse_psi_B,viewer);
4810: }
4811: if (pcbddc->coarse_psi_D) {
4812: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4813: MatView(pcbddc->coarse_psi_D,viewer);
4814: }
4815: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4816: MatView(pcbddc->local_mat,viewer);
4817: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4818: MatView(pcbddc->ConstraintMatrix,viewer);
4819: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4820: ISView(pcis->is_I_local,viewer);
4821: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4822: ISView(pcis->is_B_local,viewer);
4823: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4824: ISView(pcbddc->is_R_local,viewer);
4825: PetscViewerDestroy(&viewer);
4826: }
4827: #endif
4828: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4829: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4830: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4831: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4833: /* check constraints */
4834: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4835: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4836: if (!pcbddc->benign_n) { /* TODO: add benign case */
4837: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4838: } else {
4839: PetscScalar *data;
4840: Mat tmat;
4841: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4842: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4843: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4844: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4845: MatDestroy(&tmat);
4846: }
4847: MatCreateVecs(CPHI,&mones,NULL);
4848: VecSet(mones,-1.0);
4849: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4850: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4851: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4852: if (!pcbddc->symmetric_primal) {
4853: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4854: VecSet(mones,-1.0);
4855: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4856: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4857: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4858: }
4859: MatDestroy(&C_B);
4860: MatDestroy(&CPHI);
4861: ISDestroy(&is_dummy);
4862: VecDestroy(&mones);
4863: PetscViewerFlush(pcbddc->dbg_viewer);
4864: MatDestroy(&A_II);
4865: MatDestroy(&A_BB);
4866: MatDestroy(&A_IB);
4867: MatDestroy(&A_BI);
4868: MatDestroy(&TM1);
4869: MatDestroy(&TM2);
4870: MatDestroy(&TM3);
4871: MatDestroy(&TM4);
4872: MatDestroy(&coarse_phi_D);
4873: MatDestroy(&coarse_phi_B);
4874: if (!pcbddc->symmetric_primal) {
4875: MatDestroy(&coarse_psi_D);
4876: MatDestroy(&coarse_psi_B);
4877: }
4878: MatDestroy(&coarse_sub_mat);
4879: }
4880: /* get back data */
4881: *coarse_submat_vals_n = coarse_submat_vals;
4882: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4883: return(0);
4884: }
4886: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4887: {
4888: Mat *work_mat;
4889: IS isrow_s,iscol_s;
4890: PetscBool rsorted,csorted;
4891: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4895: ISSorted(isrow,&rsorted);
4896: ISSorted(iscol,&csorted);
4897: ISGetLocalSize(isrow,&rsize);
4898: ISGetLocalSize(iscol,&csize);
4900: if (!rsorted) {
4901: const PetscInt *idxs;
4902: PetscInt *idxs_sorted,i;
4904: PetscMalloc1(rsize,&idxs_perm_r);
4905: PetscMalloc1(rsize,&idxs_sorted);
4906: for (i=0;i<rsize;i++) {
4907: idxs_perm_r[i] = i;
4908: }
4909: ISGetIndices(isrow,&idxs);
4910: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4911: for (i=0;i<rsize;i++) {
4912: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4913: }
4914: ISRestoreIndices(isrow,&idxs);
4915: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4916: } else {
4917: PetscObjectReference((PetscObject)isrow);
4918: isrow_s = isrow;
4919: }
4921: if (!csorted) {
4922: if (isrow == iscol) {
4923: PetscObjectReference((PetscObject)isrow_s);
4924: iscol_s = isrow_s;
4925: } else {
4926: const PetscInt *idxs;
4927: PetscInt *idxs_sorted,i;
4929: PetscMalloc1(csize,&idxs_perm_c);
4930: PetscMalloc1(csize,&idxs_sorted);
4931: for (i=0;i<csize;i++) {
4932: idxs_perm_c[i] = i;
4933: }
4934: ISGetIndices(iscol,&idxs);
4935: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4936: for (i=0;i<csize;i++) {
4937: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4938: }
4939: ISRestoreIndices(iscol,&idxs);
4940: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
4941: }
4942: } else {
4943: PetscObjectReference((PetscObject)iscol);
4944: iscol_s = iscol;
4945: }
4947: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
4949: if (!rsorted || !csorted) {
4950: Mat new_mat;
4951: IS is_perm_r,is_perm_c;
4953: if (!rsorted) {
4954: PetscInt *idxs_r,i;
4955: PetscMalloc1(rsize,&idxs_r);
4956: for (i=0;i<rsize;i++) {
4957: idxs_r[idxs_perm_r[i]] = i;
4958: }
4959: PetscFree(idxs_perm_r);
4960: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
4961: } else {
4962: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
4963: }
4964: ISSetPermutation(is_perm_r);
4966: if (!csorted) {
4967: if (isrow_s == iscol_s) {
4968: PetscObjectReference((PetscObject)is_perm_r);
4969: is_perm_c = is_perm_r;
4970: } else {
4971: PetscInt *idxs_c,i;
4972: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
4973: PetscMalloc1(csize,&idxs_c);
4974: for (i=0;i<csize;i++) {
4975: idxs_c[idxs_perm_c[i]] = i;
4976: }
4977: PetscFree(idxs_perm_c);
4978: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
4979: }
4980: } else {
4981: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
4982: }
4983: ISSetPermutation(is_perm_c);
4985: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
4986: MatDestroy(&work_mat[0]);
4987: work_mat[0] = new_mat;
4988: ISDestroy(&is_perm_r);
4989: ISDestroy(&is_perm_c);
4990: }
4992: PetscObjectReference((PetscObject)work_mat[0]);
4993: *B = work_mat[0];
4994: MatDestroyMatrices(1,&work_mat);
4995: ISDestroy(&isrow_s);
4996: ISDestroy(&iscol_s);
4997: return(0);
4998: }
5000: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5001: {
5002: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5003: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5004: Mat new_mat,lA;
5005: IS is_local,is_global;
5006: PetscInt local_size;
5007: PetscBool isseqaij;
5011: MatDestroy(&pcbddc->local_mat);
5012: MatGetSize(matis->A,&local_size,NULL);
5013: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5014: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5015: ISDestroy(&is_local);
5016: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5017: ISDestroy(&is_global);
5019: /* check */
5020: if (pcbddc->dbg_flag) {
5021: Vec x,x_change;
5022: PetscReal error;
5024: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5025: VecSetRandom(x,NULL);
5026: MatMult(ChangeOfBasisMatrix,x,x_change);
5027: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5028: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5029: MatMult(new_mat,matis->x,matis->y);
5030: if (!pcbddc->change_interior) {
5031: const PetscScalar *x,*y,*v;
5032: PetscReal lerror = 0.;
5033: PetscInt i;
5035: VecGetArrayRead(matis->x,&x);
5036: VecGetArrayRead(matis->y,&y);
5037: VecGetArrayRead(matis->counter,&v);
5038: for (i=0;i<local_size;i++)
5039: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5040: lerror = PetscAbsScalar(x[i]-y[i]);
5041: VecRestoreArrayRead(matis->x,&x);
5042: VecRestoreArrayRead(matis->y,&y);
5043: VecRestoreArrayRead(matis->counter,&v);
5044: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5045: if (error > PETSC_SMALL) {
5046: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5047: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5048: } else {
5049: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5050: }
5051: }
5052: }
5053: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5054: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5055: VecAXPY(x,-1.0,x_change);
5056: VecNorm(x,NORM_INFINITY,&error);
5057: if (error > PETSC_SMALL) {
5058: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5059: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5060: } else {
5061: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5062: }
5063: }
5064: VecDestroy(&x);
5065: VecDestroy(&x_change);
5066: }
5068: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5069: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
5071: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5072: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5073: if (isseqaij) {
5074: MatDestroy(&pcbddc->local_mat);
5075: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5076: if (lA) {
5077: Mat work;
5078: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5079: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5080: MatDestroy(&work);
5081: }
5082: } else {
5083: Mat work_mat;
5085: MatDestroy(&pcbddc->local_mat);
5086: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5087: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5088: MatDestroy(&work_mat);
5089: if (lA) {
5090: Mat work;
5091: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5092: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5093: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5094: MatDestroy(&work);
5095: }
5096: }
5097: if (matis->A->symmetric_set) {
5098: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5099: #if !defined(PETSC_USE_COMPLEX)
5100: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5101: #endif
5102: }
5103: MatDestroy(&new_mat);
5104: return(0);
5105: }
5107: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5108: {
5109: PC_IS* pcis = (PC_IS*)(pc->data);
5110: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5111: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5112: PetscInt *idx_R_local=NULL;
5113: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5114: PetscInt vbs,bs;
5115: PetscBT bitmask=NULL;
5116: PetscErrorCode ierr;
5119: /*
5120: No need to setup local scatters if
5121: - primal space is unchanged
5122: AND
5123: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5124: AND
5125: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5126: */
5127: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5128: return(0);
5129: }
5130: /* destroy old objects */
5131: ISDestroy(&pcbddc->is_R_local);
5132: VecScatterDestroy(&pcbddc->R_to_B);
5133: VecScatterDestroy(&pcbddc->R_to_D);
5134: /* Set Non-overlapping dimensions */
5135: n_B = pcis->n_B;
5136: n_D = pcis->n - n_B;
5137: n_vertices = pcbddc->n_vertices;
5139: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5141: /* create auxiliary bitmask and allocate workspace */
5142: if (!sub_schurs || !sub_schurs->reuse_solver) {
5143: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5144: PetscBTCreate(pcis->n,&bitmask);
5145: for (i=0;i<n_vertices;i++) {
5146: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5147: }
5149: for (i=0, n_R=0; i<pcis->n; i++) {
5150: if (!PetscBTLookup(bitmask,i)) {
5151: idx_R_local[n_R++] = i;
5152: }
5153: }
5154: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5155: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5157: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5158: ISGetLocalSize(reuse_solver->is_R,&n_R);
5159: }
5161: /* Block code */
5162: vbs = 1;
5163: MatGetBlockSize(pcbddc->local_mat,&bs);
5164: if (bs>1 && !(n_vertices%bs)) {
5165: PetscBool is_blocked = PETSC_TRUE;
5166: PetscInt *vary;
5167: if (!sub_schurs || !sub_schurs->reuse_solver) {
5168: PetscMalloc1(pcis->n/bs,&vary);
5169: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
5170: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5171: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5172: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5173: for (i=0; i<pcis->n/bs; i++) {
5174: if (vary[i]!=0 && vary[i]!=bs) {
5175: is_blocked = PETSC_FALSE;
5176: break;
5177: }
5178: }
5179: PetscFree(vary);
5180: } else {
5181: /* Verify directly the R set */
5182: for (i=0; i<n_R/bs; i++) {
5183: PetscInt j,node=idx_R_local[bs*i];
5184: for (j=1; j<bs; j++) {
5185: if (node != idx_R_local[bs*i+j]-j) {
5186: is_blocked = PETSC_FALSE;
5187: break;
5188: }
5189: }
5190: }
5191: }
5192: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5193: vbs = bs;
5194: for (i=0;i<n_R/vbs;i++) {
5195: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5196: }
5197: }
5198: }
5199: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5200: if (sub_schurs && sub_schurs->reuse_solver) {
5201: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5203: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5204: ISDestroy(&reuse_solver->is_R);
5205: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5206: reuse_solver->is_R = pcbddc->is_R_local;
5207: } else {
5208: PetscFree(idx_R_local);
5209: }
5211: /* print some info if requested */
5212: if (pcbddc->dbg_flag) {
5213: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5214: PetscViewerFlush(pcbddc->dbg_viewer);
5215: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5216: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5217: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5218: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5219: PetscViewerFlush(pcbddc->dbg_viewer);
5220: }
5222: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5223: if (!sub_schurs || !sub_schurs->reuse_solver) {
5224: IS is_aux1,is_aux2;
5225: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5227: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5228: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5229: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5230: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5231: for (i=0; i<n_D; i++) {
5232: PetscBTSet(bitmask,is_indices[i]);
5233: }
5234: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5235: for (i=0, j=0; i<n_R; i++) {
5236: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5237: aux_array1[j++] = i;
5238: }
5239: }
5240: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5241: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5242: for (i=0, j=0; i<n_B; i++) {
5243: if (!PetscBTLookup(bitmask,is_indices[i])) {
5244: aux_array2[j++] = i;
5245: }
5246: }
5247: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5248: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5249: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5250: ISDestroy(&is_aux1);
5251: ISDestroy(&is_aux2);
5253: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5254: PetscMalloc1(n_D,&aux_array1);
5255: for (i=0, j=0; i<n_R; i++) {
5256: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5257: aux_array1[j++] = i;
5258: }
5259: }
5260: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5261: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5262: ISDestroy(&is_aux1);
5263: }
5264: PetscBTDestroy(&bitmask);
5265: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5266: } else {
5267: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5268: IS tis;
5269: PetscInt schur_size;
5271: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5272: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5273: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5274: ISDestroy(&tis);
5275: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5276: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5277: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5278: ISDestroy(&tis);
5279: }
5280: }
5281: return(0);
5282: }
/*
   MatNullSpacePropagate_Private - Gives B a near null space derived from the (near) null space of A.

   Parameters:
     A  - matrix whose null space (or, when none is attached, near null space) is the source
     is - index set used to scatter from vectors laid out like A into vectors laid out like B
     B  - matrix that receives the result through MatSetNearNullSpace()

   Nothing is done when B already has a null space or a near null space attached,
   or when A has neither of them.
   The propagated vectors are created on top of the columns of a dense matrix; that matrix
   is composed with the new null space object under the key "_PBDDC_Null_dmat".
*/
5284: static PetscErrorCode MatNullSpacePropagate_Private(Mat A, IS is, Mat B)
5285: {
5286: MatNullSpace NullSpace;
5287: Mat dmat;
5288: const Vec *nullvecs;
5289: Vec v,v2,*nullvecs2;
5290: VecScatter sct;
5291: PetscInt k,nnsp_size,bsiz,n,N,bs;
5292: PetscBool nnsp_has_cnst;
/* B already carries a null space or a near null space: leave it alone */
5296: MatGetNullSpace(B,&NullSpace);
5297: if (!NullSpace) {
5298: MatGetNearNullSpace(B,&NullSpace);
5299: }
5300: if (NullSpace) return(0);
/* source space: null space of A, falling back to its near null space */
5301: MatGetNullSpace(A,&NullSpace);
5302: if (!NullSpace) {
5303: MatGetNearNullSpace(A,&NullSpace);
5304: }
5305: if (!NullSpace) return(0);
/* v is laid out like A, v2 like B; sct maps the entries of v selected by is onto v2 */
5306: MatCreateVecs(A,&v,NULL);
5307: MatCreateVecs(B,&v2,NULL);
5308: VecScatterCreate(v,is,v2,NULL,&sct);
5309: MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
/* one extra vector is needed when the source space contains the constant */
5310: bsiz = nnsp_size+!!nnsp_has_cnst;
5311: PetscMalloc1(bsiz,&nullvecs2);
5312: VecGetBlockSize(v2,&bs);
5313: VecGetSize(v2,&N);
5314: VecGetLocalSize(v2,&n);
/* dense matrix with bsiz columns, with the same row sizes as v2 */
5315: MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz,NULL,&dmat);
/* wrap column k in a vector and fill it by scattering the k-th source vector */
5316: for (k=0;k<nnsp_size;k++) {
5317: PetscScalar *arr;
5319: MatDenseGetColumn(dmat,k,&arr);
5320: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[k]);
5321: VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5322: VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5323: MatDenseRestoreColumn(dmat,&arr);
5324: }
/* the constant is stored explicitly in the last column as a vector of ones */
5325: if (nnsp_has_cnst) {
5326: PetscScalar *arr;
5328: MatDenseGetColumn(dmat,nnsp_size,&arr);
5329: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,arr,&nullvecs2[nnsp_size]);
5330: VecSet(nullvecs2[nnsp_size],1.0);
5331: MatDenseRestoreColumn(dmat,&arr);
5332: }
5333: PCBDDCOrthonormalizeVecs(bsiz,nullvecs2);
/* has_cnst is PETSC_FALSE here: the constant, if any, is already among nullvecs2 */
5334: MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz,nullvecs2,&NullSpace);
/* NOTE(review): composing dmat presumably keeps the column storage alive after the MatDestroy below - confirm */
5335: PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5336: MatDestroy(&dmat);
5337: for (k=0;k<bsiz;k++) {
5338: VecDestroy(&nullvecs2[k]);
5339: }
5340: PetscFree(nullvecs2);
/* the result is attached to B as a near null space, whichever kind of space was taken from A */
5341: MatSetNearNullSpace(B,NullSpace);
5342: MatNullSpaceDestroy(&NullSpace);
5343: VecDestroy(&v);
5344: VecDestroy(&v2);
5345: VecScatterDestroy(&sct);
5346: return(0);
5347: }
/*
   PCBDDCSetUpLocalSolvers - Creates or updates the local Dirichlet (pcbddc->ksp_D) and
   Neumann (pcbddc->ksp_R) solvers.

   Parameters:
     pc        - the preconditioner; pc->data is accessed both as PC_BDDC and as PC_IS
     dirichlet - if true, set up ksp_D with operators pcis->A_II and pcis->pA_II
     neumann   - if true, set up ksp_R with the local matrix restricted to pcbddc->is_R_local

   A solver is created (KSPPREONLY with PCCHOLESKY for SEQSBAIJ matrices, PCLU otherwise) and
   configured from the options database only the first time it is needed; later calls only
   update its operators.
   When pcbddc->dbg_flag is set, each solver that has been set up is tested on a random vector
   and the infinity norm of the error is printed on pcbddc->dbg_viewer.
*/
5349: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5350: {
5351: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5352: PC_IS *pcis = (PC_IS*)pc->data;
5353: PC pc_temp;
5354: Mat A_RR;
5355: MatNullSpace nnsp;
5356: MatReuse reuse;
5357: PetscScalar m_one = -1.0;
5358: PetscReal value;
5359: PetscInt n_D,n_R;
5360: PetscBool issbaij,opts;
5362: void (*f)(void) = 0;
5363: char dir_prefix[256],neu_prefix[256],str_level[16];
5364: size_t len;
5367: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5368: /* compute prefixes */
5369: PetscStrcpy(dir_prefix,"");
5370: PetscStrcpy(neu_prefix,"");
/* finest level: options prefixes are pc's prefix followed by the solver name */
5371: if (!pcbddc->current_level) {
5372: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5373: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5374: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5375: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5376: } else {
/* coarser levels: strip the trailing coarse-solver part of pc's prefix, then append the solver name and "l<level>_" */
5377: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5378: PetscStrlen(((PetscObject)pc)->prefix,&len);
5379: len -= 15; /* remove "pc_bddc_coarse_" */
5380: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5381: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5382: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5383: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5384: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5385: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5386: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5387: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5388: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5389: }
5391: /* DIRICHLET PROBLEM */
5392: if (dirichlet) {
5393: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
/* benign_n set without an explicit change of basis is supported only together with a reusable solver */
5394: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5395: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
/* in debug mode A_II is replaced by the matrix returned by PCBDDCBenignProject() */
5396: if (pcbddc->dbg_flag) {
5397: Mat A_IIn;
5399: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5400: MatDestroy(&pcis->A_II);
5401: pcis->A_II = A_IIn;
5402: }
5403: }
/* copy the symmetry flag of the local matrix, when it has been set */
5404: if (pcbddc->local_mat->symmetric_set) {
5405: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5406: }
5407: /* Matrix for Dirichlet problem is pcis->A_II */
5408: n_D = pcis->n - pcis->n_B;
/* opts records whether the solver is created in this call; options are read only in that case */
5409: opts = PETSC_FALSE;
5410: if (!pcbddc->ksp_D) { /* create object if not yet build */
5411: opts = PETSC_TRUE;
5412: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5413: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5414: /* default */
5415: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5416: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5417: PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5418: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5419: if (issbaij) {
5420: PCSetType(pc_temp,PCCHOLESKY);
5421: } else {
5422: PCSetType(pc_temp,PCLU);
5423: }
5424: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5425: }
/* the preconditioning matrix gets the same options prefix as the solver */
5426: MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5427: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5428: /* Allow user's customization */
5429: if (opts) {
5430: KSPSetFromOptions(pcbddc->ksp_D);
5431: }
5432: if (pcbddc->NullSpace_corr[0]) { /* approximate solver, propagate NearNullSpace */
5433: MatNullSpacePropagate_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5434: }
/* pass the coordinates of the interior dofs to the PC when it composes "PCSetCoordinates_C",
   mat_graph->cloc is nonzero and pA_II has no near null space attached */
5435: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5436: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5437: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5438: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5439: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5440: const PetscInt *idxs;
5441: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5443: ISGetLocalSize(pcis->is_I_local,&nl);
5444: ISGetIndices(pcis->is_I_local,&idxs);
5445: PetscMalloc1(nl*cdim,&scoords);
/* gather the cdim coordinates of each dof listed in is_I_local */
5446: for (i=0;i<nl;i++) {
5447: for (d=0;d<cdim;d++) {
5448: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5449: }
5450: }
5451: ISRestoreIndices(pcis->is_I_local,&idxs);
5452: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5453: PetscFree(scoords);
5454: }
/* a reusable solver is available: its interior solver replaces the PC of ksp_D */
5455: if (sub_schurs && sub_schurs->reuse_solver) {
5456: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5458: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5459: }
5461: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5462: if (!n_D) {
5463: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5464: PCSetType(pc_temp,PCNONE);
5465: }
5466: /* set ksp_D into pcis data */
5467: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5468: KSPDestroy(&pcis->ksp_D);
5469: pcis->ksp_D = pcbddc->ksp_D;
5470: }
5472: /* NEUMANN PROBLEM */
5473: A_RR = 0;
5474: if (neumann) {
5475: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5476: PetscInt ibs,mbs;
/* note: this issbaij shadows the function-scope variable of the same name */
5477: PetscBool issbaij, reuse_neumann_solver;
5478: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
/* the correction solver of sub_schurs is reused unless an object is composed on sub_schurs->A
   under the key "__KSPFETIDP_iP" */
5480: reuse_neumann_solver = PETSC_FALSE;
5481: if (sub_schurs && sub_schurs->reuse_solver) {
5482: IS iP;
5484: reuse_neumann_solver = PETSC_TRUE;
5485: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5486: if (iP) reuse_neumann_solver = PETSC_FALSE;
5487: }
5488: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5489: ISGetSize(pcbddc->is_R_local,&n_R);
5490: if (pcbddc->ksp_R) { /* already created ksp */
5491: PetscInt nn_R;
/* take a reference on the current operator: A_RR is destroyed on every path of this function */
5492: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5493: PetscObjectReference((PetscObject)A_RR);
5494: MatGetSize(A_RR,&nn_R,NULL);
5495: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5496: KSPReset(pcbddc->ksp_R);
5497: MatDestroy(&A_RR);
5498: reuse = MAT_INITIAL_MATRIX;
5499: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5500: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5501: MatDestroy(&A_RR);
5502: reuse = MAT_INITIAL_MATRIX;
5503: } else { /* safe to reuse the matrix */
5504: reuse = MAT_REUSE_MATRIX;
5505: }
5506: }
5507: /* last check */
5508: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5509: MatDestroy(&A_RR);
5510: reuse = MAT_INITIAL_MATRIX;
5511: }
5512: } else { /* first time, so we need to create the matrix */
5513: reuse = MAT_INITIAL_MATRIX;
5514: }
5515: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */
5516: MatGetBlockSize(pcbddc->local_mat,&mbs);
5517: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5518: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
/* in both conversions below: when local_mat is the same object as matis->A, our reference is
   dropped and the converted matrix is a new object; otherwise local_mat is converted in place */
5519: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5520: if (matis->A == pcbddc->local_mat) {
5521: MatDestroy(&pcbddc->local_mat);
5522: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5523: } else {
5524: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5525: }
5526: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5527: if (matis->A == pcbddc->local_mat) {
5528: MatDestroy(&pcbddc->local_mat);
5529: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5530: } else {
5531: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5532: }
5533: }
5534: /* extract A_RR */
5535: if (reuse_neumann_solver) {
5536: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5538: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5539: MatDestroy(&A_RR);
5540: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5541: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5542: } else {
5543: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5544: }
5545: } else {
/* no test requested: use the operator of the correction solver, taking a reference on it */
5546: MatDestroy(&A_RR);
5547: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5548: PetscObjectReference((PetscObject)A_RR);
5549: }
5550: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5551: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5552: }
/* copy the symmetry flag of the local matrix, when it has been set */
5553: if (pcbddc->local_mat->symmetric_set) {
5554: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5555: }
5556: opts = PETSC_FALSE;
5557: if (!pcbddc->ksp_R) { /* create object if not present */
5558: opts = PETSC_TRUE;
5559: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5560: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5561: /* default */
5562: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5563: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5564: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5565: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5566: if (issbaij) {
5567: PCSetType(pc_temp,PCCHOLESKY);
5568: } else {
5569: PCSetType(pc_temp,PCLU);
5570: }
5571: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5572: }
5573: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5574: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5575: if (opts) { /* Allow user's customization once */
5576: KSPSetFromOptions(pcbddc->ksp_R);
5577: }
5578: if (pcbddc->NullSpace_corr[2]) { /* approximate solver, propagate NearNullSpace */
5579: MatNullSpacePropagate_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5580: }
/* same coordinate hand-off as for the Dirichlet solver, restricted to the dofs in is_R_local */
5581: MatGetNearNullSpace(A_RR,&nnsp);
5582: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5583: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5584: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5585: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5586: const PetscInt *idxs;
5587: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5589: ISGetLocalSize(pcbddc->is_R_local,&nl);
5590: ISGetIndices(pcbddc->is_R_local,&idxs);
5591: PetscMalloc1(nl*cdim,&scoords);
5592: for (i=0;i<nl;i++) {
5593: for (d=0;d<cdim;d++) {
5594: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5595: }
5596: }
5597: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5598: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5599: PetscFree(scoords);
5600: }
5602: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5603: if (!n_R) {
5604: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5605: PCSetType(pc_temp,PCNONE);
5606: }
5607: /* Reuse solver if it is present */
5608: if (reuse_neumann_solver) {
5609: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5611: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5612: }
5613: }
5615: if (pcbddc->dbg_flag) {
5616: PetscViewerFlush(pcbddc->dbg_viewer);
5617: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5618: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5619: }
5621: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5622: if (pcbddc->NullSpace_corr[0]) {
5623: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5624: }
5625: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5626: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5627: }
5628: if (neumann && pcbddc->NullSpace_corr[2]) {
5629: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5630: }
5631: /* check Dirichlet and Neumann solvers */
/* each test multiplies a random vector by the matrix, solves with the result as right-hand side,
   and prints the infinity norm of (random vector - computed solution) */
5632: if (pcbddc->dbg_flag) {
5633: if (dirichlet) { /* Dirichlet */
5634: VecSetRandom(pcis->vec1_D,NULL);
5635: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5636: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5637: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5638: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5639: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5640: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5641: PetscViewerFlush(pcbddc->dbg_viewer);
5642: }
5643: if (neumann) { /* Neumann */
5644: VecSetRandom(pcbddc->vec1_R,NULL);
5645: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5646: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5647: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5648: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5649: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5650: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5651: PetscViewerFlush(pcbddc->dbg_viewer);
5652: }
5653: }
5654: /* free Neumann problem's matrix */
5655: MatDestroy(&A_RR);
5656: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5657: return(0);
5658: }
/*
   PCBDDCSolveSubstructureCorrection - Applies the local solve on the R dofs to the data held
   in inout_B (and in inout_D when pcbddc->switch_static is set); the result overwrites the input.

   Parameters:
     pc             - the preconditioner
     inout_B        - on input the right-hand side on the B dofs, on output the solution on them
     inout_D        - read and written only when pcbddc->switch_static is set
     applytranspose - if true the transposed solve is used and local_auxmat1/local_auxmat2,
                      when present, are applied transposed before the solve; if false they are
                      applied after the solve

   When sub_schurs provides a reusable solver and switch_static is not set, the Schur complement
   solve of the factored matrix reuse_solver->F is used in place of ksp_R.
*/
5660: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5661: {
5662: PetscErrorCode ierr;
5663: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5664: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
/* boolean flag; the inner scopes below declare a PCBDDCReuseSolvers with the same name, shadowing it */
5665: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE;
/* zero vec1_R first: the scatters from inout_B and inout_D do not necessarily write every entry (assumption - confirm) */
5668: if (!reuse_solver) {
5669: VecSet(pcbddc->vec1_R,0.);
5670: }
5671: if (!pcbddc->switch_static) {
/* transposed case: inout_B += local_auxmat1^T * (local_auxmat2^T * inout_B), done before restricting */
5672: if (applytranspose && pcbddc->local_auxmat1) {
5673: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5674: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5675: }
/* move the right-hand side from B into vec1_R, or into rhs_B of the reusable solver */
5676: if (!reuse_solver) {
5677: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5678: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5679: } else {
5680: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5682: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5683: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5684: }
5685: } else {
/* switch_static: both the B and the D parts are gathered into vec1_R */
5686: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5687: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5688: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5689: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
/* here local_auxmat2^T acts on vec1_R; the updated inout_B is then scattered into vec1_R again */
5690: if (applytranspose && pcbddc->local_auxmat1) {
5691: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5692: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5693: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5694: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5695: }
5696: }
/* local solve: in place on vec1_R with ksp_R, or rhs_B -> sol_B through the factored matrix */
5697: if (!reuse_solver || pcbddc->switch_static) {
5698: if (applytranspose) {
5699: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5700: } else {
5701: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5702: }
5703: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5704: } else {
5705: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5707: if (applytranspose) {
5708: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5709: } else {
5710: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5711: }
5712: }
/* inout_B is zeroed before the solution is scattered back into it */
5713: VecSet(inout_B,0.);
5714: if (!pcbddc->switch_static) {
5715: if (!reuse_solver) {
5716: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5717: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5718: } else {
5719: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5721: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5722: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5723: }
/* non-transposed case: inout_B += local_auxmat2 * (local_auxmat1 * inout_B), done after the solve */
5724: if (!applytranspose && pcbddc->local_auxmat1) {
5725: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5726: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5727: }
5728: } else {
5729: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5730: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5731: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5732: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
/* switch_static: the update is added to vec1_R, which is then scattered to B and D once more */
5733: if (!applytranspose && pcbddc->local_auxmat1) {
5734: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5735: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5736: }
5737: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5738: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5739: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5740: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5741: }
5742: return(0);
5743: }
/*
   PCBDDCApplyInterfacePreconditioner - Applies the coarse correction and the local correction
   on the R dofs to the data stored in pcis->vec1_B (and in pcis->vec1_D when
   pcbddc->switch_static is set); the result overwrites those vectors.

   pcbddc->vec1_P holds the local primal values and pcbddc->coarse_vec the coarse vector.
   When pcbddc->benign_apply_coarse_only is set, the local correction is skipped and the coarse
   solution is only expanded into pcis->vec1_B.
*/
5745: /* parameter applytranspose determines if the interface preconditioner should be applied transposed or not */
5746: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5747: {
5749: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5750: PC_IS* pcis = (PC_IS*) (pc->data);
5751: const PetscScalar zero = 0.0;
5754: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5755: if (!pcbddc->benign_apply_coarse_only) {
5756: if (applytranspose) {
5757: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5758: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5759: } else {
5760: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5761: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5762: }
5763: } else {
5764: VecSet(pcbddc->vec1_P,zero);
5765: }
5767: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5768: if (pcbddc->benign_n) {
5769: PetscScalar *array;
5770: PetscInt j;
5772: VecGetArray(pcbddc->vec1_P,&array);
5773: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5774: VecRestoreArray(pcbddc->vec1_P,&array);
5775: }
5777: /* start communications from local primal nodes to rhs of coarse solver */
5778: VecSet(pcbddc->coarse_vec,zero);
5779: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5780: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5782: /* Coarse solution -> rhs and sol updated inside PCBDDCScatterCoarseDataBegin/End */
/* only the processes that hold a coarse KSP take part in the coarse solve */
5783: if (pcbddc->coarse_ksp) {
5784: Mat coarse_mat;
5785: Vec rhs,sol;
5786: MatNullSpace nullsp;
5787: PetscBool isbddc = PETSC_FALSE;
5789: if (pcbddc->benign_have_null) {
5790: PC coarse_pc;
5792: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5793: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5794: /* we need to propagate to coarser levels the need for a possible benign correction */
5795: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5796: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5797: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5798: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5799: }
5800: }
5801: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5802: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5803: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5804: if (applytranspose) {
5805: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5806: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5807: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
/* for the transposed solve the transpose null space, if any, is removed from the solution */
5808: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5809: if (nullsp) {
5810: MatNullSpaceRemove(nullsp,sol);
5811: }
5812: } else {
5813: MatGetNullSpace(coarse_mat,&nullsp);
5814: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5815: PC coarse_pc;
/* on this path the null space is removed from the right-hand side, and no KSPSolve is performed */
5817: if (nullsp) {
5818: MatNullSpaceRemove(nullsp,rhs);
5819: }
5820: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5821: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5822: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5823: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5824: } else {
5825: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5826: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5827: if (nullsp) {
5828: MatNullSpaceRemove(nullsp,sol);
5829: }
5830: }
5831: }
5832: /* we don't need the benign correction at coarser levels anymore */
5833: if (pcbddc->benign_have_null && isbddc) {
5834: PC coarse_pc;
5835: PC_BDDC* coarsepcbddc;
5837: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5838: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5839: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5840: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5841: }
5842: }
5844: /* Local solution on R nodes */
5845: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5846: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5847: }
5848: /* communications from coarse sol to local primal nodes */
5849: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5850: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5852: /* Sum contributions from the two levels */
5853: if (!pcbddc->benign_apply_coarse_only) {
/* the coarse part is added to the local correction already stored in vec1_B (and vec1_D) */
5854: if (applytranspose) {
5855: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5856: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5857: } else {
5858: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5859: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5860: }
5861: /* store p0 */
5862: if (pcbddc->benign_n) {
5863: PetscScalar *array;
5864: PetscInt j;
5866: VecGetArray(pcbddc->vec1_P,&array);
5867: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5868: VecRestoreArray(pcbddc->vec1_P,&array);
5869: }
5870: } else { /* expand the coarse solution */
5871: if (applytranspose) {
5872: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5873: } else {
5874: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5875: }
5876: }
5877: return(0);
5878: }
/*
   PCBDDCScatterCoarseDataBegin - Starts the scatter between the local primal vector
   (pcbddc->vec1_P) and the coarse vector (pcbddc->coarse_vec) through
   pcbddc->coarse_loc_to_glob.

   Parameters:
     pc    - the preconditioner
     imode - insert mode handed to VecScatterBegin()
     smode - SCATTER_FORWARD moves vec1_P into coarse_vec; SCATTER_REVERSE moves coarse_vec
             into vec1_P

   In the reverse direction, on processes holding the coarse KSP, coarse_vec is made to use the
   array of the coarse solution; PCBDDCScatterCoarseDataEnd() undoes that placement.
*/
5880: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5881: {
5883: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5884: PetscScalar *array;
5885: Vec from,to;
5888: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5889: from = pcbddc->coarse_vec;
5890: to = pcbddc->vec1_P;
5891: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5892: Vec tvec;
/* undo the array placement made on the coarse right-hand side by the forward End call */
5894: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5895: VecResetArray(tvec);
/* let coarse_vec use the array of the coarse solution as the source of the scatter */
5896: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5897: VecGetArray(tvec,&array);
5898: VecPlaceArray(from,array);
5899: VecRestoreArray(tvec,&array);
5900: }
5901: } else { /* from local to global -> put data in coarse right hand side */
5902: from = pcbddc->vec1_P;
5903: to = pcbddc->coarse_vec;
5904: }
5905: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5906: return(0);
5907: }
/*
   PCBDDCScatterCoarseDataEnd - Completes the scatter started by PCBDDCScatterCoarseDataBegin();
   it must be called with the same imode and smode.

   After a forward scatter, on processes holding the coarse KSP, the right-hand side of the
   coarse KSP is made to use the array of pcbddc->coarse_vec.
   After a reverse scatter, the array placed on pcbddc->coarse_vec by the Begin call is reset.
*/
5909: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5910: {
5912: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5913: PetscScalar *array;
5914: Vec from,to;
5917: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5918: from = pcbddc->coarse_vec;
5919: to = pcbddc->vec1_P;
5920: } else { /* from local to global -> put data in coarse right hand side */
5921: from = pcbddc->vec1_P;
5922: to = pcbddc->coarse_vec;
5923: }
5924: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
5925: if (smode == SCATTER_FORWARD) {
5926: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5927: Vec tvec;
/* the coarse right-hand side now uses the array of coarse_vec, until the next reverse Begin call resets it */
5929: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5930: VecGetArray(to,&array);
5931: VecPlaceArray(tvec,array);
5932: VecRestoreArray(to,&array);
5933: }
5934: } else {
5935: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5936: VecResetArray(from);
5937: }
5938: }
5939: return(0);
5940: }
5942: /* uncomment for testing purposes */
5943: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
5944: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5945: {
5946: PetscErrorCode ierr;
5947: PC_IS* pcis = (PC_IS*)(pc->data);
5948: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5949: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5950: /* one and zero */
5951: PetscScalar one=1.0,zero=0.0;
5952: /* space to store constraints and their local indices */
5953: PetscScalar *constraints_data;
5954: PetscInt *constraints_idxs,*constraints_idxs_B;
5955: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
5956: PetscInt *constraints_n;
5957: /* iterators */
5958: PetscInt i,j,k,total_counts,total_counts_cc,cum;
5959: /* BLAS integers */
5960: PetscBLASInt lwork,lierr;
5961: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
5962: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
5963: /* reuse */
5964: PetscInt olocal_primal_size,olocal_primal_size_cc;
5965: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
5966: /* change of basis */
5967: PetscBool qr_needed;
5968: PetscBT change_basis,qr_needed_idx;
5969: /* auxiliary stuff */
5970: PetscInt *nnz,*is_indices;
5971: PetscInt ncc;
5972: /* some quantities */
5973: PetscInt n_vertices,total_primal_vertices,valid_constraints;
5974: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
5975: PetscReal tol; /* tolerance for retaining eigenmodes */
5978: tol = PetscSqrtReal(PETSC_SMALL);
5979: /* Destroy Mat objects computed previously */
5980: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
5981: MatDestroy(&pcbddc->ConstraintMatrix);
5982: MatDestroy(&pcbddc->switch_static_change);
5983: /* save info on constraints from previous setup (if any) */
5984: olocal_primal_size = pcbddc->local_primal_size;
5985: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5986: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
5987: PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
5988: PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
5989: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
5990: PetscFree(pcbddc->primal_indices_local_idxs);
5992: if (!pcbddc->adaptive_selection) {
5993: IS ISForVertices,*ISForFaces,*ISForEdges;
5994: MatNullSpace nearnullsp;
5995: const Vec *nearnullvecs;
5996: Vec *localnearnullsp;
5997: PetscScalar *array;
5998: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
5999: PetscBool nnsp_has_cnst;
6000: /* LAPACK working arrays for SVD or POD */
6001: PetscBool skip_lapack,boolforchange;
6002: PetscScalar *work;
6003: PetscReal *singular_vals;
6004: #if defined(PETSC_USE_COMPLEX)
6005: PetscReal *rwork;
6006: #endif
6007: #if defined(PETSC_MISSING_LAPACK_GESVD)
6008: PetscScalar *temp_basis,*correlation_mat;
6009: #else
6010: PetscBLASInt dummy_int=1;
6011: PetscScalar dummy_scalar=1.;
6012: #endif
6014: /* Get index sets for faces, edges and vertices from graph */
6015: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6016: /* print some info */
6017: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6018: PetscInt nv;
6020: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6021: ISGetSize(ISForVertices,&nv);
6022: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6023: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6024: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6025: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6026: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6027: PetscViewerFlush(pcbddc->dbg_viewer);
6028: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6029: }
6031: /* free unneeded index sets */
6032: if (!pcbddc->use_vertices) {
6033: ISDestroy(&ISForVertices);
6034: }
6035: if (!pcbddc->use_edges) {
6036: for (i=0;i<n_ISForEdges;i++) {
6037: ISDestroy(&ISForEdges[i]);
6038: }
6039: PetscFree(ISForEdges);
6040: n_ISForEdges = 0;
6041: }
6042: if (!pcbddc->use_faces) {
6043: for (i=0;i<n_ISForFaces;i++) {
6044: ISDestroy(&ISForFaces[i]);
6045: }
6046: PetscFree(ISForFaces);
6047: n_ISForFaces = 0;
6048: }
6050: /* check if near null space is attached to global mat */
6051: MatGetNearNullSpace(pc->pmat,&nearnullsp);
6052: if (nearnullsp) {
6053: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6054: /* remove any stored info */
6055: MatNullSpaceDestroy(&pcbddc->onearnullspace);
6056: PetscFree(pcbddc->onearnullvecs_state);
6057: /* store information for BDDC solver reuse */
6058: PetscObjectReference((PetscObject)nearnullsp);
6059: pcbddc->onearnullspace = nearnullsp;
6060: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6061: for (i=0;i<nnsp_size;i++) {
6062: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6063: }
6064: } else { /* if near null space is not provided BDDC uses constants by default */
6065: nnsp_size = 0;
6066: nnsp_has_cnst = PETSC_TRUE;
6067: }
6068: /* get max number of constraints on a single cc */
6069: max_constraints = nnsp_size;
6070: if (nnsp_has_cnst) max_constraints++;
6072: /*
6073: Evaluate maximum storage size needed by the procedure
6074: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6075: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6076: There can be multiple constraints per connected component
6077: */
6078: n_vertices = 0;
6079: if (ISForVertices) {
6080: ISGetSize(ISForVertices,&n_vertices);
6081: }
6082: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6083: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
6085: total_counts = n_ISForFaces+n_ISForEdges;
6086: total_counts *= max_constraints;
6087: total_counts += n_vertices;
6088: PetscBTCreate(total_counts,&change_basis);
6090: total_counts = 0;
6091: max_size_of_constraint = 0;
6092: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6093: IS used_is;
6094: if (i<n_ISForEdges) {
6095: used_is = ISForEdges[i];
6096: } else {
6097: used_is = ISForFaces[i-n_ISForEdges];
6098: }
6099: ISGetSize(used_is,&j);
6100: total_counts += j;
6101: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6102: }
6103: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
6105: /* get local part of global near null space vectors */
6106: PetscMalloc1(nnsp_size,&localnearnullsp);
6107: for (k=0;k<nnsp_size;k++) {
6108: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6109: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6110: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6111: }
6113: /* whether or not to skip lapack calls */
6114: skip_lapack = PETSC_TRUE;
6115: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6117: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6118: if (!skip_lapack) {
6119: PetscScalar temp_work;
6121: #if defined(PETSC_MISSING_LAPACK_GESVD)
6122: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6123: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6124: PetscMalloc1(max_constraints,&singular_vals);
6125: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6126: #if defined(PETSC_USE_COMPLEX)
6127: PetscMalloc1(3*max_constraints,&rwork);
6128: #endif
6129: /* now we evaluate the optimal workspace using query with lwork=-1 */
6130: PetscBLASIntCast(max_constraints,&Blas_N);
6131: PetscBLASIntCast(max_constraints,&Blas_LDA);
6132: lwork = -1;
6133: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6134: #if !defined(PETSC_USE_COMPLEX)
6135: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6136: #else
6137: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6138: #endif
6139: PetscFPTrapPop();
6140: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6141: #else /* on missing GESVD */
6142: /* SVD */
6143: PetscInt max_n,min_n;
6144: max_n = max_size_of_constraint;
6145: min_n = max_constraints;
6146: if (max_size_of_constraint < max_constraints) {
6147: min_n = max_size_of_constraint;
6148: max_n = max_constraints;
6149: }
6150: PetscMalloc1(min_n,&singular_vals);
6151: #if defined(PETSC_USE_COMPLEX)
6152: PetscMalloc1(5*min_n,&rwork);
6153: #endif
6154: /* now we evaluate the optimal workspace using query with lwork=-1 */
6155: lwork = -1;
6156: PetscBLASIntCast(max_n,&Blas_M);
6157: PetscBLASIntCast(min_n,&Blas_N);
6158: PetscBLASIntCast(max_n,&Blas_LDA);
6159: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6160: #if !defined(PETSC_USE_COMPLEX)
6161: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6162: #else
6163: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6164: #endif
6165: PetscFPTrapPop();
6166: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6167: #endif /* on missing GESVD */
6168: /* Allocate optimal workspace */
6169: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6170: PetscMalloc1(lwork,&work);
6171: }
6172: /* Now we can loop on constraining sets */
6173: total_counts = 0;
6174: constraints_idxs_ptr[0] = 0;
6175: constraints_data_ptr[0] = 0;
6176: /* vertices */
6177: if (n_vertices) {
6178: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6179: PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
6180: for (i=0;i<n_vertices;i++) {
6181: constraints_n[total_counts] = 1;
6182: constraints_data[total_counts] = 1.0;
6183: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6184: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6185: total_counts++;
6186: }
6187: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6188: n_vertices = total_counts;
6189: }
6191: /* edges and faces */
6192: total_counts_cc = total_counts;
6193: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6194: IS used_is;
6195: PetscBool idxs_copied = PETSC_FALSE;
6197: if (ncc<n_ISForEdges) {
6198: used_is = ISForEdges[ncc];
6199: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6200: } else {
6201: used_is = ISForFaces[ncc-n_ISForEdges];
6202: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6203: }
6204: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6206: ISGetSize(used_is,&size_of_constraint);
6207: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6208: /* change of basis should not be performed on local periodic nodes */
6209: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6210: if (nnsp_has_cnst) {
6211: PetscScalar quad_value;
6213: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6214: idxs_copied = PETSC_TRUE;
6216: if (!pcbddc->use_nnsp_true) {
6217: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6218: } else {
6219: quad_value = 1.0;
6220: }
6221: for (j=0;j<size_of_constraint;j++) {
6222: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6223: }
6224: temp_constraints++;
6225: total_counts++;
6226: }
6227: for (k=0;k<nnsp_size;k++) {
6228: PetscReal real_value;
6229: PetscScalar *ptr_to_data;
6231: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6232: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6233: for (j=0;j<size_of_constraint;j++) {
6234: ptr_to_data[j] = array[is_indices[j]];
6235: }
6236: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6237: /* check if array is null on the connected component */
6238: PetscBLASIntCast(size_of_constraint,&Blas_N);
6239: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6240: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6241: temp_constraints++;
6242: total_counts++;
6243: if (!idxs_copied) {
6244: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
6245: idxs_copied = PETSC_TRUE;
6246: }
6247: }
6248: }
6249: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6250: valid_constraints = temp_constraints;
6251: if (!pcbddc->use_nnsp_true && temp_constraints) {
6252: if (temp_constraints == 1) { /* just normalize the constraint */
6253: PetscScalar norm,*ptr_to_data;
6255: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6256: PetscBLASIntCast(size_of_constraint,&Blas_N);
6257: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6258: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6259: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6260: } else { /* perform SVD */
6261: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6263: #if defined(PETSC_MISSING_LAPACK_GESVD)
6264: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6265: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6266: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6267: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6268: from that computed using LAPACKgesvd
6269: -> This is due to a different computation of eigenvectors in LAPACKheev
6270: -> The quality of the POD-computed basis will be the same */
6271: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
6272: /* Store upper triangular part of correlation matrix */
6273: PetscBLASIntCast(size_of_constraint,&Blas_N);
6274: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6275: for (j=0;j<temp_constraints;j++) {
6276: for (k=0;k<j+1;k++) {
6277: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6278: }
6279: }
6280: /* compute eigenvalues and eigenvectors of correlation matrix */
6281: PetscBLASIntCast(temp_constraints,&Blas_N);
6282: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6283: #if !defined(PETSC_USE_COMPLEX)
6284: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6285: #else
6286: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6287: #endif
6288: PetscFPTrapPop();
6289: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6290: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6291: j = 0;
6292: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6293: total_counts = total_counts-j;
6294: valid_constraints = temp_constraints-j;
6295: /* scale and copy POD basis into used quadrature memory */
6296: PetscBLASIntCast(size_of_constraint,&Blas_M);
6297: PetscBLASIntCast(temp_constraints,&Blas_N);
6298: PetscBLASIntCast(temp_constraints,&Blas_K);
6299: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6300: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6301: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6302: if (j<temp_constraints) {
6303: PetscInt ii;
6304: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6305: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6306: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6307: PetscFPTrapPop();
6308: for (k=0;k<temp_constraints-j;k++) {
6309: for (ii=0;ii<size_of_constraint;ii++) {
6310: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6311: }
6312: }
6313: }
6314: #else /* on missing GESVD */
6315: PetscBLASIntCast(size_of_constraint,&Blas_M);
6316: PetscBLASIntCast(temp_constraints,&Blas_N);
6317: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6318: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6319: #if !defined(PETSC_USE_COMPLEX)
6320: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6321: #else
6322: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6323: #endif
6324: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6325: PetscFPTrapPop();
6326: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6327: k = temp_constraints;
6328: if (k > size_of_constraint) k = size_of_constraint;
6329: j = 0;
6330: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6331: valid_constraints = k-j;
6332: total_counts = total_counts-temp_constraints+valid_constraints;
6333: #endif /* on missing GESVD */
6334: }
6335: }
6336: /* update pointers information */
6337: if (valid_constraints) {
6338: constraints_n[total_counts_cc] = valid_constraints;
6339: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6340: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6341: /* set change_of_basis flag */
6342: if (boolforchange) {
6343: PetscBTSet(change_basis,total_counts_cc);
6344: }
6345: total_counts_cc++;
6346: }
6347: }
6348: /* free workspace */
6349: if (!skip_lapack) {
6350: PetscFree(work);
6351: #if defined(PETSC_USE_COMPLEX)
6352: PetscFree(rwork);
6353: #endif
6354: PetscFree(singular_vals);
6355: #if defined(PETSC_MISSING_LAPACK_GESVD)
6356: PetscFree(correlation_mat);
6357: PetscFree(temp_basis);
6358: #endif
6359: }
6360: for (k=0;k<nnsp_size;k++) {
6361: VecDestroy(&localnearnullsp[k]);
6362: }
6363: PetscFree(localnearnullsp);
6364: /* free index sets of faces, edges and vertices */
6365: for (i=0;i<n_ISForFaces;i++) {
6366: ISDestroy(&ISForFaces[i]);
6367: }
6368: if (n_ISForFaces) {
6369: PetscFree(ISForFaces);
6370: }
6371: for (i=0;i<n_ISForEdges;i++) {
6372: ISDestroy(&ISForEdges[i]);
6373: }
6374: if (n_ISForEdges) {
6375: PetscFree(ISForEdges);
6376: }
6377: ISDestroy(&ISForVertices);
6378: } else {
6379: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6381: total_counts = 0;
6382: n_vertices = 0;
6383: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6384: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6385: }
6386: max_constraints = 0;
6387: total_counts_cc = 0;
6388: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6389: total_counts += pcbddc->adaptive_constraints_n[i];
6390: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6391: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6392: }
6393: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6394: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6395: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6396: constraints_data = pcbddc->adaptive_constraints_data;
6397: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6398: PetscMalloc1(total_counts_cc,&constraints_n);
6399: total_counts_cc = 0;
6400: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6401: if (pcbddc->adaptive_constraints_n[i]) {
6402: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6403: }
6404: }
6406: max_size_of_constraint = 0;
6407: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6408: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6409: /* Change of basis */
6410: PetscBTCreate(total_counts_cc,&change_basis);
6411: if (pcbddc->use_change_of_basis) {
6412: for (i=0;i<sub_schurs->n_subs;i++) {
6413: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6414: PetscBTSet(change_basis,i+n_vertices);
6415: }
6416: }
6417: }
6418: }
6419: pcbddc->local_primal_size = total_counts;
6420: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6422: /* map constraints_idxs in boundary numbering */
6423: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6424: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6426: /* Create constraint matrix */
6427: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6428: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6429: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6431: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6432: /* determine if a QR strategy is needed for change of basis */
6433: qr_needed = pcbddc->use_qr_single;
6434: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6435: total_primal_vertices=0;
6436: pcbddc->local_primal_size_cc = 0;
6437: for (i=0;i<total_counts_cc;i++) {
6438: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6439: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6440: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6441: pcbddc->local_primal_size_cc += 1;
6442: } else if (PetscBTLookup(change_basis,i)) {
6443: for (k=0;k<constraints_n[i];k++) {
6444: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6445: }
6446: pcbddc->local_primal_size_cc += constraints_n[i];
6447: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6448: PetscBTSet(qr_needed_idx,i);
6449: qr_needed = PETSC_TRUE;
6450: }
6451: } else {
6452: pcbddc->local_primal_size_cc += 1;
6453: }
6454: }
6455: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6456: pcbddc->n_vertices = total_primal_vertices;
6457: /* permute indices in order to have a sorted set of vertices */
6458: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6459: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6460: PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
6461: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6463: /* nonzero structure of constraint matrix */
6464: /* and get reference dof for local constraints */
6465: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6466: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6468: j = total_primal_vertices;
6469: total_counts = total_primal_vertices;
6470: cum = total_primal_vertices;
6471: for (i=n_vertices;i<total_counts_cc;i++) {
6472: if (!PetscBTLookup(change_basis,i)) {
6473: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6474: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6475: cum++;
6476: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6477: for (k=0;k<constraints_n[i];k++) {
6478: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6479: nnz[j+k] = size_of_constraint;
6480: }
6481: j += constraints_n[i];
6482: }
6483: }
6484: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6485: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6486: PetscFree(nnz);
6488: /* set values in constraint matrix */
6489: for (i=0;i<total_primal_vertices;i++) {
6490: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6491: }
6492: total_counts = total_primal_vertices;
6493: for (i=n_vertices;i<total_counts_cc;i++) {
6494: if (!PetscBTLookup(change_basis,i)) {
6495: PetscInt *cols;
6497: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6498: cols = constraints_idxs+constraints_idxs_ptr[i];
6499: for (k=0;k<constraints_n[i];k++) {
6500: PetscInt row = total_counts+k;
6501: PetscScalar *vals;
6503: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6504: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6505: }
6506: total_counts += constraints_n[i];
6507: }
6508: }
6509: /* assembling */
6510: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6511: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6512: MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");
6514: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6515: if (pcbddc->use_change_of_basis) {
6516: /* dual and primal dofs on a single cc */
6517: PetscInt dual_dofs,primal_dofs;
6518: /* working stuff for GEQRF */
6519: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6520: PetscBLASInt lqr_work;
6521: /* working stuff for UNGQR */
6522: PetscScalar *gqr_work = NULL,lgqr_work_t;
6523: PetscBLASInt lgqr_work;
6524: /* working stuff for TRTRS */
6525: PetscScalar *trs_rhs = NULL;
6526: PetscBLASInt Blas_NRHS;
6527: /* pointers for values insertion into change of basis matrix */
6528: PetscInt *start_rows,*start_cols;
6529: PetscScalar *start_vals;
6530: /* working stuff for values insertion */
6531: PetscBT is_primal;
6532: PetscInt *aux_primal_numbering_B;
6533: /* matrix sizes */
6534: PetscInt global_size,local_size;
6535: /* temporary change of basis */
6536: Mat localChangeOfBasisMatrix;
6537: /* extra space for debugging */
6538: PetscScalar *dbg_work = NULL;
6540: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6541: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6542: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6543: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6544: /* nonzeros for local mat */
6545: PetscMalloc1(pcis->n,&nnz);
6546: if (!pcbddc->benign_change || pcbddc->fake_change) {
6547: for (i=0;i<pcis->n;i++) nnz[i]=1;
6548: } else {
6549: const PetscInt *ii;
6550: PetscInt n;
6551: PetscBool flg_row;
6552: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6553: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6554: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6555: }
6556: for (i=n_vertices;i<total_counts_cc;i++) {
6557: if (PetscBTLookup(change_basis,i)) {
6558: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6559: if (PetscBTLookup(qr_needed_idx,i)) {
6560: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6561: } else {
6562: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6563: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6564: }
6565: }
6566: }
6567: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6568: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6569: PetscFree(nnz);
6570: /* Set interior change in the matrix */
6571: if (!pcbddc->benign_change || pcbddc->fake_change) {
6572: for (i=0;i<pcis->n;i++) {
6573: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6574: }
6575: } else {
6576: const PetscInt *ii,*jj;
6577: PetscScalar *aa;
6578: PetscInt n;
6579: PetscBool flg_row;
6580: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6581: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6582: for (i=0;i<n;i++) {
6583: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6584: }
6585: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6586: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6587: }
6589: if (pcbddc->dbg_flag) {
6590: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6591: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6592: }
6595: /* Now we loop on the constraints which need a change of basis */
6596: /*
6597: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6598: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6600: Basic blocks of change of basis matrix T computed by
6602: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6604: | 1 0 ... 0 s_1/S |
6605: | 0 1 ... 0 s_2/S |
6606: | ... |
6607: | 0 ... 1 s_{n-1}/S |
6608: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6610: with S = \sum_{i=1}^n s_i^2
6611: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6612: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6614: - QR decomposition of constraints otherwise
6615: */
6616: if (qr_needed && max_size_of_constraint) {
6617: /* space to store Q */
6618: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6619: /* array to store scaling factors for reflectors */
6620: PetscMalloc1(max_constraints,&qr_tau);
6621: /* first we issue queries for optimal work */
6622: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6623: PetscBLASIntCast(max_constraints,&Blas_N);
6624: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6625: lqr_work = -1;
6626: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6627: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6628: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6629: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6630: lgqr_work = -1;
6631: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6632: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6633: PetscBLASIntCast(max_constraints,&Blas_K);
6634: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6635: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6636: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6637: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6638: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6639: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6640: /* array to store rhs and solution of triangular solver */
6641: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6642: /* allocating workspace for check */
6643: if (pcbddc->dbg_flag) {
6644: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6645: }
6646: }
6647: /* array to store whether a node is primal or not */
6648: PetscBTCreate(pcis->n_B,&is_primal);
6649: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6650: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6651: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6652: for (i=0;i<total_primal_vertices;i++) {
6653: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6654: }
6655: PetscFree(aux_primal_numbering_B);
6657: /* loop on constraints and see whether or not they need a change of basis and compute it */
6658: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6659: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6660: if (PetscBTLookup(change_basis,total_counts)) {
6661: /* get constraint info */
6662: primal_dofs = constraints_n[total_counts];
6663: dual_dofs = size_of_constraint-primal_dofs;
6665: if (pcbddc->dbg_flag) {
6666: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6667: }
6669: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6671: /* copy quadrature constraints for change of basis check */
6672: if (pcbddc->dbg_flag) {
6673: PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6674: }
6675: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6676: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6678: /* compute QR decomposition of constraints */
6679: PetscBLASIntCast(size_of_constraint,&Blas_M);
6680: PetscBLASIntCast(primal_dofs,&Blas_N);
6681: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6682: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6683: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6684: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6685: PetscFPTrapPop();
6687: /* explicitly compute R^-T */
6688: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
6689: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6690: PetscBLASIntCast(primal_dofs,&Blas_N);
6691: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6692: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6693: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6694: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6695: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6696: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6697: PetscFPTrapPop();
6699: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
6700: PetscBLASIntCast(size_of_constraint,&Blas_M);
6701: PetscBLASIntCast(size_of_constraint,&Blas_N);
6702: PetscBLASIntCast(primal_dofs,&Blas_K);
6703: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6704: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6705: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6706: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6707: PetscFPTrapPop();
6709: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6710: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6711: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6712: PetscBLASIntCast(size_of_constraint,&Blas_M);
6713: PetscBLASIntCast(primal_dofs,&Blas_N);
6714: PetscBLASIntCast(primal_dofs,&Blas_K);
6715: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6716: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6717: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6718: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6719: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6720: PetscFPTrapPop();
6721: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
6723: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6724: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6725: /* insert cols for primal dofs */
6726: for (j=0;j<primal_dofs;j++) {
6727: start_vals = &qr_basis[j*size_of_constraint];
6728: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6729: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6730: }
6731: /* insert cols for dual dofs */
6732: for (j=0,k=0;j<dual_dofs;k++) {
6733: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6734: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6735: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6736: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6737: j++;
6738: }
6739: }
6741: /* check change of basis */
6742: if (pcbddc->dbg_flag) {
6743: PetscInt ii,jj;
6744: PetscBool valid_qr=PETSC_TRUE;
6745: PetscBLASIntCast(primal_dofs,&Blas_M);
6746: PetscBLASIntCast(size_of_constraint,&Blas_N);
6747: PetscBLASIntCast(size_of_constraint,&Blas_K);
6748: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6749: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6750: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6751: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6752: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6753: PetscFPTrapPop();
6754: for (jj=0;jj<size_of_constraint;jj++) {
6755: for (ii=0;ii<primal_dofs;ii++) {
6756: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6757: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6758: }
6759: }
6760: if (!valid_qr) {
6761: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6762: for (jj=0;jj<size_of_constraint;jj++) {
6763: for (ii=0;ii<primal_dofs;ii++) {
6764: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6765: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6766: }
6767: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6768: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6769: }
6770: }
6771: }
6772: } else {
6773: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6774: }
6775: }
6776: } else { /* simple transformation block */
6777: PetscInt row,col;
6778: PetscScalar val,norm;
6780: PetscBLASIntCast(size_of_constraint,&Blas_N);
6781: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6782: for (j=0;j<size_of_constraint;j++) {
6783: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6784: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6785: if (!PetscBTLookup(is_primal,row_B)) {
6786: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6787: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6788: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6789: } else {
6790: for (k=0;k<size_of_constraint;k++) {
6791: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6792: if (row != col) {
6793: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6794: } else {
6795: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6796: }
6797: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6798: }
6799: }
6800: }
6801: if (pcbddc->dbg_flag) {
6802: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6803: }
6804: }
6805: } else {
6806: if (pcbddc->dbg_flag) {
6807: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6808: }
6809: }
6810: }
6812: /* free workspace */
6813: if (qr_needed) {
6814: if (pcbddc->dbg_flag) {
6815: PetscFree(dbg_work);
6816: }
6817: PetscFree(trs_rhs);
6818: PetscFree(qr_tau);
6819: PetscFree(qr_work);
6820: PetscFree(gqr_work);
6821: PetscFree(qr_basis);
6822: }
6823: PetscBTDestroy(&is_primal);
6824: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6825: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6827: /* assembling of global change of variable */
6828: if (!pcbddc->fake_change) {
6829: Mat tmat;
6830: PetscInt bs;
6832: VecGetSize(pcis->vec1_global,&global_size);
6833: VecGetLocalSize(pcis->vec1_global,&local_size);
6834: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6835: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6836: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6837: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6838: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6839: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6840: MatGetBlockSize(pc->pmat,&bs);
6841: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6842: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6843: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6844: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6845: MatDestroy(&tmat);
6846: VecSet(pcis->vec1_global,0.0);
6847: VecSet(pcis->vec1_N,1.0);
6848: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6849: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6850: VecReciprocal(pcis->vec1_global);
6851: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6853: /* check */
6854: if (pcbddc->dbg_flag) {
6855: PetscReal error;
6856: Vec x,x_change;
6858: VecDuplicate(pcis->vec1_global,&x);
6859: VecDuplicate(pcis->vec1_global,&x_change);
6860: VecSetRandom(x,NULL);
6861: VecCopy(x,pcis->vec1_global);
6862: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6863: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6864: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6865: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6866: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6867: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6868: VecAXPY(x,-1.0,x_change);
6869: VecNorm(x,NORM_INFINITY,&error);
6870: if (error > PETSC_SMALL) {
6871: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6872: }
6873: VecDestroy(&x);
6874: VecDestroy(&x_change);
6875: }
6876: /* adapt sub_schurs computed (if any) */
6877: if (pcbddc->use_deluxe_scaling) {
6878: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6880: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6881: if (sub_schurs && sub_schurs->S_Ej_all) {
6882: Mat S_new,tmat;
6883: IS is_all_N,is_V_Sall = NULL;
6885: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6886: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6887: if (pcbddc->deluxe_zerorows) {
6888: ISLocalToGlobalMapping NtoSall;
6889: IS is_V;
6890: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6891: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6892: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6893: ISLocalToGlobalMappingDestroy(&NtoSall);
6894: ISDestroy(&is_V);
6895: }
6896: ISDestroy(&is_all_N);
6897: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6898: MatDestroy(&sub_schurs->S_Ej_all);
6899: PetscObjectReference((PetscObject)S_new);
6900: if (pcbddc->deluxe_zerorows) {
6901: const PetscScalar *array;
6902: const PetscInt *idxs_V,*idxs_all;
6903: PetscInt i,n_V;
6905: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6906: ISGetLocalSize(is_V_Sall,&n_V);
6907: ISGetIndices(is_V_Sall,&idxs_V);
6908: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
6909: VecGetArrayRead(pcis->D,&array);
6910: for (i=0;i<n_V;i++) {
6911: PetscScalar val;
6912: PetscInt idx;
6914: idx = idxs_V[i];
6915: val = array[idxs_all[idxs_V[i]]];
6916: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
6917: }
6918: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
6919: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
6920: VecRestoreArrayRead(pcis->D,&array);
6921: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
6922: ISRestoreIndices(is_V_Sall,&idxs_V);
6923: }
6924: sub_schurs->S_Ej_all = S_new;
6925: MatDestroy(&S_new);
6926: if (sub_schurs->sum_S_Ej_all) {
6927: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
6928: MatDestroy(&sub_schurs->sum_S_Ej_all);
6929: PetscObjectReference((PetscObject)S_new);
6930: if (pcbddc->deluxe_zerorows) {
6931: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
6932: }
6933: sub_schurs->sum_S_Ej_all = S_new;
6934: MatDestroy(&S_new);
6935: }
6936: ISDestroy(&is_V_Sall);
6937: MatDestroy(&tmat);
6938: }
6939: /* destroy any change of basis context in sub_schurs */
6940: if (sub_schurs && sub_schurs->change) {
6941: PetscInt i;
6943: for (i=0;i<sub_schurs->n_subs;i++) {
6944: KSPDestroy(&sub_schurs->change[i]);
6945: }
6946: PetscFree(sub_schurs->change);
6947: }
6948: }
6949: if (pcbddc->switch_static) { /* need to save the local change */
6950: pcbddc->switch_static_change = localChangeOfBasisMatrix;
6951: } else {
6952: MatDestroy(&localChangeOfBasisMatrix);
6953: }
6954: /* determine if any process has changed the pressures locally */
6955: pcbddc->change_interior = pcbddc->benign_have_null;
6956: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6957: MatDestroy(&pcbddc->ConstraintMatrix);
6958: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6959: pcbddc->use_qr_single = qr_needed;
6960: }
6961: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6962: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6963: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
6964: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6965: } else {
6966: Mat benign_global = NULL;
6967: if (pcbddc->benign_have_null) {
6968: Mat M;
6970: pcbddc->change_interior = PETSC_TRUE;
6971: VecCopy(matis->counter,pcis->vec1_N);
6972: VecReciprocal(pcis->vec1_N);
6973: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
6974: if (pcbddc->benign_change) {
6975: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
6976: MatDiagonalScale(M,pcis->vec1_N,NULL);
6977: } else {
6978: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
6979: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
6980: }
6981: MatISSetLocalMat(benign_global,M);
6982: MatDestroy(&M);
6983: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
6984: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
6985: }
6986: if (pcbddc->user_ChangeOfBasisMatrix) {
6987: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
6988: MatDestroy(&benign_global);
6989: } else if (pcbddc->benign_have_null) {
6990: pcbddc->ChangeOfBasisMatrix = benign_global;
6991: }
6992: }
6993: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6994: IS is_global;
6995: const PetscInt *gidxs;
6997: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
6998: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
6999: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7000: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7001: ISDestroy(&is_global);
7002: }
7003: }
7004: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7005: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7006: }
7008: if (!pcbddc->fake_change) {
7009: /* add pressure dofs to set of primal nodes for numbering purposes */
7010: for (i=0;i<pcbddc->benign_n;i++) {
7011: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7012: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7013: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7014: pcbddc->local_primal_size_cc++;
7015: pcbddc->local_primal_size++;
7016: }
7018: /* check if a new primal space has been introduced (also take into account benign trick) */
7019: pcbddc->new_primal_space_local = PETSC_TRUE;
7020: if (olocal_primal_size == pcbddc->local_primal_size) {
7021: PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
7022: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7023: if (!pcbddc->new_primal_space_local) {
7024: PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);
7025: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7026: }
7027: }
7028: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7029: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7030: }
7031: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
7033: /* flush dbg viewer */
7034: if (pcbddc->dbg_flag) {
7035: PetscViewerFlush(pcbddc->dbg_viewer);
7036: }
7038: /* free workspace */
7039: PetscBTDestroy(&qr_needed_idx);
7040: PetscBTDestroy(&change_basis);
7041: if (!pcbddc->adaptive_selection) {
7042: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7043: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7044: } else {
7045: PetscFree5(pcbddc->adaptive_constraints_n,
7046: pcbddc->adaptive_constraints_idxs_ptr,
7047: pcbddc->adaptive_constraints_data_ptr,
7048: pcbddc->adaptive_constraints_idxs,
7049: pcbddc->adaptive_constraints_data);
7050: PetscFree(constraints_n);
7051: PetscFree(constraints_idxs_B);
7052: }
7053: return(0);
7054: }
7055: /* #undef PETSC_MISSING_LAPACK_GESVD */
7057: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7058: {
7059: ISLocalToGlobalMapping map;
7060: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7061: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
7062: PetscInt i,N;
7063: PetscBool rcsr = PETSC_FALSE;
7064: PetscErrorCode ierr;
7067: if (pcbddc->recompute_topography) {
7068: pcbddc->graphanalyzed = PETSC_FALSE;
7069: /* Reset previously computed graph */
7070: PCBDDCGraphReset(pcbddc->mat_graph);
7071: /* Init local Graph struct */
7072: MatGetSize(pc->pmat,&N,NULL);
7073: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7074: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
7076: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7077: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7078: }
7079: /* Check validity of the csr graph passed in by the user */
7080: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
7082: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7083: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7084: PetscInt *xadj,*adjncy;
7085: PetscInt nvtxs;
7086: PetscBool flg_row=PETSC_FALSE;
7088: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7089: if (flg_row) {
7090: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7091: pcbddc->computed_rowadj = PETSC_TRUE;
7092: }
7093: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7094: rcsr = PETSC_TRUE;
7095: }
7096: if (pcbddc->dbg_flag) {
7097: PetscViewerFlush(pcbddc->dbg_viewer);
7098: }
7100: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7101: PetscReal *lcoords;
7102: PetscInt n;
7103: MPI_Datatype dimrealtype;
7105: /* TODO: support for blocked */
7106: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7107: MatGetLocalSize(matis->A,&n,NULL);
7108: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7109: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7110: MPI_Type_commit(&dimrealtype);
7111: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7112: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7113: MPI_Type_free(&dimrealtype);
7114: PetscFree(pcbddc->mat_graph->coords);
7116: pcbddc->mat_graph->coords = lcoords;
7117: pcbddc->mat_graph->cloc = PETSC_TRUE;
7118: pcbddc->mat_graph->cnloc = n;
7119: }
7120: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7121: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);
7123: /* Setup of Graph */
7124: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7125: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
7127: /* attach info on disconnected subdomains if present */
7128: if (pcbddc->n_local_subs) {
7129: PetscInt *local_subs,n,totn;
7131: MatGetLocalSize(matis->A,&n,NULL);
7132: PetscMalloc1(n,&local_subs);
7133: for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7134: for (i=0;i<pcbddc->n_local_subs;i++) {
7135: const PetscInt *idxs;
7136: PetscInt nl,j;
7138: ISGetLocalSize(pcbddc->local_subs[i],&nl);
7139: ISGetIndices(pcbddc->local_subs[i],&idxs);
7140: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7141: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7142: }
7143: for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7144: pcbddc->mat_graph->n_local_subs = totn + 1;
7145: pcbddc->mat_graph->local_subs = local_subs;
7146: }
7147: }
7149: if (!pcbddc->graphanalyzed) {
7150: /* Graph's connected components analysis */
7151: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7152: pcbddc->graphanalyzed = PETSC_TRUE;
7153: pcbddc->corner_selected = pcbddc->corner_selection;
7154: }
7155: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7156: return(0);
7157: }
7159: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
7160: {
7161: PetscInt i,j;
7162: PetscScalar *alphas;
7163: PetscReal norm;
7167: if (!n) return(0);
7168: PetscMalloc1(n,&alphas);
7169: VecNormalize(vecs[0],&norm);
7170: if (norm < PETSC_SMALL) {
7171: VecSet(vecs[0],0.0);
7172: }
7173: for (i=1;i<n;i++) {
7174: VecMDot(vecs[i],i,vecs,alphas);
7175: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7176: VecMAXPY(vecs[i],i,alphas,vecs);
7177: VecNormalize(vecs[i],&norm);
7178: if (norm < PETSC_SMALL) {
7179: VecSet(vecs[i],0.0);
7180: }
7181: }
7182: PetscFree(alphas);
7183: return(0);
7184: }
/*
   PCBDDCMatISGetSubassemblingPattern - for a MATIS matrix, computes the rank each
   "active" process (one whose local matrix has at least one row) should send to.

   mat          - the matrix; an error is raised if it is not of type MATIS
   n_subdomains - in: requested number of target subdomains, must be > 0;
                  out: possibly modified (clamped to the number of void processes,
                  set to the number of active processes in the shortcut path, or
                  clamped to the size of the communicator of active processes)
   redprocs     - when 0 < redprocs < (number of active processes) the subdomain
                  adjacency is assembled as an AIJ matrix whose rows live on the
                  first redprocs ranks of the active communicator
   is_sends     - out: IS on mat's communicator holding one entry (the destination
                  rank) on active processes and no entries on the others
   have_void    - optional out: PETSC_TRUE when at least one process is not active

   NOTE(review): in this listing the return values of the PETSc/MPI calls are not
   checked; failures of those calls are not reported to the caller.
*/
7186: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7187: {
7188: Mat A;
7189: PetscInt n_neighs,*neighs,*n_shared,**shared;
7190: PetscMPIInt size,rank,color;
7191: PetscInt *xadj,*adjncy;
7192: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7193: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7194: PetscInt void_procs,*procs_candidates = NULL;
7195: PetscInt xadj_count,*count;
7196: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7197: PetscSubcomm psubcomm;
7198: MPI_Comm subcomm;
/* only MATIS matrices are supported */
7203: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7204: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7207: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
7209: if (have_void) *have_void = PETSC_FALSE;
7210: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7211: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7212: MatISGetLocalMat(mat,&A);
7213: MatGetLocalSize(A,&n,NULL);
/* a process is active (1) when its local matrix has rows, void (0) otherwise */
7214: im_active = !!n;
7215: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7216: void_procs = size - active_procs;
7217: /* get ranks of non-active processes in mat communicator */
7218: if (void_procs) {
7219: PetscInt ncand;
7221: if (have_void) *have_void = PETSC_TRUE;
7222: PetscMalloc1(size,&procs_candidates);
7223: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
/* compact in place: the first ncand entries become the ranks of the void processes;
   entries from ncand on are left as they were */
7224: for (i=0,ncand=0;i<size;i++) {
7225: if (!procs_candidates[i]) {
7226: procs_candidates[ncand++] = i;
7227: }
7228: }
7229: /* force n_subdomains to be not greater than the number of non-active processes */
7230: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7231: }
7233: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7234: number of subdomains requested 1 -> send to master or first candidate in voids */
7235: MatGetSize(mat,&N,NULL);
7236: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7237: PetscInt issize,isidx,dest;
7238: if (*n_subdomains == 1) dest = 0;
7239: else dest = rank;
7240: if (im_active) {
7241: issize = 1;
/* NOTE(review): only the first void_procs entries of procs_candidates hold void ranks;
   with dest = rank this lookup relies on rank being smaller than that count - confirm */
7242: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7243: isidx = procs_candidates[dest];
7244: } else {
7245: isidx = dest;
7246: }
7247: } else {
/* void processes contribute no entry to the IS */
7248: issize = 0;
7249: isidx = -1;
7250: }
7251: if (*n_subdomains != 1) *n_subdomains = active_procs;
/* isidx is copied (PETSC_COPY_VALUES), so passing the address of a local is fine */
7252: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7253: PetscFree(procs_candidates);
7254: return(0);
7255: }
/* options are read from the global database with no prefix; threshold is never taken below 2 */
7256: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7257: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7258: threshold = PetscMax(threshold,2);
7260: /* Get info on mapping */
7261: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7263: /* build local CSR graph of subdomains' connectivity */
/* one local vertex (this subdomain) with at most n_neighs-1 edges */
7264: PetscMalloc1(2,&xadj);
7265: xadj[0] = 0;
7266: xadj[1] = PetscMax(n_neighs-1,0);
7267: PetscMalloc1(xadj[1],&adjncy);
7268: PetscMalloc1(xadj[1],&adjncy_wgt);
/* count[k]: number of neighbor entries (index 0 of the info arrays is skipped -
   presumably the process itself, TODO confirm) that share local dof k */
7269: PetscCalloc1(n,&count);
7270: for (i=1;i<n_neighs;i++)
7271: for (j=0;j<n_shared[i];j++)
7272: count[shared[i][j]] += 1;
/* a neighbor becomes an edge if at least one dof shared with it has a count below
   threshold; the edge weight is the number of dofs shared with that neighbor */
7274: xadj_count = 0;
7275: for (i=1;i<n_neighs;i++) {
7276: for (j=0;j<n_shared[i];j++) {
7277: if (count[shared[i][j]] < threshold) {
7278: adjncy[xadj_count] = neighs[i];
7279: adjncy_wgt[xadj_count] = n_shared[i];
7280: xadj_count++;
7281: break;
7282: }
7283: }
7284: }
7285: xadj[1] = xadj_count;
7286: PetscFree(count);
7287: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
/* sort the neighbor ranks, permuting the weights accordingly */
7288: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
/* single-entry buffer for the destination rank; handed to the IS at the end with PETSC_OWN_POINTER */
7290: PetscMalloc1(1,&ranks_send_to_idx);
7292: /* Restrict work on active processes only */
7293: PetscMPIIntCast(im_active,&color);
7294: if (void_procs) {
7295: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7296: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7297: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7298: subcomm = PetscSubcommChild(psubcomm);
7299: } else {
/* every process is active: work directly on mat's communicator */
7300: psubcomm = NULL;
7301: subcomm = PetscObjectComm((PetscObject)mat);
7302: }
7304: v_wgt = NULL;
7305: if (!color) {
/* void process: it takes no part in the partitioning, release the graph arrays */
7306: PetscFree(xadj);
7307: PetscFree(adjncy);
7308: PetscFree(adjncy_wgt);
7309: } else {
7310: Mat subdomain_adj;
7311: IS new_ranks,new_ranks_contig;
7312: MatPartitioning partitioner;
7313: PetscInt rstart=0,rend=0;
7314: PetscInt *is_indices,*oldranks;
/* this size shadows the outer one: from here on it is the size of subcomm */
7315: PetscMPIInt size;
7316: PetscBool aggregate;
7318: MPI_Comm_size(subcomm,&size);
7319: if (void_procs) {
7320: PetscInt prank = rank;
/* oldranks[r]: rank in mat's communicator of the process with rank r in subcomm */
7321: PetscMalloc1(size,&oldranks);
7322: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
/* translate neighbor ranks to subcomm numbering, in place
   (assumes oldranks is sorted ascending, as a sorted search is used - TODO confirm) */
7323: for (i=0;i<xadj[1];i++) {
7324: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7325: }
7326: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7327: } else {
7328: oldranks = NULL;
7329: }
7330: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7331: if (aggregate) { /* TODO: all this part could be made more efficient */
7332: PetscInt lrows,row,ncols,*cols;
7333: PetscMPIInt nrank;
7334: PetscScalar *vals;
7336: MPI_Comm_rank(subcomm,&nrank);
/* the size rows are spread over the first redprocs ranks, the remainder going
   one extra row each to the lowest ranks; the other ranks own no rows */
7337: lrows = 0;
7338: if (nrank<redprocs) {
7339: lrows = size/redprocs;
7340: if (nrank<size%redprocs) lrows++;
7341: }
7342: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7343: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7344: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7345: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
/* each process inserts the row indexed by its own subcomm rank (possibly owned elsewhere);
   values are the edge weights converted to PetscScalar */
7346: row = nrank;
7347: ncols = xadj[1]-xadj[0];
7348: cols = adjncy;
7349: PetscMalloc1(ncols,&vals);
7350: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7351: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7352: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7353: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7354: PetscFree(xadj);
7355: PetscFree(adjncy);
7356: PetscFree(adjncy_wgt);
7357: PetscFree(vals);
7358: if (use_vwgt) {
7359: Vec v;
7360: const PetscScalar *array;
7361: PetscInt nl;
/* vertex weights: every process stores its local size n at its own row; after
   assembly the locally owned entries are converted back to integers */
7363: MatCreateVecs(subdomain_adj,&v,NULL);
7364: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7365: VecAssemblyBegin(v);
7366: VecAssemblyEnd(v);
7367: VecGetLocalSize(v,&nl);
7368: VecGetArrayRead(v,&array);
7369: PetscMalloc1(nl,&v_wgt);
7370: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7371: VecRestoreArrayRead(v,&array);
7372: VecDestroy(&v);
7373: }
7374: } else {
/* one local row per process; xadj/adjncy/adjncy_wgt are not freed in this branch -
   presumably they are taken over by the MPIAdj matrix (confirm against its manual page) */
7375: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7376: if (use_vwgt) {
7377: PetscMalloc1(1,&v_wgt);
7378: v_wgt[0] = n;
7379: }
7380: }
7381: /* MatView(subdomain_adj,0); */
7383: /* Partition */
7384: MatPartitioningCreate(subcomm,&partitioner);
7385: #if defined(PETSC_HAVE_PTSCOTCH)
7386: MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7387: #elif defined(PETSC_HAVE_PARMETIS)
7388: MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7389: #else
7390: MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7391: #endif
7392: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
/* v_wgt is not freed in this function - presumably owned by the partitioner from here on (confirm) */
7393: if (v_wgt) {
7394: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7395: }
/* cannot ask for more parts than active processes */
7396: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7397: MatPartitioningSetNParts(partitioner,*n_subdomains);
7398: MatPartitioningSetFromOptions(partitioner);
7399: MatPartitioningApply(partitioner,&new_ranks);
7400: /* MatPartitioningView(partitioner,0); */
7402: /* renumber new_ranks to avoid "holes" in new set of processors */
7403: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7404: ISDestroy(&new_ranks);
7405: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7406: if (!aggregate) {
/* the single local row is this process: is_indices[0] is its part; map it back
   to a rank of mat's communicator when a sub-communicator was used */
7407: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7408: #if defined(PETSC_USE_DEBUG)
7409: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7410: #endif
7411: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7412: } else if (oldranks) {
7413: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7414: } else {
7415: ranks_send_to_idx[0] = is_indices[0];
7416: }
7417: } else {
7418: PetscInt idx = 0;
7419: PetscMPIInt tag;
7420: MPI_Request *reqs;
/* rows were aggregated: the owner of row i sends the part of that row to subcomm
   rank i, and every active process receives the single value meant for it */
7422: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7423: PetscMalloc1(rend-rstart,&reqs);
7424: for (i=rstart;i<rend;i++) {
7425: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7426: }
7427: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7428: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7429: PetscFree(reqs);
7430: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7431: #if defined(PETSC_USE_DEBUG)
7432: if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7433: #endif
7434: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7435: } else if (oldranks) {
7436: ranks_send_to_idx[0] = oldranks[idx];
7437: } else {
7438: ranks_send_to_idx[0] = idx;
7439: }
7440: }
7441: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7442: /* clean up */
7443: PetscFree(oldranks);
7444: ISDestroy(&new_ranks_contig);
7445: MatDestroy(&subdomain_adj);
7446: MatPartitioningDestroy(&partitioner);
7447: }
7448: PetscSubcommDestroy(&psubcomm);
7449: PetscFree(procs_candidates);
7451: /* assemble parallel IS for sends */
/* active processes contribute one entry, void processes none; the IS takes
   ownership of ranks_send_to_idx (PETSC_OWN_POINTER) */
7452: i = 1;
7453: if (!color) i=0;
7454: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7455: return(0);
7456: }
/* Private tags for the storage format of a local matrix.
   PCBDDCMatISSubassemble stores one of these (cast to PetscInt) as the first entry of the
   index message it sends, and the receiver uses it to choose the new local matrix type
   (MATDENSE_PRIVATE and MATAIJ_PRIVATE -> MATSEQAIJ, MATBAIJ_PRIVATE -> MATSEQBAIJ,
   MATSBAIJ_PRIVATE -> MATSEQSBAIJ). */
7458: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
/*
  PCBDDCMatISSubassemble - Subassembles a MATIS matrix: every process ships its local dense
  matrix, together with its local-to-global row indices, to the ranks listed in is_sends;
  each receiving process adds all the blocks it receives into one new local matrix, which
  becomes the local matrix of the MATIS returned in mat_n.

  Input parameters:
    mat           - matrix of type MATIS; its local matrix must be square and of type MATSEQDENSE
    is_sends      - ranks (in the communicator of mat) this process sends its data to; if NULL,
                    the pattern is obtained from PCBDDCMatISGetSubassemblingPattern() and
                    n_subdomains must be nonzero
    n_subdomains  - target number of subdomains, only used when is_sends is NULL
    restrict_comm - if true, the new matrix is created on a sub-communicator
    restrict_full - with restrict_comm: if true, every process that receives nothing is left
                    out; if false, only processes that send but do not receive are left out
    reuse         - if true and (*mat_n) is not NULL, (*mat_n) is reused after checking its type,
                    its global sizes and (with restrict_comm) its communicator size
    nis, isarray  - optional index sets travelling with the matrix data; on return every
                    isarray[i] has been destroyed and replaced by an IS on the new communicator
                    holding the sorted, duplicate-free union of the received indices
    nvecs, nnsp_vec - optional vector (at most one is supported); nnsp_vec[0] is destroyed and
                    replaced by a vector on the new communicator with local size equal to the
                    new number of local rows, filled by accumulating the received entries

  Output parameter:
    mat_n         - the subassembled MATIS; set to NULL on processes that were left out of the
                    restricted communicator (on those processes isarray[] and nnsp_vec[0] are
                    destroyed as well)

  Errors (as raised by the SETERRQ calls below): PETSC_ERR_SUP when mat is not MATIS, when
  nvecs > 1, when the local matrix is not SEQDENSE or not square, when the reused matrix is not
  MATIS or has different global sizes, or when neither is_sends nor n_subdomains is given;
  PETSC_ERR_PLIB when the reused matrix lives on a communicator of the wrong size.
*/
7460: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7461: {
7462: Mat local_mat;
7463: IS is_sends_internal;
7464: PetscInt rows,cols,new_local_rows;
7465: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7466: PetscBool ismatis,isdense,newisdense,destroy_mat;
7467: ISLocalToGlobalMapping l2gmap;
7468: PetscInt* l2gmap_indices;
7469: const PetscInt* is_indices;
7470: MatType new_local_type;
7471: /* buffers */
7472: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7473: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7474: PetscInt *recv_buffer_idxs_local;
7475: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
7476: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7477: /* MPI */
7478: MPI_Comm comm,comm_n;
7479: PetscSubcomm subcomm;
7480: PetscMPIInt n_sends,n_recvs,size;
7481: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7482: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7483: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7484: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7485: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7486: PetscErrorCode ierr;
/* the input matrix must be a MATIS */
7490: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7491: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
/* at most one vector can be carried along */
7498: if (nvecs) {
7499: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7501: }
7502: /* further checks */
7503: MatISGetLocalMat(mat,&local_mat);
7504: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7505: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
/* rows and cols hold the sizes of the LOCAL matrix from here on (they are overwritten with the global sizes only in the !reuse branch further down) */
7506: MatGetSize(local_mat,&rows,&cols);
7507: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
/* when an existing matrix is reused it must be a MATIS with the same global sizes as mat */
7508: if (reuse && *mat_n) {
7509: PetscInt mrows,mcols,mnrows,mncols;
7511: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7512: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7513: MatGetSize(mat,&mrows,&mcols);
7514: MatGetSize(*mat_n,&mnrows,&mncols);
7515: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7516: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7517: }
7518: MatGetBlockSize(local_mat,&bs);
7521: /* prepare IS for sending if not provided */
7522: if (!is_sends) {
7523: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7524: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7525: } else {
/* take a reference so that is_sends_internal can be destroyed unconditionally later */
7526: PetscObjectReference((PetscObject)is_sends);
7527: is_sends_internal = is_sends;
7528: }
7530: /* get comm */
7531: PetscObjectGetComm((PetscObject)mat,&comm);
7533: /* compute number of sends */
7534: ISGetLocalSize(is_sends_internal,&i);
7535: PetscMPIIntCast(i,&n_sends);
7537: /* compute number of receives */
7538: MPI_Comm_size(comm,&size);
7539: PetscMalloc1(size,&iflags);
7540: PetscMemzero(iflags,size*sizeof(*iflags));
7541: ISGetIndices(is_sends_internal,&is_indices);
/* mark every destination rank; the number of incoming messages is returned in n_recvs */
7542: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7543: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7544: PetscFree(iflags);
7546: /* restrict comm if requested */
7547: subcomm = 0;
7548: destroy_mat = PETSC_FALSE;
7549: if (restrict_comm) {
7550: PetscMPIInt color,subcommsize;
/* color == 1 marks a process that is left out of the new communicator */
7552: color = 0;
7553: if (restrict_full) {
7554: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7555: } else {
7556: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7557: }
/* sum of the colors = number of excluded processes; the remainder is the size of the new comm */
7558: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7559: subcommsize = size - subcommsize;
7560: /* check if reuse has been requested */
7561: if (reuse) {
7562: if (*mat_n) {
7563: PetscMPIInt subcommsize2;
7564: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7565: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7566: comm_n = PetscObjectComm((PetscObject)*mat_n);
7567: } else {
/* no matrix to reuse on this process: a temporary ("fake") MATIS is created on PETSC_COMM_SELF below */
7568: comm_n = PETSC_COMM_SELF;
7569: }
7570: } else { /* MAT_INITIAL_MATRIX */
7571: PetscMPIInt rank;
/* split comm in two groups by color, keeping the original rank as ordering key */
7573: MPI_Comm_rank(comm,&rank);
7574: PetscSubcommCreate(comm,&subcomm);
7575: PetscSubcommSetNumber(subcomm,2);
7576: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7577: comm_n = PetscSubcommChild(subcomm);
7578: }
7579: /* flag to destroy *mat_n if not significant (i.e. color is nonzero on this process) */
7580: if (color) destroy_mat = PETSC_TRUE;
7581: } else {
7582: comm_n = comm;
7583: }
7585: /* prepare send/receive buffers */
/* ilengths_* are indexed by destination rank and hold the message length for that rank (zero if nothing is sent) */
7586: PetscMalloc1(size,&ilengths_idxs);
7587: PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));
7588: PetscMalloc1(size,&ilengths_vals);
7589: PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));
7590: if (nis) {
7591: PetscCalloc1(size,&ilengths_idxs_is);
7592: }
7594: /* Get data from local matrices */
/* this check repeats the SEQDENSE test performed near the top of the function */
7595: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7596: /* TODO: See below some guidelines on how to prepare the local buffers */
7597: /*
7598: send_buffer_vals should contain the raw values of the local matrix
7599: send_buffer_idxs should contain:
7600: - MatType_PRIVATE type
7601: - PetscInt size_of_l2gmap
7602: - PetscInt global_row_indices[size_of_l2gmap]
7603: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7604: */
7605: else {
/* dense case: the values buffer is the dense array itself (restored after the sends complete);
   the index buffer is [MATDENSE_PRIVATE, n, idx_0 ... idx_{n-1}] with n the size of the row l2g map */
7606: MatDenseGetArray(local_mat,&send_buffer_vals);
7607: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7608: PetscMalloc1(i+2,&send_buffer_idxs);
7609: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7610: send_buffer_idxs[1] = i;
7611: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7612: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
7613: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7614: PetscMPIIntCast(i,&len);
/* the same two buffers go to every destination: len*len values and len+2 indices */
7615: for (i=0;i<n_sends;i++) {
7616: ilengths_vals[is_indices[i]] = len*len;
7617: ilengths_idxs[is_indices[i]] = len+2;
7618: }
7619: }
/* obtain the list of sending ranks (onodes) and the lengths of the incoming index and value messages */
7620: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7621: /* additional is (if any) */
7622: if (nis) {
7623: PetscMPIInt psum;
7624: PetscInt j;
/* first pass: total buffer size, one length entry plus the indices for each IS */
7625: for (j=0,psum=0;j<nis;j++) {
7626: PetscInt plen;
7627: ISGetLocalSize(isarray[j],&plen);
7628: PetscMPIIntCast(plen,&len);
7629: psum += len+1; /* indices + length */
7630: }
7631: PetscMalloc1(psum,&send_buffer_idxs_is);
/* second pass: pack [len_0, idx..., len_1, idx..., ...] */
7632: for (j=0,psum=0;j<nis;j++) {
7633: PetscInt plen;
7634: const PetscInt *is_array_idxs;
7635: ISGetLocalSize(isarray[j],&plen);
7636: send_buffer_idxs_is[psum] = plen;
7637: ISGetIndices(isarray[j],&is_array_idxs);
7638: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
7639: ISRestoreIndices(isarray[j],&is_array_idxs);
7640: psum += plen+1; /* indices + length */
7641: }
7642: for (i=0;i<n_sends;i++) {
7643: ilengths_idxs_is[is_indices[i]] = psum;
7644: }
7645: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7646: }
7647: MatISRestoreLocalMat(mat,&local_mat);
/* size the receive buffers as the sum of the incoming message lengths */
7649: buf_size_idxs = 0;
7650: buf_size_vals = 0;
7651: buf_size_idxs_is = 0;
7652: buf_size_vecs = 0;
7653: for (i=0;i<n_recvs;i++) {
7654: buf_size_idxs += (PetscInt)olengths_idxs[i];
7655: buf_size_vals += (PetscInt)olengths_vals[i];
7656: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
/* the vector receives posted below use olengths_idxs[i]-2 scalars each, so this over-allocates by two entries per message */
7657: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7658: }
7659: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7660: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7661: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7662: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7664: /* get new tags for clean communications */
7665: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7666: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7667: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7668: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7670: /* allocate for requests */
7671: PetscMalloc1(n_sends,&send_req_idxs);
7672: PetscMalloc1(n_sends,&send_req_vals);
7673: PetscMalloc1(n_sends,&send_req_idxs_is);
7674: PetscMalloc1(n_sends,&send_req_vecs);
7675: PetscMalloc1(n_recvs,&recv_req_idxs);
7676: PetscMalloc1(n_recvs,&recv_req_vals);
7677: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7678: PetscMalloc1(n_recvs,&recv_req_vecs);
7680: /* communications */
/* post all receives first; messages are stored back to back in the receive buffers, in the order of onodes */
7681: ptr_idxs = recv_buffer_idxs;
7682: ptr_vals = recv_buffer_vals;
7683: ptr_idxs_is = recv_buffer_idxs_is;
7684: ptr_vecs = recv_buffer_vecs;
7685: for (i=0;i<n_recvs;i++) {
7686: source_dest = onodes[i];
7687: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7688: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7689: ptr_idxs += olengths_idxs[i];
7690: ptr_vals += olengths_vals[i];
7691: if (nis) {
7692: source_dest = onodes_is[i];
7693: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7694: ptr_idxs_is += olengths_idxs_is[i];
7695: }
7696: if (nvecs) {
/* one scalar per l2g index: the index message length minus its two header entries */
7697: source_dest = onodes[i];
7698: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7699: ptr_vecs += olengths_idxs[i]-2;
7700: }
7701: }
/* then post the sends; every destination gets the same buffers */
7702: for (i=0;i<n_sends;i++) {
7703: PetscMPIIntCast(is_indices[i],&source_dest);
7704: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7705: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7706: if (nis) {
7707: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7708: }
7709: if (nvecs) {
/* NOTE(review): VecGetArray() is called on every iteration of this loop, while a single VecRestoreArray()
   follows the Waitall on the vector sends further down; confirm that n_sends is expected to be exactly 1 here */
7710: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7711: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7712: }
7713: }
7714: ISRestoreIndices(is_sends_internal,&is_indices);
7715: ISDestroy(&is_sends_internal);
7717: /* assemble new l2g map */
/* the index messages must have arrived before their headers can be read */
7718: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7719: ptr_idxs = recv_buffer_idxs;
7720: new_local_rows = 0;
7721: for (i=0;i<n_recvs;i++) {
7722: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7723: ptr_idxs += olengths_idxs[i];
7724: }
7725: PetscMalloc1(new_local_rows,&l2gmap_indices);
/* concatenate all the received global indices */
7726: ptr_idxs = recv_buffer_idxs;
7727: new_local_rows = 0;
7728: for (i=0;i<n_recvs;i++) {
7729: PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
7730: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7731: ptr_idxs += olengths_idxs[i];
7732: }
/* sort and remove duplicates; new_local_rows is updated to the number of distinct indices */
7733: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7734: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7735: PetscFree(l2gmap_indices);
7737: /* infer new local matrix type from received local matrices type */
7738: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7739: /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7740: if (n_recvs) {
/* the reference type is the one this process put in its own send buffer; any received message
   with a different type tag makes the result fall back to AIJ */
7741: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7742: ptr_idxs = recv_buffer_idxs;
7743: for (i=0;i<n_recvs;i++) {
7744: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7745: new_local_type_private = MATAIJ_PRIVATE;
7746: break;
7747: }
7748: ptr_idxs += olengths_idxs[i];
7749: }
7750: switch (new_local_type_private) {
7751: case MATDENSE_PRIVATE:
/* dense local matrices are subassembled into a SEQAIJ matrix */
7752: new_local_type = MATSEQAIJ;
7753: bs = 1;
7754: break;
7755: case MATAIJ_PRIVATE:
7756: new_local_type = MATSEQAIJ;
7757: bs = 1;
7758: break;
7759: case MATBAIJ_PRIVATE:
7760: new_local_type = MATSEQBAIJ;
7761: break;
7762: case MATSBAIJ_PRIVATE:
7763: new_local_type = MATSEQSBAIJ;
7764: break;
7765: default:
7766: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7767: break;
7768: }
7769: } else { /* by default, new_local_type is seqaij */
7770: new_local_type = MATSEQAIJ;
7771: bs = 1;
7772: }
7774: /* create MATIS object if needed */
7775: if (!reuse) {
/* rows and cols now become the global sizes of mat */
7776: MatGetSize(mat,&rows,&cols);
7777: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7778: } else {
7779: /* it also destroys the local matrices */
7780: if (*mat_n) {
7781: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7782: } else { /* this is a fake object */
/* NOTE(review): rows and cols still hold the sizes of the original local matrix in this branch */
7783: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7784: }
7785: }
7786: MatISGetLocalMat(*mat_n,&local_mat);
7787: MatSetType(local_mat,new_local_type);
/* the matrix values are needed from here on */
7789: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7791: /* Global to local map of received indices */
7792: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
/* the whole receive buffer is mapped, including the two header entries of every message; those are put back right after */
7793: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7794: ISLocalToGlobalMappingDestroy(&l2gmap);
7796: /* restore attributes -> type of incoming data and its size */
7797: buf_size_idxs = 0;
7798: for (i=0;i<n_recvs;i++) {
7799: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7800: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7801: buf_size_idxs += (PetscInt)olengths_idxs[i];
7802: }
7803: PetscFree(recv_buffer_idxs);
7805: /* set preallocation */
7806: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7807: if (!newisdense) {
7808: PetscInt *new_local_nnz=0;
7810: ptr_idxs = recv_buffer_idxs_local;
7811: if (n_recvs) {
7812: PetscCalloc1(new_local_rows,&new_local_nnz);
7813: }
7814: for (i=0;i<n_recvs;i++) {
7815: PetscInt j;
7816: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
/* a dense block of size n adds n to the count of every local row it touches */
7817: for (j=0;j<*(ptr_idxs+1);j++) {
7818: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7819: }
7820: } else {
7821: /* TODO */
7822: }
7823: ptr_idxs += olengths_idxs[i];
7824: }
7825: if (new_local_nnz) {
/* overlapping blocks are counted more than once, so cap each row count at the number of local rows */
7826: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
/* the AIJ, BAIJ and SBAIJ preallocation routines are called back to back, rescaling the counts in place
   before each call; presumably only the call matching the actual type of local_mat has an effect (TODO confirm) */
7827: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7828: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7829: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7830: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7831: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7832: } else {
7833: MatSetUp(local_mat);
7834: }
7835: PetscFree(new_local_nnz);
7836: } else {
7837: MatSetUp(local_mat);
7838: }
7840: /* set values */
7841: ptr_vals = recv_buffer_vals;
7842: ptr_idxs = recv_buffer_idxs_local;
7843: for (i=0;i<n_recvs;i++) {
7844: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
/* the received block is inserted with MAT_ROW_ORIENTED switched off and added to what is already there;
   the option is switched back on after the flush assembly */
7845: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7846: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7847: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7848: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7849: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7850: } else {
7851: /* TODO */
7852: }
7853: ptr_idxs += olengths_idxs[i];
7854: ptr_vals += olengths_vals[i];
7855: }
7856: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7857: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7858: MatISRestoreLocalMat(*mat_n,&local_mat);
7859: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7860: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7861: PetscFree(recv_buffer_vals);
7863: #if 0
7864: if (!restrict_comm) { /* check */
7865: Vec lvec,rvec;
7866: PetscReal infty_error;
7868: MatCreateVecs(mat,&rvec,&lvec);
7869: VecSetRandom(rvec,NULL);
7870: MatMult(mat,rvec,lvec);
7871: VecScale(lvec,-1.0);
7872: MatMultAdd(*mat_n,rvec,lvec,lvec);
7873: VecNorm(lvec,NORM_INFINITY,&infty_error);
7874: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7875: VecDestroy(&rvec);
7876: VecDestroy(&lvec);
7877: }
7878: #endif
7880: /* assemble new additional is (if any) */
7881: if (nis) {
7882: PetscInt **temp_idxs,*count_is,j,psum;
7884: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
7885: PetscCalloc1(nis,&count_is);
/* first pass over the received data: count the indices belonging to each IS and their total */
7886: ptr_idxs = recv_buffer_idxs_is;
7887: psum = 0;
7888: for (i=0;i<n_recvs;i++) {
7889: for (j=0;j<nis;j++) {
7890: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7891: count_is[j] += plen; /* increment counting of buffer for j-th IS */
7892: psum += plen;
7893: ptr_idxs += plen+1; /* shift pointer to received data */
7894: }
7895: }
/* a single allocation holds all the indices; temp_idxs[i] points to the slice of the i-th IS */
7896: PetscMalloc1(nis,&temp_idxs);
7897: PetscMalloc1(psum,&temp_idxs[0]);
7898: for (i=1;i<nis;i++) {
7899: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
7900: }
/* second pass: copy the indices into the slices, reusing count_is as the write offsets */
7901: PetscMemzero(count_is,nis*sizeof(PetscInt));
7902: ptr_idxs = recv_buffer_idxs_is;
7903: for (i=0;i<n_recvs;i++) {
7904: for (j=0;j<nis;j++) {
7905: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7906: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
7907: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
7908: ptr_idxs += plen+1; /* shift pointer to received data */
7909: }
7910: }
/* replace every input IS with the sorted, duplicate-free set of received indices on the new comm */
7911: for (i=0;i<nis;i++) {
7912: ISDestroy(&isarray[i]);
7913: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
7914: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
7915: }
7916: PetscFree(count_is);
7917: PetscFree(temp_idxs[0]);
7918: PetscFree(temp_idxs);
7919: }
7920: /* free workspace */
7921: PetscFree(recv_buffer_idxs_is);
/* each send buffer is released only after the corresponding sends have completed */
7922: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
7923: PetscFree(send_buffer_idxs);
7924: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
7925: if (isdense) {
/* give back the dense array obtained with MatDenseGetArray() when the send buffers were prepared */
7926: MatISGetLocalMat(mat,&local_mat);
7927: MatDenseRestoreArray(local_mat,&send_buffer_vals);
7928: MatISRestoreLocalMat(mat,&local_mat);
7929: } else {
7930: /* PetscFree(send_buffer_vals); */
7931: }
7932: if (nis) {
7933: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
7934: PetscFree(send_buffer_idxs_is);
7935: }
7937: if (nvecs) {
7938: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
7939: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
/* the old vector is no longer needed once its sends are done: replace it with one laid out on the new comm */
7940: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7941: VecDestroy(&nnsp_vec[0]);
7942: VecCreate(comm_n,&nnsp_vec[0]);
7943: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
7944: VecSetType(nnsp_vec[0],VECSTANDARD);
7945: /* set values */
7946: ptr_vals = recv_buffer_vecs;
7947: ptr_idxs = recv_buffer_idxs_local;
/* send_buffer_vecs is reused here as the pointer to the array of the new vector */
7948: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7949: for (i=0;i<n_recvs;i++) {
7950: PetscInt j;
/* accumulate the received entries at the local positions of their indices;
   NOTE(review): this relies on the array of the freshly created vector starting at zero, confirm */
7951: for (j=0;j<*(ptr_idxs+1);j++) {
7952: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
7953: }
7954: ptr_idxs += olengths_idxs[i];
7955: ptr_vals += olengths_idxs[i]-2;
7956: }
7957: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
7958: VecAssemblyBegin(nnsp_vec[0]);
7959: VecAssemblyEnd(nnsp_vec[0]);
7960: }
7962: PetscFree(recv_buffer_vecs);
7963: PetscFree(recv_buffer_idxs_local);
7964: PetscFree(recv_req_idxs);
7965: PetscFree(recv_req_vals);
7966: PetscFree(recv_req_vecs);
7967: PetscFree(recv_req_idxs_is);
7968: PetscFree(send_req_idxs);
7969: PetscFree(send_req_vals);
7970: PetscFree(send_req_vecs);
7971: PetscFree(send_req_idxs_is);
7972: PetscFree(ilengths_vals);
7973: PetscFree(ilengths_idxs);
7974: PetscFree(olengths_vals);
7975: PetscFree(olengths_idxs);
7976: PetscFree(onodes);
7977: if (nis) {
7978: PetscFree(ilengths_idxs_is);
7979: PetscFree(olengths_idxs_is);
7980: PetscFree(onodes_is);
7981: }
7982: PetscSubcommDestroy(&subcomm);
7983: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7984: MatDestroy(mat_n);
7985: for (i=0;i<nis;i++) {
7986: ISDestroy(&isarray[i]);
7987: }
7988: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7989: VecDestroy(&nnsp_vec[0]);
7990: }
7991: *mat_n = NULL;
7992: }
7993: return(0);
7994: }
7996: /* temporary hack into ksp private data structure */
7997: #include <petsc/private/kspimpl.h>
7999: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8000: {
8001: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
8002: PC_IS *pcis = (PC_IS*)pc->data;
8003: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
8004: Mat coarsedivudotp = NULL;
8005: Mat coarseG,t_coarse_mat_is;
8006: MatNullSpace CoarseNullSpace = NULL;
8007: ISLocalToGlobalMapping coarse_islg;
8008: IS coarse_is,*isarray,corners;
8009: PetscInt i,im_active=-1,active_procs=-1;
8010: PetscInt nis,nisdofs,nisneu,nisvert;
8011: PetscInt coarse_eqs_per_proc;
8012: PC pc_temp;
8013: PCType coarse_pc_type;
8014: KSPType coarse_ksp_type;
8015: PetscBool multilevel_requested,multilevel_allowed;
8016: PetscBool coarse_reuse;
8017: PetscInt ncoarse,nedcfield;
8018: PetscBool compute_vecs = PETSC_FALSE;
8019: PetscScalar *array;
8020: MatReuse coarse_mat_reuse;
8021: PetscBool restr, full_restr, have_void;
8022: PetscMPIInt size;
8023: PetscErrorCode ierr;
8026: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8027: /* Assign global numbering to coarse dofs */
8028: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8029: PetscInt ocoarse_size;
8030: compute_vecs = PETSC_TRUE;
8032: pcbddc->new_primal_space = PETSC_TRUE;
8033: ocoarse_size = pcbddc->coarse_size;
8034: PetscFree(pcbddc->global_primal_indices);
8035: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8036: /* see if we can avoid some work */
8037: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8038: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8039: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8040: KSPReset(pcbddc->coarse_ksp);
8041: coarse_reuse = PETSC_FALSE;
8042: } else { /* we can safely reuse already computed coarse matrix */
8043: coarse_reuse = PETSC_TRUE;
8044: }
8045: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8046: coarse_reuse = PETSC_FALSE;
8047: }
8048: /* reset any subassembling information */
8049: if (!coarse_reuse || pcbddc->recompute_topography) {
8050: ISDestroy(&pcbddc->coarse_subassembling);
8051: }
8052: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8053: coarse_reuse = PETSC_TRUE;
8054: }
8055: if (coarse_reuse && pcbddc->coarse_ksp) {
8056: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8057: PetscObjectReference((PetscObject)coarse_mat);
8058: coarse_mat_reuse = MAT_REUSE_MATRIX;
8059: } else {
8060: coarse_mat = NULL;
8061: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8062: }
8064: /* creates temporary l2gmap and IS for coarse indexes */
8065: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8066: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
8068: /* creates temporary MATIS object for coarse matrix */
8069: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8070: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8071: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8072: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8073: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8074: MatDestroy(&coarse_submat_dense);
8076: /* count "active" (i.e. with positive local size) and "void" processes */
8077: im_active = !!(pcis->n);
8078: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8080: /* determine number of processes participating in the coarse solver and compute subassembling pattern */
8081: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8082: /* full_restr : just use the receivers from the subassembling pattern */
8083: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8084: coarse_mat_is = NULL;
8085: multilevel_allowed = PETSC_FALSE;
8086: multilevel_requested = PETSC_FALSE;
8087: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8088: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8089: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8090: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8091: if (multilevel_requested) {
8092: ncoarse = active_procs/pcbddc->coarsening_ratio;
8093: restr = PETSC_FALSE;
8094: full_restr = PETSC_FALSE;
8095: } else {
8096: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8097: restr = PETSC_TRUE;
8098: full_restr = PETSC_TRUE;
8099: }
8100: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8101: ncoarse = PetscMax(1,ncoarse);
8102: if (!pcbddc->coarse_subassembling) {
8103: if (pcbddc->coarsening_ratio > 1) {
8104: if (multilevel_requested) {
8105: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8106: } else {
8107: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8108: }
8109: } else {
8110: PetscMPIInt rank;
8111: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8112: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8113: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8114: }
8115: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8116: PetscInt psum;
8117: if (pcbddc->coarse_ksp) psum = 1;
8118: else psum = 0;
8119: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8120: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8121: }
8122: /* determine if we can go multilevel */
8123: if (multilevel_requested) {
8124: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8125: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8126: }
8127: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8129: /* dump subassembling pattern */
8130: if (pcbddc->dbg_flag && multilevel_allowed) {
8131: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8132: }
8133: /* compute dofs splitting and neumann boundaries for coarse dofs */
8134: nedcfield = -1;
8135: corners = NULL;
8136: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneded computations */
8137: PetscInt *tidxs,*tidxs2,nout,tsize,i;
8138: const PetscInt *idxs;
8139: ISLocalToGlobalMapping tmap;
8141: /* create map between primal indices (in local representative ordering) and local primal numbering */
8142: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8143: /* allocate space for temporary storage */
8144: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8145: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8146: /* allocate for IS array */
8147: nisdofs = pcbddc->n_ISForDofsLocal;
8148: if (pcbddc->nedclocal) {
8149: if (pcbddc->nedfield > -1) {
8150: nedcfield = pcbddc->nedfield;
8151: } else {
8152: nedcfield = 0;
8153: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8154: nisdofs = 1;
8155: }
8156: }
8157: nisneu = !!pcbddc->NeumannBoundariesLocal;
8158: nisvert = 0; /* nisvert is not used */
8159: nis = nisdofs + nisneu + nisvert;
8160: PetscMalloc1(nis,&isarray);
8161: /* dofs splitting */
8162: for (i=0;i<nisdofs;i++) {
8163: /* ISView(pcbddc->ISForDofsLocal[i],0); */
8164: if (nedcfield != i) {
8165: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8166: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8167: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8168: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8169: } else {
8170: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8171: ISGetIndices(pcbddc->nedclocal,&idxs);
8172: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8173: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8174: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8175: }
8176: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8177: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8178: /* ISView(isarray[i],0); */
8179: }
8180: /* neumann boundaries */
8181: if (pcbddc->NeumannBoundariesLocal) {
8182: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8183: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8184: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8185: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8186: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8187: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8188: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8189: /* ISView(isarray[nisdofs],0); */
8190: }
8191: /* coordinates */
8192: if (pcbddc->corner_selected) {
8193: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8194: ISGetLocalSize(corners,&tsize);
8195: ISGetIndices(corners,&idxs);
8196: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8197: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8198: ISRestoreIndices(corners,&idxs);
8199: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8200: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8201: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8202: }
8203: PetscFree(tidxs);
8204: PetscFree(tidxs2);
8205: ISLocalToGlobalMappingDestroy(&tmap);
8206: } else {
8207: nis = 0;
8208: nisdofs = 0;
8209: nisneu = 0;
8210: nisvert = 0;
8211: isarray = NULL;
8212: }
8213: /* destroy no longer needed map */
8214: ISLocalToGlobalMappingDestroy(&coarse_islg);
8216: /* subassemble */
8217: if (multilevel_allowed) {
8218: Vec vp[1];
8219: PetscInt nvecs = 0;
8220: PetscBool reuse,reuser;
8222: if (coarse_mat) reuse = PETSC_TRUE;
8223: else reuse = PETSC_FALSE;
8224: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8225: vp[0] = NULL;
8226: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8227: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8228: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8229: VecSetType(vp[0],VECSTANDARD);
8230: nvecs = 1;
8232: if (pcbddc->divudotp) {
8233: Mat B,loc_divudotp;
8234: Vec v,p;
8235: IS dummy;
8236: PetscInt np;
8238: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8239: MatGetSize(loc_divudotp,&np,NULL);
8240: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8241: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8242: MatCreateVecs(B,&v,&p);
8243: VecSet(p,1.);
8244: MatMultTranspose(B,p,v);
8245: VecDestroy(&p);
8246: MatDestroy(&B);
8247: VecGetArray(vp[0],&array);
8248: VecPlaceArray(pcbddc->vec1_P,array);
8249: VecRestoreArray(vp[0],&array);
8250: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8251: VecResetArray(pcbddc->vec1_P);
8252: ISDestroy(&dummy);
8253: VecDestroy(&v);
8254: }
8255: }
8256: if (reuser) {
8257: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8258: } else {
8259: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8260: }
8261: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8262: PetscScalar *arraym,*arrayv;
8263: PetscInt nl;
8264: VecGetLocalSize(vp[0],&nl);
8265: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8266: MatDenseGetArray(coarsedivudotp,&arraym);
8267: VecGetArray(vp[0],&arrayv);
8268: PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));
8269: VecRestoreArray(vp[0],&arrayv);
8270: MatDenseRestoreArray(coarsedivudotp,&arraym);
8271: VecDestroy(&vp[0]);
8272: } else {
8273: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8274: }
8275: } else {
8276: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8277: }
8278: if (coarse_mat_is || coarse_mat) {
8279: if (!multilevel_allowed) {
8280: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8281: } else {
8282: Mat A;
8284: /* if this matrix is present, it means we are not reusing the coarse matrix */
8285: if (coarse_mat_is) {
8286: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8287: PetscObjectReference((PetscObject)coarse_mat_is);
8288: coarse_mat = coarse_mat_is;
8289: }
8290: /* be sure we don't have MatSeqDENSE as local mat */
8291: MatISGetLocalMat(coarse_mat,&A);
8292: MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
8293: }
8294: }
8295: MatDestroy(&t_coarse_mat_is);
8296: MatDestroy(&coarse_mat_is);
8298: /* create local to global scatters for coarse problem */
8299: if (compute_vecs) {
8300: PetscInt lrows;
8301: VecDestroy(&pcbddc->coarse_vec);
8302: if (coarse_mat) {
8303: MatGetLocalSize(coarse_mat,&lrows,NULL);
8304: } else {
8305: lrows = 0;
8306: }
8307: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8308: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8309: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8310: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8311: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8312: }
8313: ISDestroy(&coarse_is);
8315: /* set defaults for coarse KSP and PC */
8316: if (multilevel_allowed) {
8317: coarse_ksp_type = KSPRICHARDSON;
8318: coarse_pc_type = PCBDDC;
8319: } else {
8320: coarse_ksp_type = KSPPREONLY;
8321: coarse_pc_type = PCREDUNDANT;
8322: }
8324: /* print some info if requested */
8325: if (pcbddc->dbg_flag) {
8326: if (!multilevel_allowed) {
8327: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8328: if (multilevel_requested) {
8329: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8330: } else if (pcbddc->max_levels) {
8331: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8332: }
8333: PetscViewerFlush(pcbddc->dbg_viewer);
8334: }
8335: }
8337: /* communicate coarse discrete gradient */
8338: coarseG = NULL;
8339: if (pcbddc->nedcG && multilevel_allowed) {
8340: MPI_Comm ccomm;
8341: if (coarse_mat) {
8342: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8343: } else {
8344: ccomm = MPI_COMM_NULL;
8345: }
8346: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8347: }
8349: /* create the coarse KSP object only once with defaults */
8350: if (coarse_mat) {
8351: PetscBool isredundant,isnn,isbddc;
8352: PetscViewer dbg_viewer = NULL;
8354: if (pcbddc->dbg_flag) {
8355: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8356: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8357: }
8358: if (!pcbddc->coarse_ksp) {
8359: char prefix[256],str_level[16];
8360: size_t len;
8362: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8363: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8364: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8365: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8366: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8367: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8368: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8369: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8370: /* TODO is this logic correct? should check for coarse_mat type */
8371: PCSetType(pc_temp,coarse_pc_type);
8372: /* prefix */
8373: PetscStrcpy(prefix,"");
8374: PetscStrcpy(str_level,"");
8375: if (!pcbddc->current_level) {
8376: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8377: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8378: } else {
8379: PetscStrlen(((PetscObject)pc)->prefix,&len);
8380: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8381: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8382: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8383: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8384: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8385: PetscStrlcat(prefix,str_level,sizeof(prefix));
8386: }
8387: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8388: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8389: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8390: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8391: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8392: /* allow user customization */
8393: KSPSetFromOptions(pcbddc->coarse_ksp);
8394: /* get some info after set from options */
8395: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8396: /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8397: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8398: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8399: if (multilevel_allowed && !isbddc && !isnn) {
8400: isbddc = PETSC_TRUE;
8401: PCSetType(pc_temp,PCBDDC);
8402: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8403: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8404: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8405: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8406: PetscObjectOptionsBegin((PetscObject)pc_temp);
8407: (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8408: PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8409: PetscOptionsEnd();
8410: pc_temp->setfromoptionscalled++;
8411: }
8412: }
8413: }
8414: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8415: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8416: if (nisdofs) {
8417: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8418: for (i=0;i<nisdofs;i++) {
8419: ISDestroy(&isarray[i]);
8420: }
8421: }
8422: if (nisneu) {
8423: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8424: ISDestroy(&isarray[nisdofs]);
8425: }
8426: if (nisvert) {
8427: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8428: ISDestroy(&isarray[nis-1]);
8429: }
8430: if (coarseG) {
8431: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8432: }
8434: /* get some info after set from options */
8435: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8437: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8438: if (isbddc && !multilevel_allowed) {
8439: PCSetType(pc_temp,coarse_pc_type);
8440: isbddc = PETSC_FALSE;
8441: }
8442: /* multilevel cannot be done with coarse PCs different from BDDC or NN */
8443: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
8444: if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) {
8445: PCSetType(pc_temp,PCBDDC);
8446: isbddc = PETSC_TRUE;
8447: }
8448: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8449: if (isredundant) {
8450: KSP inner_ksp;
8451: PC inner_pc;
8453: PCRedundantGetKSP(pc_temp,&inner_ksp);
8454: KSPGetPC(inner_ksp,&inner_pc);
8455: }
8457: /* parameters which miss an API */
8458: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8459: if (isbddc) {
8460: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8462: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8463: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8464: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8465: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8466: if (pcbddc_coarse->benign_saddle_point) {
8467: Mat coarsedivudotp_is;
8468: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8469: IS row,col;
8470: const PetscInt *gidxs;
8471: PetscInt n,st,M,N;
8473: MatGetSize(coarsedivudotp,&n,NULL);
8474: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8475: st = st-n;
8476: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8477: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8478: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8479: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8480: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8481: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8482: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8483: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8484: ISGetSize(row,&M);
8485: MatGetSize(coarse_mat,&N,NULL);
8486: ISDestroy(&row);
8487: ISDestroy(&col);
8488: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8489: MatSetType(coarsedivudotp_is,MATIS);
8490: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8491: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8492: ISLocalToGlobalMappingDestroy(&rl2g);
8493: ISLocalToGlobalMappingDestroy(&cl2g);
8494: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8495: MatDestroy(&coarsedivudotp);
8496: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8497: MatDestroy(&coarsedivudotp_is);
8498: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8499: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8500: }
8501: }
8503: /* propagate symmetry info of coarse matrix */
8504: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8505: if (pc->pmat->symmetric_set) {
8506: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8507: }
8508: if (pc->pmat->hermitian_set) {
8509: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8510: }
8511: if (pc->pmat->spd_set) {
8512: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8513: }
8514: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8515: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8516: }
8517: /* set operators */
8518: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8519: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8520: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8521: if (pcbddc->dbg_flag) {
8522: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8523: }
8524: }
8525: MatDestroy(&coarseG);
8526: PetscFree(isarray);
8527: #if 0
8528: {
8529: PetscViewer viewer;
8530: char filename[256];
8531: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8532: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8533: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8534: MatView(coarse_mat,viewer);
8535: PetscViewerPopFormat(viewer);
8536: PetscViewerDestroy(&viewer);
8537: }
8538: #endif
8540: if (corners) {
8541: Vec gv;
8542: IS is;
8543: const PetscInt *idxs;
8544: PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8545: PetscScalar *coords;
8547: if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8548: VecGetSize(pcbddc->coarse_vec,&N);
8549: VecGetLocalSize(pcbddc->coarse_vec,&n);
8550: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8551: VecSetBlockSize(gv,cdim);
8552: VecSetSizes(gv,n*cdim,N*cdim);
8553: VecSetType(gv,VECSTANDARD);
8554: VecSetFromOptions(gv);
8555: VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8557: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8558: ISGetLocalSize(is,&n);
8559: ISGetIndices(is,&idxs);
8560: PetscMalloc1(n*cdim,&coords);
8561: for (i=0;i<n;i++) {
8562: for (d=0;d<cdim;d++) {
8563: coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8564: }
8565: }
8566: ISRestoreIndices(is,&idxs);
8567: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8569: ISGetLocalSize(corners,&n);
8570: ISGetIndices(corners,&idxs);
8571: VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8572: ISRestoreIndices(corners,&idxs);
8573: PetscFree(coords);
8574: VecAssemblyBegin(gv);
8575: VecAssemblyEnd(gv);
8576: VecGetArray(gv,&coords);
8577: if (pcbddc->coarse_ksp) {
8578: PC coarse_pc;
8579: PetscBool isbddc;
8581: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8582: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8583: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8584: PetscReal *realcoords;
8586: VecGetLocalSize(gv,&n);
8587: #if defined(PETSC_USE_COMPLEX)
8588: PetscMalloc1(n,&realcoords);
8589: for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8590: #else
8591: realcoords = coords;
8592: #endif
8593: PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8594: #if defined(PETSC_USE_COMPLEX)
8595: PetscFree(realcoords);
8596: #endif
8597: }
8598: }
8599: VecRestoreArray(gv,&coords);
8600: VecDestroy(&gv);
8601: }
8602: ISDestroy(&corners);
8604: if (pcbddc->coarse_ksp) {
8605: Vec crhs,csol;
8607: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8608: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8609: if (!csol) {
8610: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8611: }
8612: if (!crhs) {
8613: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8614: }
8615: }
8616: MatDestroy(&coarsedivudotp);
8618: /* compute null space for coarse solver if the benign trick has been requested */
8619: if (pcbddc->benign_null) {
8621: VecSet(pcbddc->vec1_P,0.);
8622: for (i=0;i<pcbddc->benign_n;i++) {
8623: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8624: }
8625: VecAssemblyBegin(pcbddc->vec1_P);
8626: VecAssemblyEnd(pcbddc->vec1_P);
8627: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8628: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8629: if (coarse_mat) {
8630: Vec nullv;
8631: PetscScalar *array,*array2;
8632: PetscInt nl;
8634: MatCreateVecs(coarse_mat,&nullv,NULL);
8635: VecGetLocalSize(nullv,&nl);
8636: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8637: VecGetArray(nullv,&array2);
8638: PetscMemcpy(array2,array,nl*sizeof(*array));
8639: VecRestoreArray(nullv,&array2);
8640: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8641: VecNormalize(nullv,NULL);
8642: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8643: VecDestroy(&nullv);
8644: }
8645: }
8646: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8648: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8649: if (pcbddc->coarse_ksp) {
8650: PetscBool ispreonly;
8652: if (CoarseNullSpace) {
8653: PetscBool isnull;
8654: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8655: if (isnull) {
8656: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8657: }
8658: /* TODO: add local nullspaces (if any) */
8659: }
8660: /* setup coarse ksp */
8661: KSPSetUp(pcbddc->coarse_ksp);
8662: /* Check coarse problem if in debug mode or if solving with an iterative method */
8663: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8664: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
8665: KSP check_ksp;
8666: KSPType check_ksp_type;
8667: PC check_pc;
8668: Vec check_vec,coarse_vec;
8669: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8670: PetscInt its;
8671: PetscBool compute_eigs;
8672: PetscReal *eigs_r,*eigs_c;
8673: PetscInt neigs;
8674: const char *prefix;
8676: /* Create ksp object suitable for estimation of extreme eigenvalues */
8677: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8678: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8679: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8680: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8681: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8682: /* prevent from setup unneeded object */
8683: KSPGetPC(check_ksp,&check_pc);
8684: PCSetType(check_pc,PCNONE);
8685: if (ispreonly) {
8686: check_ksp_type = KSPPREONLY;
8687: compute_eigs = PETSC_FALSE;
8688: } else {
8689: check_ksp_type = KSPGMRES;
8690: compute_eigs = PETSC_TRUE;
8691: }
8692: KSPSetType(check_ksp,check_ksp_type);
8693: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8694: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8695: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8696: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8697: KSPSetOptionsPrefix(check_ksp,prefix);
8698: KSPAppendOptionsPrefix(check_ksp,"check_");
8699: KSPSetFromOptions(check_ksp);
8700: KSPSetUp(check_ksp);
8701: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8702: KSPSetPC(check_ksp,check_pc);
8703: /* create random vec */
8704: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8705: VecSetRandom(check_vec,NULL);
8706: MatMult(coarse_mat,check_vec,coarse_vec);
8707: /* solve coarse problem */
8708: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8709: KSPCheckSolve(check_ksp,pc,coarse_vec);
8710: /* set eigenvalue estimation if preonly has not been requested */
8711: if (compute_eigs) {
8712: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8713: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8714: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8715: if (neigs) {
8716: lambda_max = eigs_r[neigs-1];
8717: lambda_min = eigs_r[0];
8718: if (pcbddc->use_coarse_estimates) {
8719: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8720: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8721: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8722: }
8723: }
8724: }
8725: }
8727: /* check coarse problem residual error */
8728: if (pcbddc->dbg_flag) {
8729: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8730: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8731: VecAXPY(check_vec,-1.0,coarse_vec);
8732: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8733: MatMult(coarse_mat,check_vec,coarse_vec);
8734: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8735: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8736: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8737: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8738: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8739: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8740: if (CoarseNullSpace) {
8741: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8742: }
8743: if (compute_eigs) {
8744: PetscReal lambda_max_s,lambda_min_s;
8745: KSPConvergedReason reason;
8746: KSPGetType(check_ksp,&check_ksp_type);
8747: KSPGetIterationNumber(check_ksp,&its);
8748: KSPGetConvergedReason(check_ksp,&reason);
8749: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8750: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8751: for (i=0;i<neigs;i++) {
8752: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8753: }
8754: }
8755: PetscViewerFlush(dbg_viewer);
8756: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8757: }
8758: VecDestroy(&check_vec);
8759: VecDestroy(&coarse_vec);
8760: KSPDestroy(&check_ksp);
8761: if (compute_eigs) {
8762: PetscFree(eigs_r);
8763: PetscFree(eigs_c);
8764: }
8765: }
8766: }
8767: MatNullSpaceDestroy(&CoarseNullSpace);
8768: /* print additional info */
8769: if (pcbddc->dbg_flag) {
8770: /* waits until all processes reaches this point */
8771: PetscBarrier((PetscObject)pc);
8772: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8773: PetscViewerFlush(pcbddc->dbg_viewer);
8774: }
8776: /* free memory */
8777: MatDestroy(&coarse_mat);
8778: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8779: return(0);
8780: }
8782: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8783: {
8784: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8785: PC_IS* pcis = (PC_IS*)pc->data;
8786: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8787: IS subset,subset_mult,subset_n;
8788: PetscInt local_size,coarse_size=0;
8789: PetscInt *local_primal_indices=NULL;
8790: const PetscInt *t_local_primal_indices;
8794: /* Compute global number of coarse dofs */
8795: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8796: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8797: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8798: ISDestroy(&subset_n);
8799: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8800: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8801: ISDestroy(&subset);
8802: ISDestroy(&subset_mult);
8803: ISGetLocalSize(subset_n,&local_size);
8804: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8805: PetscMalloc1(local_size,&local_primal_indices);
8806: ISGetIndices(subset_n,&t_local_primal_indices);
8807: PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
8808: ISRestoreIndices(subset_n,&t_local_primal_indices);
8809: ISDestroy(&subset_n);
8811: /* check numbering */
8812: if (pcbddc->dbg_flag) {
8813: PetscScalar coarsesum,*array,*array2;
8814: PetscInt i;
8815: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8817: PetscViewerFlush(pcbddc->dbg_viewer);
8818: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8819: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8820: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8821: /* counter */
8822: VecSet(pcis->vec1_global,0.0);
8823: VecSet(pcis->vec1_N,1.0);
8824: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8825: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8826: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8827: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8828: VecSet(pcis->vec1_N,0.0);
8829: for (i=0;i<pcbddc->local_primal_size;i++) {
8830: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8831: }
8832: VecAssemblyBegin(pcis->vec1_N);
8833: VecAssemblyEnd(pcis->vec1_N);
8834: VecSet(pcis->vec1_global,0.0);
8835: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8836: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8837: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8838: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8839: VecGetArray(pcis->vec1_N,&array);
8840: VecGetArray(pcis->vec2_N,&array2);
8841: for (i=0;i<pcis->n;i++) {
8842: if (array[i] != 0.0 && array[i] != array2[i]) {
8843: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8844: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8845: set_error = PETSC_TRUE;
8846: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8847: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8848: }
8849: }
8850: VecRestoreArray(pcis->vec2_N,&array2);
8851: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8852: PetscViewerFlush(pcbddc->dbg_viewer);
8853: for (i=0;i<pcis->n;i++) {
8854: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8855: }
8856: VecRestoreArray(pcis->vec1_N,&array);
8857: VecSet(pcis->vec1_global,0.0);
8858: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8859: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8860: VecSum(pcis->vec1_global,&coarsesum);
8861: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8862: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8863: PetscInt *gidxs;
8865: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8866: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8867: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8868: PetscViewerFlush(pcbddc->dbg_viewer);
8869: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8870: for (i=0;i<pcbddc->local_primal_size;i++) {
8871: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8872: }
8873: PetscViewerFlush(pcbddc->dbg_viewer);
8874: PetscFree(gidxs);
8875: }
8876: PetscViewerFlush(pcbddc->dbg_viewer);
8877: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8878: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8879: }
8881: /* get back data */
8882: *coarse_size_n = coarse_size;
8883: *local_primal_indices_n = local_primal_indices;
8884: return(0);
8885: }
8887: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
8888: {
8889: IS localis_t;
8890: PetscInt i,lsize,*idxs,n;
8891: PetscScalar *vals;
8895: /* get indices in local ordering exploiting local to global map */
8896: ISGetLocalSize(globalis,&lsize);
8897: PetscMalloc1(lsize,&vals);
8898: for (i=0;i<lsize;i++) vals[i] = 1.0;
8899: ISGetIndices(globalis,(const PetscInt**)&idxs);
8900: VecSet(gwork,0.0);
8901: VecSet(lwork,0.0);
8902: if (idxs) { /* multilevel guard */
8903: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
8904: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
8905: }
8906: VecAssemblyBegin(gwork);
8907: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
8908: PetscFree(vals);
8909: VecAssemblyEnd(gwork);
8910: /* now compute set in local ordering */
8911: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8912: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
8913: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
8914: VecGetSize(lwork,&n);
8915: for (i=0,lsize=0;i<n;i++) {
8916: if (PetscRealPart(vals[i]) > 0.5) {
8917: lsize++;
8918: }
8919: }
8920: PetscMalloc1(lsize,&idxs);
8921: for (i=0,lsize=0;i<n;i++) {
8922: if (PetscRealPart(vals[i]) > 0.5) {
8923: idxs[lsize++] = i;
8924: }
8925: }
8926: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
8927: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
8928: *localis = localis_t;
8929: return(0);
8930: }
8932: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8933: {
8934: PC_IS *pcis=(PC_IS*)pc->data;
8935: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
8936: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
8937: Mat S_j;
8938: PetscInt *used_xadj,*used_adjncy;
8939: PetscBool free_used_adj;
8940: PetscErrorCode ierr;
8943: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
8944: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8945: free_used_adj = PETSC_FALSE;
8946: if (pcbddc->sub_schurs_layers == -1) {
8947: used_xadj = NULL;
8948: used_adjncy = NULL;
8949: } else {
8950: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8951: used_xadj = pcbddc->mat_graph->xadj;
8952: used_adjncy = pcbddc->mat_graph->adjncy;
8953: } else if (pcbddc->computed_rowadj) {
8954: used_xadj = pcbddc->mat_graph->xadj;
8955: used_adjncy = pcbddc->mat_graph->adjncy;
8956: } else {
8957: PetscBool flg_row=PETSC_FALSE;
8958: const PetscInt *xadj,*adjncy;
8959: PetscInt nvtxs;
8961: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8962: if (flg_row) {
8963: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
8964: PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
8965: PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
8966: free_used_adj = PETSC_TRUE;
8967: } else {
8968: pcbddc->sub_schurs_layers = -1;
8969: used_xadj = NULL;
8970: used_adjncy = NULL;
8971: }
8972: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
8973: }
8974: }
8976: /* setup sub_schurs data */
8977: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
8978: if (!sub_schurs->schur_explicit) {
8979: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8980: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
8981: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
8982: } else {
8983: Mat change = NULL;
8984: Vec scaling = NULL;
8985: IS change_primal = NULL, iP;
8986: PetscInt benign_n;
8987: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
8988: PetscBool isseqaij,need_change = PETSC_FALSE;
8989: PetscBool discrete_harmonic = PETSC_FALSE;
8991: if (!pcbddc->use_vertices && reuse_solvers) {
8992: PetscInt n_vertices;
8994: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
8995: reuse_solvers = (PetscBool)!n_vertices;
8996: }
8997: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
8998: if (!isseqaij) {
8999: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
9000: if (matis->A == pcbddc->local_mat) {
9001: MatDestroy(&pcbddc->local_mat);
9002: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
9003: } else {
9004: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
9005: }
9006: }
9007: if (!pcbddc->benign_change_explicit) {
9008: benign_n = pcbddc->benign_n;
9009: } else {
9010: benign_n = 0;
9011: }
9012: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9013: We need a global reduction to avoid possible deadlocks.
9014: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9015: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9016: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9017: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9018: need_change = (PetscBool)(!need_change);
9019: }
9020: /* If the user defines additional constraints, we import them here.
9021: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9022: if (need_change) {
9023: PC_IS *pcisf;
9024: PC_BDDC *pcbddcf;
9025: PC pcf;
9027: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9028: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9029: PCSetOperators(pcf,pc->mat,pc->pmat);
9030: PCSetType(pcf,PCBDDC);
9032: /* hacks */
9033: pcisf = (PC_IS*)pcf->data;
9034: pcisf->is_B_local = pcis->is_B_local;
9035: pcisf->vec1_N = pcis->vec1_N;
9036: pcisf->BtoNmap = pcis->BtoNmap;
9037: pcisf->n = pcis->n;
9038: pcisf->n_B = pcis->n_B;
9039: pcbddcf = (PC_BDDC*)pcf->data;
9040: PetscFree(pcbddcf->mat_graph);
9041: pcbddcf->mat_graph = pcbddc->mat_graph;
9042: pcbddcf->use_faces = PETSC_TRUE;
9043: pcbddcf->use_change_of_basis = PETSC_TRUE;
9044: pcbddcf->use_change_on_faces = PETSC_TRUE;
9045: pcbddcf->use_qr_single = PETSC_TRUE;
9046: pcbddcf->fake_change = PETSC_TRUE;
9048: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9049: PCBDDCConstraintsSetUp(pcf);
9050: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9051: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9052: change = pcbddcf->ConstraintMatrix;
9053: pcbddcf->ConstraintMatrix = NULL;
9055: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9056: PetscFree(pcbddcf->sub_schurs);
9057: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9058: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9059: PetscFree(pcbddcf->primal_indices_local_idxs);
9060: PetscFree(pcbddcf->onearnullvecs_state);
9061: PetscFree(pcf->data);
9062: pcf->ops->destroy = NULL;
9063: pcf->ops->reset = NULL;
9064: PCDestroy(&pcf);
9065: }
9066: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9068: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9069: if (iP) {
9070: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9071: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9072: PetscOptionsEnd();
9073: }
9074: if (discrete_harmonic) {
9075: Mat A;
9076: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9077: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9078: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9079: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9080: MatDestroy(&A);
9081: } else {
9082: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9083: }
9084: MatDestroy(&change);
9085: ISDestroy(&change_primal);
9086: }
9087: MatDestroy(&S_j);
9089: /* free adjacency */
9090: if (free_used_adj) {
9091: PetscFree2(used_xadj,used_adjncy);
9092: }
9093: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9094: return(0);
9095: }
9097: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9098: {
9099: PC_IS *pcis=(PC_IS*)pc->data;
9100: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9101: PCBDDCGraph graph;
9102: PetscErrorCode ierr;
9105: /* attach interface graph for determining subsets */
9106: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9107: IS verticesIS,verticescomm;
9108: PetscInt vsize,*idxs;
9110: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9111: ISGetSize(verticesIS,&vsize);
9112: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9113: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9114: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9115: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9116: PCBDDCGraphCreate(&graph);
9117: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9118: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9119: ISDestroy(&verticescomm);
9120: PCBDDCGraphComputeConnectedComponents(graph);
9121: } else {
9122: graph = pcbddc->mat_graph;
9123: }
9124: /* print some info */
9125: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9126: IS vertices;
9127: PetscInt nv,nedges,nfaces;
9128: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9129: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9130: ISGetSize(vertices,&nv);
9131: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9132: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9133: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9134: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9135: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9136: PetscViewerFlush(pcbddc->dbg_viewer);
9137: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9138: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9139: }
9141: /* sub_schurs init */
9142: if (!pcbddc->sub_schurs) {
9143: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9144: }
9145: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
9147: /* free graph struct */
9148: if (pcbddc->sub_schurs_rebuild) {
9149: PCBDDCGraphDestroy(&graph);
9150: }
9151: return(0);
9152: }
9154: PetscErrorCode PCBDDCCheckOperator(PC pc)
9155: {
9156: PC_IS *pcis=(PC_IS*)pc->data;
9157: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9158: PetscErrorCode ierr;
9161: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9162: IS zerodiag = NULL;
9163: Mat S_j,B0_B=NULL;
9164: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
9165: PetscScalar *p0_check,*array,*array2;
9166: PetscReal norm;
9167: PetscInt i;
9169: /* B0 and B0_B */
9170: if (zerodiag) {
9171: IS dummy;
9173: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9174: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9175: MatCreateVecs(B0_B,NULL,&dummy_vec);
9176: ISDestroy(&dummy);
9177: }
9178: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9179: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9180: VecSet(pcbddc->vec1_P,1.0);
9181: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9182: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9183: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9184: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9185: VecReciprocal(vec_scale_P);
9186: /* S_j */
9187: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9188: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9190: /* mimic vector in \widetilde{W}_\Gamma */
9191: VecSetRandom(pcis->vec1_N,NULL);
9192: /* continuous in primal space */
9193: VecSetRandom(pcbddc->coarse_vec,NULL);
9194: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9195: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9196: VecGetArray(pcbddc->vec1_P,&array);
9197: PetscCalloc1(pcbddc->benign_n,&p0_check);
9198: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9199: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9200: VecRestoreArray(pcbddc->vec1_P,&array);
9201: VecAssemblyBegin(pcis->vec1_N);
9202: VecAssemblyEnd(pcis->vec1_N);
9203: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9204: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9205: VecDuplicate(pcis->vec2_B,&vec_check_B);
9206: VecCopy(pcis->vec2_B,vec_check_B);
9208: /* assemble rhs for coarse problem */
9209: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9210: /* local with Schur */
9211: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9212: if (zerodiag) {
9213: VecGetArray(dummy_vec,&array);
9214: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9215: VecRestoreArray(dummy_vec,&array);
9216: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9217: }
9218: /* sum on primal nodes the local contributions */
9219: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9220: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9221: VecGetArray(pcis->vec1_N,&array);
9222: VecGetArray(pcbddc->vec1_P,&array2);
9223: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9224: VecRestoreArray(pcbddc->vec1_P,&array2);
9225: VecRestoreArray(pcis->vec1_N,&array);
9226: VecSet(pcbddc->coarse_vec,0.);
9227: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9228: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9229: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9230: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9231: VecGetArray(pcbddc->vec1_P,&array);
9232: /* scale primal nodes (BDDC sums contibutions) */
9233: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9234: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9235: VecRestoreArray(pcbddc->vec1_P,&array);
9236: VecAssemblyBegin(pcis->vec1_N);
9237: VecAssemblyEnd(pcis->vec1_N);
9238: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9239: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9240: /* global: \widetilde{B0}_B w_\Gamma */
9241: if (zerodiag) {
9242: MatMult(B0_B,pcis->vec2_B,dummy_vec);
9243: VecGetArray(dummy_vec,&array);
9244: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9245: VecRestoreArray(dummy_vec,&array);
9246: }
9247: /* BDDC */
9248: VecSet(pcis->vec1_D,0.);
9249: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
9251: VecCopy(pcis->vec1_B,pcis->vec2_B);
9252: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9253: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9254: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9255: for (i=0;i<pcbddc->benign_n;i++) {
9256: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9257: }
9258: PetscFree(p0_check);
9259: VecDestroy(&vec_scale_P);
9260: VecDestroy(&vec_check_B);
9261: VecDestroy(&dummy_vec);
9262: MatDestroy(&S_j);
9263: MatDestroy(&B0_B);
9264: }
9265: return(0);
9266: }
9268: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9269: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9270: {
9271: Mat At;
9272: IS rows;
9273: PetscInt rst,ren;
9275: PetscLayout rmap;
9278: rst = ren = 0;
9279: if (ccomm != MPI_COMM_NULL) {
9280: PetscLayoutCreate(ccomm,&rmap);
9281: PetscLayoutSetSize(rmap,A->rmap->N);
9282: PetscLayoutSetBlockSize(rmap,1);
9283: PetscLayoutSetUp(rmap);
9284: PetscLayoutGetRange(rmap,&rst,&ren);
9285: }
9286: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9287: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9288: ISDestroy(&rows);
9290: if (ccomm != MPI_COMM_NULL) {
9291: Mat_MPIAIJ *a,*b;
9292: IS from,to;
9293: Vec gvec;
9294: PetscInt lsize;
9296: MatCreate(ccomm,B);
9297: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9298: MatSetType(*B,MATAIJ);
9299: PetscLayoutDestroy(&((*B)->rmap));
9300: PetscLayoutSetUp((*B)->cmap);
9301: a = (Mat_MPIAIJ*)At->data;
9302: b = (Mat_MPIAIJ*)(*B)->data;
9303: MPI_Comm_size(ccomm,&b->size);
9304: MPI_Comm_rank(ccomm,&b->rank);
9305: PetscObjectReference((PetscObject)a->A);
9306: PetscObjectReference((PetscObject)a->B);
9307: b->A = a->A;
9308: b->B = a->B;
9310: b->donotstash = a->donotstash;
9311: b->roworiented = a->roworiented;
9312: b->rowindices = 0;
9313: b->rowvalues = 0;
9314: b->getrowactive = PETSC_FALSE;
9316: (*B)->rmap = rmap;
9317: (*B)->factortype = A->factortype;
9318: (*B)->assembled = PETSC_TRUE;
9319: (*B)->insertmode = NOT_SET_VALUES;
9320: (*B)->preallocated = PETSC_TRUE;
9322: if (a->colmap) {
9323: #if defined(PETSC_USE_CTABLE)
9324: PetscTableCreateCopy(a->colmap,&b->colmap);
9325: #else
9326: PetscMalloc1(At->cmap->N,&b->colmap);
9327: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9328: PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));
9329: #endif
9330: } else b->colmap = 0;
9331: if (a->garray) {
9332: PetscInt len;
9333: len = a->B->cmap->n;
9334: PetscMalloc1(len+1,&b->garray);
9335: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9336: if (len) { PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt)); }
9337: } else b->garray = 0;
9339: PetscObjectReference((PetscObject)a->lvec);
9340: b->lvec = a->lvec;
9341: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9343: /* cannot use VecScatterCopy */
9344: VecGetLocalSize(b->lvec,&lsize);
9345: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9346: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9347: MatCreateVecs(*B,&gvec,NULL);
9348: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9349: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9350: ISDestroy(&from);
9351: ISDestroy(&to);
9352: VecDestroy(&gvec);
9353: }
9354: MatDestroy(&At);
9355: return(0);
9356: }