Actual source code: bddcprivate.c
petsc-3.6.1 2015-08-06
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
5: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
10: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11: {
12: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
13: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
15: PetscBLASInt *B_iwork,*B_ifail;
16: PetscScalar *work,lwork;
17: PetscScalar *St,*S,*eigv;
18: PetscScalar *Sarray,*Starray;
19: PetscReal *eigs,thresh;
20: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21: PetscBool allocated_S_St;
22: #if defined(PETSC_USE_COMPLEX)
23: PetscReal *rwork;
24: #endif
25: PetscErrorCode ierr;
28: if (!sub_schurs->use_mumps) {
29: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30: }
32: if (pcbddc->dbg_flag) {
33: PetscViewerFlush(pcbddc->dbg_viewer);
34: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
35: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
36: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
37: }
39: if (pcbddc->dbg_flag) {
40: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41: }
43: if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45: }
47: /* max size of subsets */
48: mss = 0;
49: for (i=0;i<sub_schurs->n_subs;i++) {
50: PetscInt subset_size;
52: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
53: mss = PetscMax(mss,subset_size);
54: }
56: /* min/max and threshold */
57: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
58: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
59: nmax = PetscMax(nmin,nmax);
60: allocated_S_St = PETSC_FALSE;
61: if (nmin) {
62: allocated_S_St = PETSC_TRUE;
63: }
65: /* allocate lapack workspace */
66: cum = cum2 = 0;
67: maxneigs = 0;
68: for (i=0;i<sub_schurs->n_subs;i++) {
69: PetscInt n,subset_size;
71: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
72: n = PetscMin(subset_size,nmax);
73: cum += subset_size;
74: cum2 += subset_size*n;
75: maxneigs = PetscMax(maxneigs,n);
76: }
77: if (mss) {
78: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
79: PetscBLASInt B_itype = 1;
80: PetscBLASInt B_N = mss;
81: PetscReal zero = 0.0;
82: PetscReal eps = 0.0; /* dlamch? */
84: B_lwork = -1;
85: S = NULL;
86: St = NULL;
87: eigs = NULL;
88: eigv = NULL;
89: B_iwork = NULL;
90: B_ifail = NULL;
91: #if defined(PETSC_USE_COMPLEX)
92: rwork = NULL;
93: #endif
94: thresh = 1.0;
95: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
96: #if defined(PETSC_USE_COMPLEX)
97: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98: #else
99: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100: #endif
101: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102: PetscFPTrapPop();
103: } else {
104: /* TODO */
105: }
106: } else {
107: lwork = 0;
108: }
110: nv = 0;
111: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112: ISGetLocalSize(sub_schurs->is_vertices,&nv);
113: }
114: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
115: if (allocated_S_St) {
116: PetscMalloc2(mss*mss,&S,mss*mss,&St);
117: }
118: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
119: #if defined(PETSC_USE_COMPLEX)
120: PetscMalloc1(7*mss,&rwork);
121: #endif
122: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125: nv+cum,&pcbddc->adaptive_constraints_idxs,
126: nv+cum2,&pcbddc->adaptive_constraints_data);
127: PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));
129: maxneigs = 0;
130: cum = cum2 = cumarray = 0;
131: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132: pcbddc->adaptive_constraints_data_ptr[0] = 0;
133: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134: const PetscInt *idxs;
136: ISGetIndices(sub_schurs->is_vertices,&idxs);
137: for (cum=0;cum<nv;cum++) {
138: pcbddc->adaptive_constraints_n[cum] = 1;
139: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140: pcbddc->adaptive_constraints_data[cum] = 1.0;
141: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143: }
144: cum2 = cum;
145: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
146: }
148: if (mss) { /* multilevel */
149: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
150: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
151: }
153: for (i=0;i<sub_schurs->n_subs;i++) {
155: const PetscInt *idxs;
156: PetscReal infty = PETSC_MAX_REAL;
157: PetscInt j,subset_size,eigs_start = 0;
158: PetscBLASInt B_N;
159: PetscBool same_data = PETSC_FALSE;
161: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
162: PetscBLASIntCast(subset_size,&B_N);
163: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
164: if (sub_schurs->is_hermitian) {
165: PetscInt j,k;
166: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
167: PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
168: PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
169: }
170: for (j=0;j<subset_size;j++) {
171: for (k=j;k<subset_size;k++) {
172: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
173: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
174: }
175: }
176: } else {
177: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
178: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
179: }
180: } else {
181: S = Sarray + cumarray;
182: St = Starray + cumarray;
183: }
185: ISGetIndices(sub_schurs->is_subs[i],&idxs);
186: /* see if we can save some work */
187: if (sub_schurs->n_subs == 1) {
188: PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
189: }
191: if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
192: B_neigs = 0;
193: } else {
194: /* Threshold: this is an heuristic for edges */
195: thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
197: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
198: PetscBLASInt B_itype = 1;
199: PetscBLASInt B_IL, B_IU;
200: PetscReal eps = -1.0; /* dlamch? */
201: PetscInt nmin_s;
203: /* ask for eigenvalues larger than thresh */
204: if (pcbddc->dbg_flag) {
205: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
206: }
207: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
208: #if defined(PETSC_USE_COMPLEX)
209: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
210: #else
211: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
212: #endif
213: PetscFPTrapPop();
214: if (B_ierr) {
215: if (B_ierr < 0 ) {
216: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
217: } else if (B_ierr <= B_N) {
218: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
219: } else {
220: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
221: }
222: }
224: if (B_neigs > nmax) {
225: if (pcbddc->dbg_flag) {
226: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
227: }
228: eigs_start = B_neigs -nmax;
229: B_neigs = nmax;
230: }
232: nmin_s = PetscMin(nmin,B_N);
233: if (B_neigs < nmin_s) {
234: PetscBLASInt B_neigs2;
236: B_IU = B_N - B_neigs;
237: B_IL = B_N - nmin_s + 1;
238: if (pcbddc->dbg_flag) {
239: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
240: }
241: if (sub_schurs->is_hermitian) {
242: PetscInt j;
243: for (j=0;j<subset_size;j++) {
244: PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
245: }
246: for (j=0;j<subset_size;j++) {
247: PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
248: }
249: } else {
250: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
251: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
252: }
253: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
254: #if defined(PETSC_USE_COMPLEX)
255: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
256: #else
257: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
258: #endif
259: PetscFPTrapPop();
260: B_neigs += B_neigs2;
261: }
262: if (B_ierr) {
263: if (B_ierr < 0 ) {
264: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
265: } else if (B_ierr <= B_N) {
266: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
267: } else {
268: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
269: }
270: }
271: if (pcbddc->dbg_flag) {
272: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
273: for (j=0;j<B_neigs;j++) {
274: if (eigs[j] == 0.0) {
275: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
276: } else {
277: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
278: }
279: }
280: }
281: } else {
282: /* TODO */
283: }
284: }
285: maxneigs = PetscMax(B_neigs,maxneigs);
286: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
287: if (B_neigs) {
288: PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));
290: if (pcbddc->dbg_flag > 1) {
291: PetscInt ii;
292: for (ii=0;ii<B_neigs;ii++) {
293: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
294: for (j=0;j<B_N;j++) {
295: #if defined(PETSC_USE_COMPLEX)
296: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
297: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
298: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
299: #else
300: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
301: #endif
302: }
303: }
304: }
305: #if 0
306: for (j=0;j<B_neigs;j++) {
307: PetscBLASInt Blas_N,Blas_one = 1.0;
308: PetscScalar norm;
309: PetscBLASIntCast(subset_size,&Blas_N);
310: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
311: &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
312: if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
313: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
314: } else {
315: norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
316: }
317: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
318: }
319: #endif
320: PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
321: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
322: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
323: cum++;
324: }
325: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
326: /* shift for next computation */
327: cumarray += subset_size*subset_size;
328: }
329: if (pcbddc->dbg_flag) {
330: PetscViewerFlush(pcbddc->dbg_viewer);
331: }
333: if (mss) {
334: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
335: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
336: /* destroy matrices (junk) */
337: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
338: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
339: }
340: if (allocated_S_St) {
341: PetscFree2(S,St);
342: }
343: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
344: #if defined(PETSC_USE_COMPLEX)
345: PetscFree(rwork);
346: #endif
347: if (pcbddc->dbg_flag) {
348: PetscInt maxneigs_r;
349: MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
350: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
351: }
352: return(0);
353: }
357: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
358: {
359: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
360: PetscScalar *coarse_submat_vals;
364: /* Setup local scatters R_to_B and (optionally) R_to_D */
365: /* PCBDDCSetUpLocalWorkVectors should be called first! */
366: PCBDDCSetUpLocalScatters(pc);
368: /* Setup local neumann solver ksp_R */
369: /* PCBDDCSetUpLocalScatters should be called first! */
370: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
372: /* Change global null space passed in by the user if change of basis has been requested */
373: if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
374: PCBDDCNullSpaceAdaptGlobal(pc);
375: }
377: /*
378: Setup local correction and local part of coarse basis.
379: Gives back the dense local part of the coarse matrix in column major ordering
380: */
381: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
383: /* Compute total number of coarse nodes and setup coarse solver */
384: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
386: /* free */
387: PetscFree(coarse_submat_vals);
388: return(0);
389: }
393: PetscErrorCode PCBDDCResetCustomization(PC pc)
394: {
395: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
399: PCBDDCGraphResetCSR(pcbddc->mat_graph);
400: ISDestroy(&pcbddc->user_primal_vertices);
401: MatNullSpaceDestroy(&pcbddc->NullSpace);
402: ISDestroy(&pcbddc->NeumannBoundaries);
403: ISDestroy(&pcbddc->NeumannBoundariesLocal);
404: ISDestroy(&pcbddc->DirichletBoundaries);
405: MatNullSpaceDestroy(&pcbddc->onearnullspace);
406: PetscFree(pcbddc->onearnullvecs_state);
407: ISDestroy(&pcbddc->DirichletBoundariesLocal);
408: PCBDDCSetDofsSplitting(pc,0,NULL);
409: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
410: return(0);
411: }
415: PetscErrorCode PCBDDCResetTopography(PC pc)
416: {
417: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
421: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
422: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
423: MatDestroy(&pcbddc->ConstraintMatrix);
424: PCBDDCGraphReset(pcbddc->mat_graph);
425: PCBDDCSubSchursReset(pcbddc->sub_schurs);
426: return(0);
427: }
431: PetscErrorCode PCBDDCResetSolvers(PC pc)
432: {
433: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
434: PetscScalar *array;
438: VecDestroy(&pcbddc->coarse_vec);
439: if (pcbddc->coarse_phi_B) {
440: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
441: PetscFree(array);
442: }
443: MatDestroy(&pcbddc->coarse_phi_B);
444: MatDestroy(&pcbddc->coarse_phi_D);
445: MatDestroy(&pcbddc->coarse_psi_B);
446: MatDestroy(&pcbddc->coarse_psi_D);
447: VecDestroy(&pcbddc->vec1_P);
448: VecDestroy(&pcbddc->vec1_C);
449: MatDestroy(&pcbddc->local_auxmat2);
450: MatDestroy(&pcbddc->local_auxmat1);
451: VecDestroy(&pcbddc->vec1_R);
452: VecDestroy(&pcbddc->vec2_R);
453: ISDestroy(&pcbddc->is_R_local);
454: VecScatterDestroy(&pcbddc->R_to_B);
455: VecScatterDestroy(&pcbddc->R_to_D);
456: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
457: KSPDestroy(&pcbddc->ksp_D);
458: KSPDestroy(&pcbddc->ksp_R);
459: KSPDestroy(&pcbddc->coarse_ksp);
460: MatDestroy(&pcbddc->local_mat);
461: PetscFree(pcbddc->primal_indices_local_idxs);
462: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
463: PetscFree(pcbddc->global_primal_indices);
464: ISDestroy(&pcbddc->coarse_subassembling);
465: ISDestroy(&pcbddc->coarse_subassembling_init);
466: return(0);
467: }
471: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
472: {
473: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
474: PC_IS *pcis = (PC_IS*)pc->data;
475: VecType impVecType;
476: PetscInt n_constraints,n_R,old_size;
480: if (!pcbddc->ConstraintMatrix) {
481: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
482: }
483: /* get sizes */
484: n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
485: n_R = pcis->n-pcbddc->n_vertices;
486: VecGetType(pcis->vec1_N,&impVecType);
487: /* local work vectors (try to avoid unneeded work)*/
488: /* R nodes */
489: old_size = -1;
490: if (pcbddc->vec1_R) {
491: VecGetSize(pcbddc->vec1_R,&old_size);
492: }
493: if (n_R != old_size) {
494: VecDestroy(&pcbddc->vec1_R);
495: VecDestroy(&pcbddc->vec2_R);
496: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
497: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
498: VecSetType(pcbddc->vec1_R,impVecType);
499: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
500: }
501: /* local primal dofs */
502: old_size = -1;
503: if (pcbddc->vec1_P) {
504: VecGetSize(pcbddc->vec1_P,&old_size);
505: }
506: if (pcbddc->local_primal_size != old_size) {
507: VecDestroy(&pcbddc->vec1_P);
508: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
509: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
510: VecSetType(pcbddc->vec1_P,impVecType);
511: }
512: /* local explicit constraints */
513: old_size = -1;
514: if (pcbddc->vec1_C) {
515: VecGetSize(pcbddc->vec1_C,&old_size);
516: }
517: if (n_constraints && n_constraints != old_size) {
518: VecDestroy(&pcbddc->vec1_C);
519: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
520: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
521: VecSetType(pcbddc->vec1_C,impVecType);
522: }
523: return(0);
524: }
528: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
529: {
530: PetscErrorCode ierr;
531: /* pointers to pcis and pcbddc */
532: PC_IS* pcis = (PC_IS*)pc->data;
533: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
534: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
535: /* submatrices of local problem */
536: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
537: /* submatrices of local coarse problem */
538: Mat S_VV,S_CV,S_VC,S_CC;
539: /* working matrices */
540: Mat C_CR;
541: /* additional working stuff */
542: PC pc_R;
543: Mat F;
544: PetscBool isLU,isCHOL,isILU;
546: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
547: PetscScalar *work;
548: PetscInt *idx_V_B;
549: PetscInt n,n_vertices,n_constraints;
550: PetscInt i,n_R,n_D,n_B;
551: PetscBool unsymmetric_check;
552: /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
553: MatType impMatType;
554: /* some shortcuts to scalars */
555: PetscScalar one=1.0,m_one=-1.0;
558: n_vertices = pcbddc->n_vertices;
559: n_constraints = pcbddc->local_primal_size-n_vertices;
560: /* Set Non-overlapping dimensions */
561: n_B = pcis->n_B;
562: n_D = pcis->n - n_B;
563: n_R = pcis->n - n_vertices;
565: /* Set types for local objects needed by BDDC precondtioner */
566: impMatType = MATSEQDENSE;
568: /* vertices in boundary numbering */
569: PetscMalloc1(n_vertices,&idx_V_B);
570: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
571: if (i != n_vertices) {
572: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
573: }
575: /* Subdomain contribution (Non-overlapping) to coarse matrix */
576: PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
577: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
578: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
579: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
580: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
581: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
582: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
583: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
584: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
586: unsymmetric_check = PETSC_FALSE;
587: /* allocate workspace */
588: n = 0;
589: if (n_constraints) {
590: n += n_R*n_constraints;
591: }
592: if (n_vertices) {
593: n = PetscMax(2*n_R*n_vertices,n);
594: n = PetscMax((n_R+n_B)*n_vertices,n);
595: }
596: if (!pcbddc->symmetric_primal) {
597: n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
598: unsymmetric_check = PETSC_TRUE;
599: }
600: PetscMalloc1(n,&work);
602: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
603: KSPGetPC(pcbddc->ksp_R,&pc_R);
604: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
605: PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
606: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
607: if (isLU || isILU || isCHOL) {
608: PCFactorGetMatrix(pc_R,&F);
609: } else if (sub_schurs->reuse_mumps) {
610: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
611: MatFactorType type;
613: F = reuse_mumps->F;
614: MatGetFactorType(F,&type);
615: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
616: } else {
617: F = NULL;
618: }
620: /* Precompute stuffs needed for preprocessing and application of BDDC*/
621: if (n_constraints) {
622: Mat M1,M2,M3;
623: Mat auxmat;
624: IS is_aux;
625: PetscScalar *array,*array2;
627: MatDestroy(&pcbddc->local_auxmat1);
628: MatDestroy(&pcbddc->local_auxmat2);
630: /* Extract constraints on R nodes: C_{CR} */
631: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
632: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
633: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);
635: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
636: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
637: PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));
638: for (i=0;i<n_constraints;i++) {
639: const PetscScalar *row_cmat_values;
640: const PetscInt *row_cmat_indices;
641: PetscInt size_of_constraint,j;
643: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
644: for (j=0;j<size_of_constraint;j++) {
645: work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
646: }
647: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
648: }
649: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);
650: if (F) {
651: Mat B;
653: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
654: MatMatSolve(F,B,local_auxmat2_R);
655: MatDestroy(&B);
656: } else {
657: PetscScalar *marr;
659: MatDenseGetArray(local_auxmat2_R,&marr);
660: for (i=0;i<n_constraints;i++) {
661: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
662: VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);
663: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
664: VecResetArray(pcbddc->vec1_R);
665: VecResetArray(pcbddc->vec2_R);
666: }
667: MatDenseRestoreArray(local_auxmat2_R,&marr);
668: }
669: if (!pcbddc->switch_static) {
670: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
671: MatDenseGetArray(pcbddc->local_auxmat2,&array);
672: MatDenseGetArray(local_auxmat2_R,&array2);
673: for (i=0;i<n_constraints;i++) {
674: VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);
675: VecPlaceArray(pcis->vec1_B,array+i*n_B);
676: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
677: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
678: VecResetArray(pcis->vec1_B);
679: VecResetArray(pcbddc->vec1_R);
680: }
681: MatDenseRestoreArray(local_auxmat2_R,&array2);
682: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
683: MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
684: } else {
685: PetscObjectReference((PetscObject)local_auxmat2_R);
686: pcbddc->local_auxmat2 = local_auxmat2_R;
687: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
688: }
689: ISDestroy(&is_aux);
690: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
691: MatScale(M3,m_one);
692: MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
693: MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
694: if (isCHOL) {
695: MatCholeskyFactor(M3,NULL,NULL);
696: } else {
697: MatLUFactor(M3,NULL,NULL,NULL);
698: }
699: VecSet(pcbddc->vec1_C,one);
700: MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
701: MatMatSolve(M3,M2,M1);
702: MatDestroy(&M2);
703: MatDestroy(&M3);
704: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
705: MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
706: MatDestroy(&auxmat);
707: MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
708: MatDestroy(&M1);
709: }
710: /* Get submatrices from subdomain matrix */
711: if (n_vertices) {
712: IS is_aux;
714: if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
715: IS tis;
717: ISDuplicate(pcbddc->is_R_local,&tis);
718: ISSort(tis);
719: ISComplement(tis,0,pcis->n,&is_aux);
720: ISDestroy(&tis);
721: } else {
722: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
723: }
724: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
725: MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
726: MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
727: ISDestroy(&is_aux);
728: }
730: /* Matrix of coarse basis functions (local) */
731: if (pcbddc->coarse_phi_B) {
732: PetscInt on_B,on_primal,on_D=n_D;
733: if (pcbddc->coarse_phi_D) {
734: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
735: }
736: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
737: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
738: PetscScalar *marray;
740: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
741: PetscFree(marray);
742: MatDestroy(&pcbddc->coarse_phi_B);
743: MatDestroy(&pcbddc->coarse_psi_B);
744: MatDestroy(&pcbddc->coarse_phi_D);
745: MatDestroy(&pcbddc->coarse_psi_D);
746: }
747: }
749: if (!pcbddc->coarse_phi_B) {
750: PetscScalar *marray;
752: n = n_B*pcbddc->local_primal_size;
753: if (pcbddc->switch_static || pcbddc->dbg_flag) {
754: n += n_D*pcbddc->local_primal_size;
755: }
756: if (!pcbddc->symmetric_primal) {
757: n *= 2;
758: }
759: PetscCalloc1(n,&marray);
760: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);
761: n = n_B*pcbddc->local_primal_size;
762: if (pcbddc->switch_static || pcbddc->dbg_flag) {
763: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);
764: n += n_D*pcbddc->local_primal_size;
765: }
766: if (!pcbddc->symmetric_primal) {
767: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);
768: if (pcbddc->switch_static || pcbddc->dbg_flag) {
769: n = n_B*pcbddc->local_primal_size;
770: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);
771: }
772: } else {
773: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
774: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
775: if (pcbddc->switch_static || pcbddc->dbg_flag) {
776: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
777: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
778: }
779: }
780: }
781: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
782: /* vertices */
783: if (n_vertices) {
785: MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);
787: if (n_R) {
788: Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
789: PetscBLASInt B_N,B_one = 1;
790: PetscScalar *x,*y;
791: PetscBool isseqaij;
793: MatScale(A_RV,m_one);
794: MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);
795: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
796: if (F) { /* TODO could be optimized for symmetric problems */
797: MatMatSolve(F,A_RV,A_RRmA_RV);
798: } else {
799: MatDenseGetArray(A_RV,&y);
800: for (i=0;i<n_vertices;i++) {
801: VecPlaceArray(pcbddc->vec1_R,y+i*n_R);
802: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
803: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
804: VecResetArray(pcbddc->vec1_R);
805: VecResetArray(pcbddc->vec2_R);
806: }
807: MatDenseRestoreArray(A_RV,&y);
808: }
809: MatDestroy(&A_RV);
810: /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
811: if (n_constraints) {
812: Mat B;
814: PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
815: for (i=0;i<n_vertices;i++) {
816: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
817: VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);
818: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
819: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
820: VecResetArray(pcis->vec1_B);
821: VecResetArray(pcbddc->vec1_R);
822: }
823: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);
824: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
825: MatDestroy(&B);
826: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);
827: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
828: MatScale(S_CV,m_one);
829: PetscBLASIntCast(n_R*n_vertices,&B_N);
830: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
831: MatDestroy(&B);
832: }
833: PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
834: if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
835: MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);
836: }
837: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
838: MatDestroy(&A_RRmA_RV);
839: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
840: MatDenseGetArray(A_VV,&x);
841: MatDenseGetArray(S_VVt,&y);
842: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
843: MatDenseRestoreArray(A_VV,&x);
844: MatDenseRestoreArray(S_VVt,&y);
845: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
846: MatDestroy(&S_VVt);
847: } else {
848: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
849: }
850: MatDestroy(&A_VV);
851: /* coarse basis functions */
852: for (i=0;i<n_vertices;i++) {
853: PetscScalar *y;
855: VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
856: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
857: VecPlaceArray(pcis->vec1_B,y+n_B*i);
858: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
859: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
860: y[n_B*i+idx_V_B[i]] = 1.0;
861: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
862: VecResetArray(pcis->vec1_B);
864: if (pcbddc->switch_static || pcbddc->dbg_flag) {
865: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
866: VecPlaceArray(pcis->vec1_D,y+n_D*i);
867: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
868: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
869: VecResetArray(pcis->vec1_D);
870: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
871: }
872: VecResetArray(pcbddc->vec1_R);
873: }
874: /* if n_R == 0 the object is not destroyed */
875: MatDestroy(&A_RV);
876: }
878: if (n_constraints) {
879: Mat B;
881: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
882: MatScale(S_CC,m_one);
883: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
884: MatScale(S_CC,m_one);
885: if (n_vertices) {
886: if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
887: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
888: } else {
889: Mat S_VCt;
891: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
892: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
893: MatDestroy(&S_VCt);
894: }
895: }
896: MatDestroy(&B);
897: /* coarse basis functions */
898: for (i=0;i<n_constraints;i++) {
899: PetscScalar *y;
901: VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
902: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
903: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
904: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
905: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
906: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
907: VecResetArray(pcis->vec1_B);
908: if (pcbddc->switch_static || pcbddc->dbg_flag) {
909: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
910: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
911: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
912: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
913: VecResetArray(pcis->vec1_D);
914: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
915: }
916: VecResetArray(pcbddc->vec1_R);
917: }
918: }
919: if (n_constraints) {
920: MatDestroy(&local_auxmat2_R);
921: }
923: /* compute other basis functions for non-symmetric problems */
924: if (!pcbddc->symmetric_primal) {
926: if (n_constraints) {
927: Mat S_CCT,B_C;
929: /* this is a lazy thing */
930: MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);
931: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);
932: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
933: MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);
934: MatDestroy(&S_CCT);
935: if (n_vertices) {
936: Mat B_V,S_VCT;
938: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);
939: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
940: MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);
941: MatDestroy(&B_V);
942: MatDestroy(&S_VCT);
943: }
944: MatDestroy(&B_C);
945: } else { /* if there are no constraints, reset work */
946: PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));
947: }
948: if (n_vertices && n_R) {
949: Mat A_VRT;
950: PetscScalar *marray;
951: PetscBLASInt B_N,B_one = 1;
953: MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);
954: MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);
955: MatDenseGetArray(A_VRT,&marray);
956: PetscBLASIntCast(n_vertices*n_R,&B_N);
957: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
958: MatDenseRestoreArray(A_VRT,&marray);
959: MatDestroy(&A_VRT);
960: }
962: if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
963: for (i=0;i<pcbddc->local_primal_size;i++) {
964: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
965: VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
966: MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);
967: VecResetArray(pcbddc->vec1_R);
968: VecResetArray(pcbddc->vec2_R);
969: }
970: } else {
971: for (i=0;i<pcbddc->local_primal_size;i++) {
972: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
973: VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
974: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
975: VecResetArray(pcbddc->vec1_R);
976: VecResetArray(pcbddc->vec2_R);
977: }
978: }
979: /* coarse basis functions */
980: for (i=0;i<pcbddc->local_primal_size;i++) {
981: PetscScalar *y;
983: VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));
984: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
985: VecPlaceArray(pcis->vec1_B,y+n_B*i);
986: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
987: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
988: if (i<n_vertices) {
989: y[n_B*i+idx_V_B[i]] = 1.0;
990: }
991: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
992: VecResetArray(pcis->vec1_B);
994: if (pcbddc->switch_static || pcbddc->dbg_flag) {
995: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
996: VecPlaceArray(pcis->vec1_D,y+n_D*i);
997: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
998: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
999: VecResetArray(pcis->vec1_D);
1000: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
1001: }
1002: VecResetArray(pcbddc->vec1_R);
1003: }
1004: }
1005: /* free memory */
1006: PetscFree(idx_V_B);
1007: MatDestroy(&S_VV);
1008: MatDestroy(&S_CV);
1009: MatDestroy(&S_VC);
1010: MatDestroy(&S_CC);
1011: PetscFree(work);
1012: if (n_vertices) {
1013: MatDestroy(&A_VR);
1014: }
1015: if (n_constraints) {
1016: MatDestroy(&C_CR);
1017: }
1018: /* Checking coarse_sub_mat and coarse basis functios */
1019: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1020: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1021: if (pcbddc->dbg_flag) {
1022: Mat coarse_sub_mat;
1023: Mat AUXMAT,TM1,TM2,TM3,TM4;
1024: Mat coarse_phi_D,coarse_phi_B;
1025: Mat coarse_psi_D,coarse_psi_B;
1026: Mat A_II,A_BB,A_IB,A_BI;
1027: Mat C_B,CPHI;
1028: IS is_dummy;
1029: Vec mones;
1030: MatType checkmattype=MATSEQAIJ;
1031: PetscReal real_value;
1033: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
1034: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
1035: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
1036: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
1037: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
1038: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
1039: if (unsymmetric_check) {
1040: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
1041: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
1042: }
1043: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
1045: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1046: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
1047: PetscViewerFlush(pcbddc->dbg_viewer);
1048: if (unsymmetric_check) {
1049: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1050: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
1051: MatDestroy(&AUXMAT);
1052: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1053: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
1054: MatDestroy(&AUXMAT);
1055: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1056: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1057: MatDestroy(&AUXMAT);
1058: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1059: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1060: MatDestroy(&AUXMAT);
1061: } else {
1062: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
1063: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
1064: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1065: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1066: MatDestroy(&AUXMAT);
1067: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1068: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1069: MatDestroy(&AUXMAT);
1070: }
1071: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
1072: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
1073: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
1074: MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);
1075: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
1076: MatNorm(TM1,NORM_FROBENIUS,&real_value);
1077: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1078: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
1080: /* check constraints */
1081: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);
1082: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1083: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
1084: MatCreateVecs(CPHI,&mones,NULL);
1085: VecSet(mones,-1.0);
1086: MatDiagonalSet(CPHI,mones,ADD_VALUES);
1087: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1088: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1089: if (unsymmetric_check) {
1090: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
1091: VecSet(mones,-1.0);
1092: MatDiagonalSet(CPHI,mones,ADD_VALUES);
1093: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1094: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1095: }
1096: MatDestroy(&C_B);
1097: MatDestroy(&CPHI);
1098: ISDestroy(&is_dummy);
1099: VecDestroy(&mones);
1101: PetscViewerFlush(pcbddc->dbg_viewer);
1102: MatDestroy(&A_II);
1103: MatDestroy(&A_BB);
1104: MatDestroy(&A_IB);
1105: MatDestroy(&A_BI);
1106: MatDestroy(&TM1);
1107: MatDestroy(&TM2);
1108: MatDestroy(&TM3);
1109: MatDestroy(&TM4);
1110: MatDestroy(&coarse_phi_D);
1111: MatDestroy(&coarse_phi_B);
1112: if (unsymmetric_check) {
1113: MatDestroy(&coarse_psi_D);
1114: MatDestroy(&coarse_psi_B);
1115: }
1116: MatDestroy(&coarse_sub_mat);
1117: }
1118: /* get back data */
1119: *coarse_submat_vals_n = coarse_submat_vals;
1120: return(0);
1121: }
1125: PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1126: {
1127: Mat *work_mat;
1128: IS isrow_s,iscol_s;
1129: PetscBool rsorted,csorted;
1130: PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c;
1134: ISSorted(isrow,&rsorted);
1135: ISSorted(iscol,&csorted);
1136: ISGetLocalSize(isrow,&rsize);
1137: ISGetLocalSize(iscol,&csize);
1139: if (!rsorted) {
1140: const PetscInt *idxs;
1141: PetscInt *idxs_sorted,i;
1143: PetscMalloc1(rsize,&idxs_perm_r);
1144: PetscMalloc1(rsize,&idxs_sorted);
1145: for (i=0;i<rsize;i++) {
1146: idxs_perm_r[i] = i;
1147: }
1148: ISGetIndices(isrow,&idxs);
1149: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
1150: for (i=0;i<rsize;i++) {
1151: idxs_sorted[i] = idxs[idxs_perm_r[i]];
1152: }
1153: ISRestoreIndices(isrow,&idxs);
1154: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
1155: } else {
1156: PetscObjectReference((PetscObject)isrow);
1157: isrow_s = isrow;
1158: }
1160: if (!csorted) {
1161: if (isrow == iscol) {
1162: PetscObjectReference((PetscObject)isrow_s);
1163: iscol_s = isrow_s;
1164: } else {
1165: const PetscInt *idxs;
1166: PetscInt *idxs_sorted,i;
1168: PetscMalloc1(csize,&idxs_perm_c);
1169: PetscMalloc1(csize,&idxs_sorted);
1170: for (i=0;i<csize;i++) {
1171: idxs_perm_c[i] = i;
1172: }
1173: ISGetIndices(iscol,&idxs);
1174: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
1175: for (i=0;i<csize;i++) {
1176: idxs_sorted[i] = idxs[idxs_perm_c[i]];
1177: }
1178: ISRestoreIndices(iscol,&idxs);
1179: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
1180: }
1181: } else {
1182: PetscObjectReference((PetscObject)iscol);
1183: iscol_s = iscol;
1184: }
1186: MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
1188: if (!rsorted || !csorted) {
1189: Mat new_mat;
1190: IS is_perm_r,is_perm_c;
1192: if (!rsorted) {
1193: PetscInt *idxs_r,i;
1194: PetscMalloc1(rsize,&idxs_r);
1195: for (i=0;i<rsize;i++) {
1196: idxs_r[idxs_perm_r[i]] = i;
1197: }
1198: PetscFree(idxs_perm_r);
1199: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
1200: } else {
1201: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
1202: }
1203: ISSetPermutation(is_perm_r);
1205: if (!csorted) {
1206: if (isrow_s == iscol_s) {
1207: PetscObjectReference((PetscObject)is_perm_r);
1208: is_perm_c = is_perm_r;
1209: } else {
1210: PetscInt *idxs_c,i;
1211: PetscMalloc1(csize,&idxs_c);
1212: for (i=0;i<csize;i++) {
1213: idxs_c[idxs_perm_c[i]] = i;
1214: }
1215: PetscFree(idxs_perm_c);
1216: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
1217: }
1218: } else {
1219: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
1220: }
1221: ISSetPermutation(is_perm_c);
1223: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
1224: MatDestroy(&work_mat[0]);
1225: work_mat[0] = new_mat;
1226: ISDestroy(&is_perm_r);
1227: ISDestroy(&is_perm_c);
1228: }
1230: PetscObjectReference((PetscObject)work_mat[0]);
1231: *B = work_mat[0];
1232: MatDestroyMatrices(1,&work_mat);
1233: ISDestroy(&isrow_s);
1234: ISDestroy(&iscol_s);
1235: return(0);
1236: }
1240: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1241: {
1242: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1243: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1244: Mat new_mat;
1245: IS is_local,is_global;
1246: PetscInt local_size;
1247: PetscBool isseqaij;
1251: MatDestroy(&pcbddc->local_mat);
1252: MatGetSize(matis->A,&local_size,NULL);
1253: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
1254: ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);
1255: ISDestroy(&is_local);
1256: MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
1257: ISDestroy(&is_global);
1259: /* check */
1260: if (pcbddc->dbg_flag) {
1261: Vec x,x_change;
1262: PetscReal error;
1264: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
1265: VecSetRandom(x,NULL);
1266: MatMult(ChangeOfBasisMatrix,x,x_change);
1267: VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1268: VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1269: MatMult(new_mat,matis->x,matis->y);
1270: VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1271: VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1272: VecAXPY(x,-1.0,x_change);
1273: VecNorm(x,NORM_INFINITY,&error);
1274: PetscViewerFlush(pcbddc->dbg_viewer);
1275: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);
1276: VecDestroy(&x);
1277: VecDestroy(&x_change);
1278: }
1280: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1281: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
1282: if (isseqaij) {
1283: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1284: } else {
1285: Mat work_mat;
1286: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
1287: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1288: MatDestroy(&work_mat);
1289: }
1290: if (matis->A->symmetric_set) {
1291: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
1292: #if !defined(PETSC_USE_COMPLEX)
1293: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
1294: #endif
1295: }
1296: /*
1297: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
1298: MatView(new_mat,(PetscViewer)0);
1299: */
1300: MatDestroy(&new_mat);
1301: return(0);
1302: }
1306: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1307: {
1308: PC_IS* pcis = (PC_IS*)(pc->data);
1309: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1310: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1311: PetscInt *idx_R_local=NULL;
1312: PetscInt n_vertices,i,j,n_R,n_D,n_B;
1313: PetscInt vbs,bs;
1314: PetscBT bitmask=NULL;
1315: PetscErrorCode ierr;
1318: /*
1319: No need to setup local scatters if
1320: - primal space is unchanged
1321: AND
1322: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1323: AND
1324: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1325: */
1326: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1327: return(0);
1328: }
1329: /* destroy old objects */
1330: ISDestroy(&pcbddc->is_R_local);
1331: VecScatterDestroy(&pcbddc->R_to_B);
1332: VecScatterDestroy(&pcbddc->R_to_D);
1333: /* Set Non-overlapping dimensions */
1334: n_B = pcis->n_B;
1335: n_D = pcis->n - n_B;
1336: n_vertices = pcbddc->n_vertices;
1338: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1340: /* create auxiliary bitmask and allocate workspace */
1341: if (!sub_schurs->reuse_mumps) {
1342: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
1343: PetscBTCreate(pcis->n,&bitmask);
1344: for (i=0;i<n_vertices;i++) {
1345: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
1346: }
1348: for (i=0, n_R=0; i<pcis->n; i++) {
1349: if (!PetscBTLookup(bitmask,i)) {
1350: idx_R_local[n_R++] = i;
1351: }
1352: }
1353: } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1354: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1356: ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1357: ISGetLocalSize(reuse_mumps->is_R,&n_R);
1358: }
1360: /* Block code */
1361: vbs = 1;
1362: MatGetBlockSize(pcbddc->local_mat,&bs);
1363: if (bs>1 && !(n_vertices%bs)) {
1364: PetscBool is_blocked = PETSC_TRUE;
1365: PetscInt *vary;
1366: if (!sub_schurs->reuse_mumps) {
1367: PetscMalloc1(pcis->n/bs,&vary);
1368: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
1369: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1370: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1371: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1372: for (i=0; i<pcis->n/bs; i++) {
1373: if (vary[i]!=0 && vary[i]!=bs) {
1374: is_blocked = PETSC_FALSE;
1375: break;
1376: }
1377: }
1378: PetscFree(vary);
1379: } else {
1380: /* Verify directly the R set */
1381: for (i=0; i<n_R/bs; i++) {
1382: PetscInt j,node=idx_R_local[bs*i];
1383: for (j=1; j<bs; j++) {
1384: if (node != idx_R_local[bs*i+j]-j) {
1385: is_blocked = PETSC_FALSE;
1386: break;
1387: }
1388: }
1389: }
1390: }
1391: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1392: vbs = bs;
1393: for (i=0;i<n_R/vbs;i++) {
1394: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1395: }
1396: }
1397: }
1398: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
1399: if (sub_schurs->reuse_mumps) {
1400: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1402: ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1403: ISDestroy(&reuse_mumps->is_R);
1404: PetscObjectReference((PetscObject)pcbddc->is_R_local);
1405: reuse_mumps->is_R = pcbddc->is_R_local;
1406: } else {
1407: PetscFree(idx_R_local);
1408: }
1410: /* print some info if requested */
1411: if (pcbddc->dbg_flag) {
1412: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1413: PetscViewerFlush(pcbddc->dbg_viewer);
1414: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1415: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
1416: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
1417: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
1418: PetscViewerFlush(pcbddc->dbg_viewer);
1419: }
1421: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1422: if (!sub_schurs->reuse_mumps) {
1423: IS is_aux1,is_aux2;
1424: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1426: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1427: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
1428: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
1429: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1430: for (i=0; i<n_D; i++) {
1431: PetscBTSet(bitmask,is_indices[i]);
1432: }
1433: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1434: for (i=0, j=0; i<n_R; i++) {
1435: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1436: aux_array1[j++] = i;
1437: }
1438: }
1439: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1440: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1441: for (i=0, j=0; i<n_B; i++) {
1442: if (!PetscBTLookup(bitmask,is_indices[i])) {
1443: aux_array2[j++] = i;
1444: }
1445: }
1446: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1447: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
1448: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
1449: ISDestroy(&is_aux1);
1450: ISDestroy(&is_aux2);
1452: if (pcbddc->switch_static || pcbddc->dbg_flag) {
1453: PetscMalloc1(n_D,&aux_array1);
1454: for (i=0, j=0; i<n_R; i++) {
1455: if (PetscBTLookup(bitmask,idx_R_local[i])) {
1456: aux_array1[j++] = i;
1457: }
1458: }
1459: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1460: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1461: ISDestroy(&is_aux1);
1462: }
1463: PetscBTDestroy(&bitmask);
1464: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1465: } else {
1466: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1467: IS tis;
1468: PetscInt schur_size;
1470: ISGetLocalSize(reuse_mumps->is_B,&schur_size);
1471: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
1472: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);
1473: ISDestroy(&tis);
1474: if (pcbddc->switch_static || pcbddc->dbg_flag) {
1475: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
1476: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1477: ISDestroy(&tis);
1478: }
1479: }
1480: return(0);
1481: }
1486: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1487: {
1488: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1489: PC_IS *pcis = (PC_IS*)pc->data;
1490: PC pc_temp;
1491: Mat A_RR;
1492: MatReuse reuse;
1493: PetscScalar m_one = -1.0;
1494: PetscReal value;
1495: PetscInt n_D,n_R;
1496: PetscBool use_exact,use_exact_reduced,issbaij;
1498: /* prefixes stuff */
1499: char dir_prefix[256],neu_prefix[256],str_level[16];
1500: size_t len;
1504: /* compute prefixes */
1505: PetscStrcpy(dir_prefix,"");
1506: PetscStrcpy(neu_prefix,"");
1507: if (!pcbddc->current_level) {
1508: PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1509: PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1510: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1511: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1512: } else {
1513: PetscStrcpy(str_level,"");
1514: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1515: PetscStrlen(((PetscObject)pc)->prefix,&len);
1516: len -= 15; /* remove "pc_bddc_coarse_" */
1517: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1518: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1519: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
1520: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
1521: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1522: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1523: PetscStrcat(dir_prefix,str_level);
1524: PetscStrcat(neu_prefix,str_level);
1525: }
1527: /* DIRICHLET PROBLEM */
1528: if (dirichlet) {
1529: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1530: if (pcbddc->local_mat->symmetric_set) {
1531: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1532: }
1533: /* Matrix for Dirichlet problem is pcis->A_II */
1534: n_D = pcis->n - pcis->n_B;
1535: if (!pcbddc->ksp_D) { /* create object if not yet build */
1536: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1537: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1538: /* default */
1539: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1540: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1541: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1542: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1543: if (issbaij) {
1544: PCSetType(pc_temp,PCCHOLESKY);
1545: } else {
1546: PCSetType(pc_temp,PCLU);
1547: }
1548: /* Allow user's customization */
1549: KSPSetFromOptions(pcbddc->ksp_D);
1550: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1551: }
1552: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
1553: if (sub_schurs->reuse_mumps) {
1554: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1556: KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);
1557: }
1558: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1559: if (!n_D) {
1560: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1561: PCSetType(pc_temp,PCNONE);
1562: }
1563: /* Set Up KSP for Dirichlet problem of BDDC */
1564: KSPSetUp(pcbddc->ksp_D);
1565: /* set ksp_D into pcis data */
1566: KSPDestroy(&pcis->ksp_D);
1567: PetscObjectReference((PetscObject)pcbddc->ksp_D);
1568: pcis->ksp_D = pcbddc->ksp_D;
1569: }
1571: /* NEUMANN PROBLEM */
1572: A_RR = 0;
1573: if (neumann) {
1574: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1575: PetscInt ibs,mbs;
1576: PetscBool issbaij;
1577: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1578: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1579: ISGetSize(pcbddc->is_R_local,&n_R);
1580: if (pcbddc->ksp_R) { /* already created ksp */
1581: PetscInt nn_R;
1582: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
1583: PetscObjectReference((PetscObject)A_RR);
1584: MatGetSize(A_RR,&nn_R,NULL);
1585: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1586: KSPReset(pcbddc->ksp_R);
1587: MatDestroy(&A_RR);
1588: reuse = MAT_INITIAL_MATRIX;
1589: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1590: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1591: MatDestroy(&A_RR);
1592: reuse = MAT_INITIAL_MATRIX;
1593: } else { /* safe to reuse the matrix */
1594: reuse = MAT_REUSE_MATRIX;
1595: }
1596: }
1597: /* last check */
1598: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1599: MatDestroy(&A_RR);
1600: reuse = MAT_INITIAL_MATRIX;
1601: }
1602: } else { /* first time, so we need to create the matrix */
1603: reuse = MAT_INITIAL_MATRIX;
1604: }
1605: /* extract A_RR */
1606: MatGetBlockSize(pcbddc->local_mat,&mbs);
1607: ISGetBlockSize(pcbddc->is_R_local,&ibs);
1608: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
1609: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1610: if (matis->A == pcbddc->local_mat) {
1611: MatDestroy(&pcbddc->local_mat);
1612: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1613: } else {
1614: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);
1615: }
1616: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1617: if (matis->A == pcbddc->local_mat) {
1618: MatDestroy(&pcbddc->local_mat);
1619: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1620: } else {
1621: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);
1622: }
1623: }
1624: if (!sub_schurs->reuse_mumps) {
1625: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1626: if (pcbddc->local_mat->symmetric_set) {
1627: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1628: }
1629: } else {
1630: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1632: MatDestroy(&A_RR);
1633: PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);
1634: PetscObjectReference((PetscObject)A_RR);
1635: }
1636: if (!pcbddc->ksp_R) { /* create object if not present */
1637: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1638: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1639: /* default */
1640: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1641: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1642: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1643: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1644: if (issbaij) {
1645: PCSetType(pc_temp,PCCHOLESKY);
1646: } else {
1647: PCSetType(pc_temp,PCLU);
1648: }
1649: /* Allow user's customization */
1650: KSPSetFromOptions(pcbddc->ksp_R);
1651: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1652: }
1653: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
1654: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1655: if (!n_R) {
1656: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1657: PCSetType(pc_temp,PCNONE);
1658: }
1659: /* Reuse MUMPS solver if it is present */
1660: if (sub_schurs->reuse_mumps) {
1661: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1663: KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);
1664: }
1665: /* Set Up KSP for Neumann problem of BDDC */
1666: KSPSetUp(pcbddc->ksp_R);
1667: }
1668: /* free Neumann problem's matrix */
1669: MatDestroy(&A_RR);
1671: /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1672: if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1673: if (pcbddc->dbg_flag) {
1674: PetscViewerFlush(pcbddc->dbg_viewer);
1675: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1676: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1677: }
1678: if (dirichlet) { /* Dirichlet */
1679: VecSetRandom(pcis->vec1_D,NULL);
1680: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1681: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1682: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1683: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1684: /* need to be adapted? */
1685: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1686: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1687: PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1688: /* print info */
1689: if (pcbddc->dbg_flag) {
1690: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1691: PetscViewerFlush(pcbddc->dbg_viewer);
1692: }
1693: if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1694: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);
1695: }
1696: }
1697: if (neumann) { /* Neumann */
1698: KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);
1699: VecSetRandom(pcbddc->vec1_R,NULL);
1700: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1701: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1702: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1703: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1704: /* need to be adapted? */
1705: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1706: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1707: /* print info */
1708: if (pcbddc->dbg_flag) {
1709: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1710: PetscViewerFlush(pcbddc->dbg_viewer);
1711: }
1712: if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1713: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);
1714: }
1715: }
1716: }
1717: return(0);
1718: }
1722: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1723: {
1724: PetscErrorCode ierr;
1725: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1726: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1729: if (!sub_schurs->reuse_mumps) {
1730: VecSet(pcbddc->vec1_R,0.);
1731: }
1732: if (!pcbddc->switch_static) {
1733: if (applytranspose && pcbddc->local_auxmat1) {
1734: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
1735: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1736: }
1737: if (!sub_schurs->reuse_mumps) {
1738: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1739: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1740: } else {
1741: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1743: VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1744: VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1745: }
1746: } else {
1747: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1748: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1749: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1750: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1751: if (applytranspose && pcbddc->local_auxmat1) {
1752: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
1753: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1754: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1755: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1756: }
1757: }
1758: if (!sub_schurs->reuse_mumps) {
1759: if (applytranspose) {
1760: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1761: } else {
1762: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1763: }
1764: #if defined(PETSC_HAVE_MUMPS)
1765: } else {
1766: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1768: if (applytranspose) {
1769: MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1770: } else {
1771: MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1772: }
1773: #endif
1774: }
1775: VecSet(inout_B,0.);
1776: if (!pcbddc->switch_static) {
1777: if (!sub_schurs->reuse_mumps) {
1778: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1779: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1780: } else {
1781: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1783: VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1784: VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1785: }
1786: if (!applytranspose && pcbddc->local_auxmat1) {
1787: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1788: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
1789: }
1790: } else {
1791: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1792: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1793: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1794: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1795: if (!applytranspose && pcbddc->local_auxmat1) {
1796: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1797: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
1798: }
1799: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1800: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1801: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1802: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1803: }
1804: return(0);
1805: }
1807: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1810: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1811: {
1813: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1814: PC_IS* pcis = (PC_IS*) (pc->data);
1815: const PetscScalar zero = 0.0;
1818: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1819: if (applytranspose) {
1820: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1821: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1822: } else {
1823: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1824: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1825: }
1826: /* start communications from local primal nodes to rhs of coarse solver */
1827: VecSet(pcbddc->coarse_vec,zero);
1828: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
1829: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
1831: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1832: /* TODO remove null space when doing multilevel */
1833: if (pcbddc->coarse_ksp) {
1834: Vec rhs,sol;
1836: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
1837: KSPGetSolution(pcbddc->coarse_ksp,&sol);
1838: if (applytranspose) {
1839: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
1840: } else {
1841: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
1842: }
1843: }
1845: /* Local solution on R nodes */
1846: if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1847: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
1848: }
1850: /* communications from coarse sol to local primal nodes */
1851: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
1852: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
1854: /* Sum contributions from two levels */
1855: if (applytranspose) {
1856: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1857: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1858: } else {
1859: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1860: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1861: }
1862: return(0);
1863: }
1867: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1868: {
1870: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1871: PetscScalar *array;
1872: Vec from,to;
1875: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1876: from = pcbddc->coarse_vec;
1877: to = pcbddc->vec1_P;
1878: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1879: Vec tvec;
1881: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1882: VecResetArray(tvec);
1883: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
1884: VecGetArray(tvec,&array);
1885: VecPlaceArray(from,array);
1886: VecRestoreArray(tvec,&array);
1887: }
1888: } else { /* from local to global -> put data in coarse right hand side */
1889: from = pcbddc->vec1_P;
1890: to = pcbddc->coarse_vec;
1891: }
1892: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1893: return(0);
1894: }
1898: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1899: {
1901: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1902: PetscScalar *array;
1903: Vec from,to;
1906: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1907: from = pcbddc->coarse_vec;
1908: to = pcbddc->vec1_P;
1909: } else { /* from local to global -> put data in coarse right hand side */
1910: from = pcbddc->vec1_P;
1911: to = pcbddc->coarse_vec;
1912: }
1913: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1914: if (smode == SCATTER_FORWARD) {
1915: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1916: Vec tvec;
1918: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1919: VecGetArray(to,&array);
1920: VecPlaceArray(tvec,array);
1921: VecRestoreArray(to,&array);
1922: }
1923: } else {
1924: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1925: VecResetArray(from);
1926: }
1927: }
1928: return(0);
1929: }
1931: /* uncomment for testing purposes */
1932: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1935: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1936: {
1937: PetscErrorCode ierr;
1938: PC_IS* pcis = (PC_IS*)(pc->data);
1939: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1940: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1941: /* one and zero */
1942: PetscScalar one=1.0,zero=0.0;
1943: /* space to store constraints and their local indices */
1944: PetscScalar *constraints_data;
1945: PetscInt *constraints_idxs,*constraints_idxs_B;
1946: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
1947: PetscInt *constraints_n;
1948: /* iterators */
1949: PetscInt i,j,k,total_counts,total_counts_cc,cum;
1950: /* BLAS integers */
1951: PetscBLASInt lwork,lierr;
1952: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
1953: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
1954: /* reuse */
1955: PetscInt olocal_primal_size,olocal_primal_size_cc;
1956: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
1957: /* change of basis */
1958: PetscBool qr_needed;
1959: PetscBT change_basis,qr_needed_idx;
1960: /* auxiliary stuff */
1961: PetscInt *nnz,*is_indices;
1962: PetscInt ncc;
1963: /* some quantities */
1964: PetscInt n_vertices,total_primal_vertices,valid_constraints;
1965: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1968: /* Destroy Mat objects computed previously */
1969: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1970: MatDestroy(&pcbddc->ConstraintMatrix);
1971: /* save info on constraints from previous setup (if any) */
1972: olocal_primal_size = pcbddc->local_primal_size;
1973: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1974: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
1975: PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
1976: PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
1977: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
1978: PetscFree(pcbddc->primal_indices_local_idxs);
1980: /* print some info */
1981: if (pcbddc->dbg_flag) {
1982: IS vertices;
1983: PetscInt nv,nedges,nfaces;
1984: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);
1985: ISGetSize(vertices,&nv);
1986: ISDestroy(&vertices);
1987: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1988: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1989: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
1990: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
1991: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
1992: PetscViewerFlush(pcbddc->dbg_viewer);
1993: }
1995: if (!pcbddc->adaptive_selection) {
1996: IS ISForVertices,*ISForFaces,*ISForEdges;
1997: MatNullSpace nearnullsp;
1998: const Vec *nearnullvecs;
1999: Vec *localnearnullsp;
2000: PetscScalar *array;
2001: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
2002: PetscBool nnsp_has_cnst;
2003: /* LAPACK working arrays for SVD or POD */
2004: PetscBool skip_lapack,boolforchange;
2005: PetscScalar *work;
2006: PetscReal *singular_vals;
2007: #if defined(PETSC_USE_COMPLEX)
2008: PetscReal *rwork;
2009: #endif
2010: #if defined(PETSC_MISSING_LAPACK_GESVD)
2011: PetscScalar *temp_basis,*correlation_mat;
2012: #else
2013: PetscBLASInt dummy_int=1;
2014: PetscScalar dummy_scalar=1.;
2015: #endif
2017: /* Get index sets for faces, edges and vertices from graph */
2018: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
2019: /* free unneeded index sets */
2020: if (!pcbddc->use_vertices) {
2021: ISDestroy(&ISForVertices);
2022: }
2023: if (!pcbddc->use_edges) {
2024: for (i=0;i<n_ISForEdges;i++) {
2025: ISDestroy(&ISForEdges[i]);
2026: }
2027: PetscFree(ISForEdges);
2028: n_ISForEdges = 0;
2029: }
2030: if (!pcbddc->use_faces) {
2031: for (i=0;i<n_ISForFaces;i++) {
2032: ISDestroy(&ISForFaces[i]);
2033: }
2034: PetscFree(ISForFaces);
2035: n_ISForFaces = 0;
2036: }
2038: #if defined(PETSC_USE_DEBUG)
2039: /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2040: Also use_change_of_basis should be consistent among processors */
2041: if (pcbddc->NullSpace) {
2042: PetscBool tbool[2],gbool[2];
2044: if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2045: pcbddc->use_change_of_basis = PETSC_TRUE;
2046: if (!ISForEdges) {
2047: pcbddc->use_change_on_faces = PETSC_TRUE;
2048: }
2049: }
2050: tbool[0] = pcbddc->use_change_of_basis;
2051: tbool[1] = pcbddc->use_change_on_faces;
2052: MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2053: pcbddc->use_change_of_basis = gbool[0];
2054: pcbddc->use_change_on_faces = gbool[1];
2055: }
2056: #endif
2058: /* check if near null space is attached to global mat */
2059: MatGetNearNullSpace(pc->pmat,&nearnullsp);
2060: if (nearnullsp) {
2061: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
2062: /* remove any stored info */
2063: MatNullSpaceDestroy(&pcbddc->onearnullspace);
2064: PetscFree(pcbddc->onearnullvecs_state);
2065: /* store information for BDDC solver reuse */
2066: PetscObjectReference((PetscObject)nearnullsp);
2067: pcbddc->onearnullspace = nearnullsp;
2068: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
2069: for (i=0;i<nnsp_size;i++) {
2070: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
2071: }
2072: } else { /* if near null space is not provided BDDC uses constants by default */
2073: nnsp_size = 0;
2074: nnsp_has_cnst = PETSC_TRUE;
2075: }
2076: /* get max number of constraints on a single cc */
2077: max_constraints = nnsp_size;
2078: if (nnsp_has_cnst) max_constraints++;
2080: /*
2081: Evaluate maximum storage size needed by the procedure
2082: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2083: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2084: There can be multiple constraints per connected component
2085: */
2086: n_vertices = 0;
2087: if (ISForVertices) {
2088: ISGetSize(ISForVertices,&n_vertices);
2089: }
2090: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2091: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
2093: total_counts = n_ISForFaces+n_ISForEdges;
2094: total_counts *= max_constraints;
2095: total_counts += n_vertices;
2096: PetscBTCreate(total_counts,&change_basis);
2098: total_counts = 0;
2099: max_size_of_constraint = 0;
2100: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2101: IS used_is;
2102: if (i<n_ISForEdges) {
2103: used_is = ISForEdges[i];
2104: } else {
2105: used_is = ISForFaces[i-n_ISForEdges];
2106: }
2107: ISGetSize(used_is,&j);
2108: total_counts += j;
2109: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2110: }
2111: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
2113: /* get local part of global near null space vectors */
2114: PetscMalloc1(nnsp_size,&localnearnullsp);
2115: for (k=0;k<nnsp_size;k++) {
2116: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
2117: VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2118: VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2119: }
2121: /* whether or not to skip lapack calls */
2122: skip_lapack = PETSC_TRUE;
2123: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2125: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2126: if (!skip_lapack) {
2127: PetscScalar temp_work;
2129: #if defined(PETSC_MISSING_LAPACK_GESVD)
2130: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2131: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
2132: PetscMalloc1(max_constraints,&singular_vals);
2133: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
2134: #if defined(PETSC_USE_COMPLEX)
2135: PetscMalloc1(3*max_constraints,&rwork);
2136: #endif
2137: /* now we evaluate the optimal workspace using query with lwork=-1 */
2138: PetscBLASIntCast(max_constraints,&Blas_N);
2139: PetscBLASIntCast(max_constraints,&Blas_LDA);
2140: lwork = -1;
2141: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2142: #if !defined(PETSC_USE_COMPLEX)
2143: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2144: #else
2145: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2146: #endif
2147: PetscFPTrapPop();
2148: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2149: #else /* on missing GESVD */
2150: /* SVD */
2151: PetscInt max_n,min_n;
2152: max_n = max_size_of_constraint;
2153: min_n = max_constraints;
2154: if (max_size_of_constraint < max_constraints) {
2155: min_n = max_size_of_constraint;
2156: max_n = max_constraints;
2157: }
2158: PetscMalloc1(min_n,&singular_vals);
2159: #if defined(PETSC_USE_COMPLEX)
2160: PetscMalloc1(5*min_n,&rwork);
2161: #endif
2162: /* now we evaluate the optimal workspace using query with lwork=-1 */
2163: lwork = -1;
2164: PetscBLASIntCast(max_n,&Blas_M);
2165: PetscBLASIntCast(min_n,&Blas_N);
2166: PetscBLASIntCast(max_n,&Blas_LDA);
2167: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2168: #if !defined(PETSC_USE_COMPLEX)
2169: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2170: #else
2171: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2172: #endif
2173: PetscFPTrapPop();
2174: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2175: #endif /* on missing GESVD */
2176: /* Allocate optimal workspace */
2177: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
2178: PetscMalloc1(lwork,&work);
2179: }
2180: /* Now we can loop on constraining sets */
2181: total_counts = 0;
2182: constraints_idxs_ptr[0] = 0;
2183: constraints_data_ptr[0] = 0;
2184: /* vertices */
2185: if (n_vertices) {
2186: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
2187: if (nnsp_has_cnst) { /* it considers all possible vertices */
2188: PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
2189: for (i=0;i<n_vertices;i++) {
2190: constraints_n[total_counts] = 1;
2191: constraints_data[total_counts] = 1.0;
2192: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2193: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2194: total_counts++;
2195: }
2196: } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2197: PetscBool used_vertex;
2198: for (i=0;i<n_vertices;i++) {
2199: used_vertex = PETSC_FALSE;
2200: k = 0;
2201: while (!used_vertex && k<nnsp_size) {
2202: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2203: if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2204: constraints_n[total_counts] = 1;
2205: constraints_idxs[total_counts] = is_indices[i];
2206: constraints_data[total_counts] = 1.0;
2207: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2208: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2209: total_counts++;
2210: used_vertex = PETSC_TRUE;
2211: }
2212: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2213: k++;
2214: }
2215: }
2216: }
2217: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
2218: n_vertices = total_counts;
2219: }
2221: /* edges and faces */
2222: total_counts_cc = total_counts;
2223: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2224: IS used_is;
2225: PetscBool idxs_copied = PETSC_FALSE;
2227: if (ncc<n_ISForEdges) {
2228: used_is = ISForEdges[ncc];
2229: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2230: } else {
2231: used_is = ISForFaces[ncc-n_ISForEdges];
2232: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2233: }
2234: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
2236: ISGetSize(used_is,&size_of_constraint);
2237: ISGetIndices(used_is,(const PetscInt**)&is_indices);
2238: /* change of basis should not be performed on local periodic nodes */
2239: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2240: if (nnsp_has_cnst) {
2241: PetscScalar quad_value;
2243: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2244: idxs_copied = PETSC_TRUE;
2246: if (!pcbddc->use_nnsp_true) {
2247: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2248: } else {
2249: quad_value = 1.0;
2250: }
2251: for (j=0;j<size_of_constraint;j++) {
2252: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2253: }
2254: temp_constraints++;
2255: total_counts++;
2256: }
2257: for (k=0;k<nnsp_size;k++) {
2258: PetscReal real_value;
2259: PetscScalar *ptr_to_data;
2261: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2262: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2263: for (j=0;j<size_of_constraint;j++) {
2264: ptr_to_data[j] = array[is_indices[j]];
2265: }
2266: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2267: /* check if array is null on the connected component */
2268: PetscBLASIntCast(size_of_constraint,&Blas_N);
2269: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2270: if (real_value > 0.0) { /* keep indices and values */
2271: temp_constraints++;
2272: total_counts++;
2273: if (!idxs_copied) {
2274: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2275: idxs_copied = PETSC_TRUE;
2276: }
2277: }
2278: }
2279: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
2280: valid_constraints = temp_constraints;
2281: if (!pcbddc->use_nnsp_true && temp_constraints) {
2282: if (temp_constraints == 1) { /* just normalize the constraint */
2283: PetscScalar norm,*ptr_to_data;
2285: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2286: PetscBLASIntCast(size_of_constraint,&Blas_N);
2287: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2288: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2289: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2290: } else { /* perform SVD */
2291: PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2292: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2294: #if defined(PETSC_MISSING_LAPACK_GESVD)
2295: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2296: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2297: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2298: the constraints basis will differ (by a complex factor with absolute value equal to 1)
2299: from that computed using LAPACKgesvd
2300: -> This is due to a different computation of eigenvectors in LAPACKheev
2301: -> The quality of the POD-computed basis will be the same */
2302: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
2303: /* Store upper triangular part of correlation matrix */
2304: PetscBLASIntCast(size_of_constraint,&Blas_N);
2305: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2306: for (j=0;j<temp_constraints;j++) {
2307: for (k=0;k<j+1;k++) {
2308: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2309: }
2310: }
2311: /* compute eigenvalues and eigenvectors of correlation matrix */
2312: PetscBLASIntCast(temp_constraints,&Blas_N);
2313: PetscBLASIntCast(temp_constraints,&Blas_LDA);
2314: #if !defined(PETSC_USE_COMPLEX)
2315: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2316: #else
2317: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2318: #endif
2319: PetscFPTrapPop();
2320: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2321: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2322: j = 0;
2323: while (j < temp_constraints && singular_vals[j] < tol) j++;
2324: total_counts = total_counts-j;
2325: valid_constraints = temp_constraints-j;
2326: /* scale and copy POD basis into used quadrature memory */
2327: PetscBLASIntCast(size_of_constraint,&Blas_M);
2328: PetscBLASIntCast(temp_constraints,&Blas_N);
2329: PetscBLASIntCast(temp_constraints,&Blas_K);
2330: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2331: PetscBLASIntCast(temp_constraints,&Blas_LDB);
2332: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2333: if (j<temp_constraints) {
2334: PetscInt ii;
2335: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2336: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2337: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2338: PetscFPTrapPop();
2339: for (k=0;k<temp_constraints-j;k++) {
2340: for (ii=0;ii<size_of_constraint;ii++) {
2341: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2342: }
2343: }
2344: }
2345: #else /* on missing GESVD */
2346: PetscBLASIntCast(size_of_constraint,&Blas_M);
2347: PetscBLASIntCast(temp_constraints,&Blas_N);
2348: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2349: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2350: #if !defined(PETSC_USE_COMPLEX)
2351: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2352: #else
2353: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2354: #endif
2355: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2356: PetscFPTrapPop();
2357: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2358: k = temp_constraints;
2359: if (k > size_of_constraint) k = size_of_constraint;
2360: j = 0;
2361: while (j < k && singular_vals[k-j-1] < tol) j++;
2362: valid_constraints = k-j;
2363: total_counts = total_counts-temp_constraints+valid_constraints;
2364: #endif /* on missing GESVD */
2365: }
2366: }
2367: /* update pointers information */
2368: if (valid_constraints) {
2369: constraints_n[total_counts_cc] = valid_constraints;
2370: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2371: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2372: /* set change_of_basis flag */
2373: if (boolforchange) {
2374: PetscBTSet(change_basis,total_counts_cc);
2375: }
2376: total_counts_cc++;
2377: }
2378: }
2379: /* free workspace */
2380: if (!skip_lapack) {
2381: PetscFree(work);
2382: #if defined(PETSC_USE_COMPLEX)
2383: PetscFree(rwork);
2384: #endif
2385: PetscFree(singular_vals);
2386: #if defined(PETSC_MISSING_LAPACK_GESVD)
2387: PetscFree(correlation_mat);
2388: PetscFree(temp_basis);
2389: #endif
2390: }
2391: for (k=0;k<nnsp_size;k++) {
2392: VecDestroy(&localnearnullsp[k]);
2393: }
2394: PetscFree(localnearnullsp);
2395: /* free index sets of faces, edges and vertices */
2396: for (i=0;i<n_ISForFaces;i++) {
2397: ISDestroy(&ISForFaces[i]);
2398: }
2399: if (n_ISForFaces) {
2400: PetscFree(ISForFaces);
2401: }
2402: for (i=0;i<n_ISForEdges;i++) {
2403: ISDestroy(&ISForEdges[i]);
2404: }
2405: if (n_ISForEdges) {
2406: PetscFree(ISForEdges);
2407: }
2408: ISDestroy(&ISForVertices);
2409: } else {
2410: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2412: total_counts = 0;
2413: n_vertices = 0;
2414: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2415: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
2416: }
2417: max_constraints = 0;
2418: total_counts_cc = 0;
2419: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2420: total_counts += pcbddc->adaptive_constraints_n[i];
2421: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2422: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2423: }
2424: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2425: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2426: constraints_idxs = pcbddc->adaptive_constraints_idxs;
2427: constraints_data = pcbddc->adaptive_constraints_data;
2428: /* constraints_n differs from pcbddc->adaptive_constraints_n */
2429: PetscMalloc1(total_counts_cc,&constraints_n);
2430: total_counts_cc = 0;
2431: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2432: if (pcbddc->adaptive_constraints_n[i]) {
2433: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2434: }
2435: }
2436: #if 0
2437: printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2438: for (i=0;i<total_counts_cc;i++) {
2439: printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2440: printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2441: for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2442: printf(" %d",constraints_idxs[j]);
2443: }
2444: printf("\n");
2445: printf("number of cc: %d\n",constraints_n[i]);
2446: }
2447: for (i=0;i<n_vertices;i++) {
2448: PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2449: }
2450: for (i=0;i<sub_schurs->n_subs;i++) {
2451: PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2452: }
2453: #endif
2455: max_size_of_constraint = 0;
2456: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2457: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
2458: /* Change of basis */
2459: PetscBTCreate(total_counts_cc,&change_basis);
2460: if (pcbddc->use_change_of_basis) {
2461: for (i=0;i<sub_schurs->n_subs;i++) {
2462: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2463: PetscBTSet(change_basis,i+n_vertices);
2464: }
2465: }
2466: }
2467: }
2468: pcbddc->local_primal_size = total_counts;
2469: PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);
2471: /* map constraints_idxs in boundary numbering */
2472: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
2473: if (i != constraints_idxs_ptr[total_counts_cc]) {
2474: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2475: }
2477: /* Create constraint matrix */
2478: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
2479: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
2480: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
2482: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2483: /* determine if a QR strategy is needed for change of basis */
2484: qr_needed = PETSC_FALSE;
2485: PetscBTCreate(total_counts_cc,&qr_needed_idx);
2486: total_primal_vertices=0;
2487: pcbddc->local_primal_size_cc = 0;
2488: for (i=0;i<total_counts_cc;i++) {
2489: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2490: if (size_of_constraint == 1) {
2491: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2492: pcbddc->local_primal_size_cc += 1;
2493: } else if (PetscBTLookup(change_basis,i)) {
2494: for (k=0;k<constraints_n[i];k++) {
2495: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2496: }
2497: pcbddc->local_primal_size_cc += constraints_n[i];
2498: if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2499: PetscBTSet(qr_needed_idx,i);
2500: qr_needed = PETSC_TRUE;
2501: }
2502: } else {
2503: pcbddc->local_primal_size_cc += 1;
2504: }
2505: }
2506: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2507: pcbddc->n_vertices = total_primal_vertices;
2508: /* permute indices in order to have a sorted set of vertices */
2509: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
2511: PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);
2512: PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
2513: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2515: /* nonzero structure of constraint matrix */
2516: /* and get reference dof for local constraints */
2517: PetscMalloc1(pcbddc->local_primal_size,&nnz);
2518: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2520: j = total_primal_vertices;
2521: total_counts = total_primal_vertices;
2522: cum = total_primal_vertices;
2523: for (i=n_vertices;i<total_counts_cc;i++) {
2524: if (!PetscBTLookup(change_basis,i)) {
2525: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2526: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2527: cum++;
2528: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2529: for (k=0;k<constraints_n[i];k++) {
2530: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2531: nnz[j+k] = size_of_constraint;
2532: }
2533: j += constraints_n[i];
2534: }
2535: }
2536: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
2537: PetscFree(nnz);
2539: /* set values in constraint matrix */
2540: for (i=0;i<total_primal_vertices;i++) {
2541: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
2542: }
2543: total_counts = total_primal_vertices;
2544: for (i=n_vertices;i<total_counts_cc;i++) {
2545: if (!PetscBTLookup(change_basis,i)) {
2546: PetscInt *cols;
2548: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2549: cols = constraints_idxs+constraints_idxs_ptr[i];
2550: for (k=0;k<constraints_n[i];k++) {
2551: PetscInt row = total_counts+k;
2552: PetscScalar *vals;
2554: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2555: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
2556: }
2557: total_counts += constraints_n[i];
2558: }
2559: }
2560: /* assembling */
2561: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2562: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2564: /*
2565: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2566: MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
2567: */
2568: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2569: if (pcbddc->use_change_of_basis) {
2570: /* dual and primal dofs on a single cc */
2571: PetscInt dual_dofs,primal_dofs;
2572: /* working stuff for GEQRF */
2573: PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2574: PetscBLASInt lqr_work;
2575: /* working stuff for UNGQR */
2576: PetscScalar *gqr_work,lgqr_work_t;
2577: PetscBLASInt lgqr_work;
2578: /* working stuff for TRTRS */
2579: PetscScalar *trs_rhs;
2580: PetscBLASInt Blas_NRHS;
2581: /* pointers for values insertion into change of basis matrix */
2582: PetscInt *start_rows,*start_cols;
2583: PetscScalar *start_vals;
2584: /* working stuff for values insertion */
2585: PetscBT is_primal;
2586: PetscInt *aux_primal_numbering_B;
2587: /* matrix sizes */
2588: PetscInt global_size,local_size;
2589: /* temporary change of basis */
2590: Mat localChangeOfBasisMatrix;
2591: /* extra space for debugging */
2592: PetscScalar *dbg_work;
2594: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2595: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
2596: MatSetType(localChangeOfBasisMatrix,MATAIJ);
2597: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
2598: /* nonzeros for local mat */
2599: PetscMalloc1(pcis->n,&nnz);
2600: for (i=0;i<pcis->n;i++) nnz[i]=1;
2601: for (i=n_vertices;i<total_counts_cc;i++) {
2602: if (PetscBTLookup(change_basis,i)) {
2603: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2604: if (PetscBTLookup(qr_needed_idx,i)) {
2605: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2606: } else {
2607: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2608: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2609: }
2610: }
2611: }
2612: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
2613: PetscFree(nnz);
2614: /* Set initial identity in the matrix */
2615: for (i=0;i<pcis->n;i++) {
2616: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
2617: }
2619: if (pcbddc->dbg_flag) {
2620: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
2621: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
2622: }
2625: /* Now we loop on the constraints which need a change of basis */
2626: /*
2627: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2628: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2630: Basic blocks of change of basis matrix T computed by
2632: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2634: | 1 0 ... 0 s_1/S |
2635: | 0 1 ... 0 s_2/S |
2636: | ... |
2637: | 0 ... 1 s_{n-1}/S |
2638: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
2640: with S = \sum_{i=1}^n s_i^2
2641: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2642: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2644: - QR decomposition of constraints otherwise
2645: */
2646: if (qr_needed) {
2647: /* space to store Q */
2648: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
2649: /* first we issue queries for optimal work */
2650: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2651: PetscBLASIntCast(max_constraints,&Blas_N);
2652: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2653: lqr_work = -1;
2654: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2655: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2656: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
2657: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
2658: lgqr_work = -1;
2659: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2660: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
2661: PetscBLASIntCast(max_constraints,&Blas_K);
2662: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2663: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2664: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2665: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2666: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
2667: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
2668: /* array to store scaling factors for reflectors */
2669: PetscMalloc1(max_constraints,&qr_tau);
2670: /* array to store rhs and solution of triangular solver */
2671: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
2672: /* allocating workspace for check */
2673: if (pcbddc->dbg_flag) {
2674: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
2675: }
2676: }
2677: /* array to store whether a node is primal or not */
2678: PetscBTCreate(pcis->n_B,&is_primal);
2679: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
2680: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
2681: if (i != total_primal_vertices) {
2682: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2683: }
2684: for (i=0;i<total_primal_vertices;i++) {
2685: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
2686: }
2687: PetscFree(aux_primal_numbering_B);
2689: /* loop on constraints and see whether or not they need a change of basis and compute it */
2690: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2691: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2692: if (PetscBTLookup(change_basis,total_counts)) {
2693: /* get constraint info */
2694: primal_dofs = constraints_n[total_counts];
2695: dual_dofs = size_of_constraint-primal_dofs;
2697: if (pcbddc->dbg_flag) {
2698: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
2699: }
2701: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2703: /* copy quadrature constraints for change of basis check */
2704: if (pcbddc->dbg_flag) {
2705: PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2706: }
2707: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2708: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2710: /* compute QR decomposition of constraints */
2711: PetscBLASIntCast(size_of_constraint,&Blas_M);
2712: PetscBLASIntCast(primal_dofs,&Blas_N);
2713: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2714: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2715: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2716: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2717: PetscFPTrapPop();
2719: /* explictly compute R^-T */
2720: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
2721: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2722: PetscBLASIntCast(primal_dofs,&Blas_N);
2723: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
2724: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2725: PetscBLASIntCast(primal_dofs,&Blas_LDB);
2726: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2727: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2728: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2729: PetscFPTrapPop();
2731: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2732: PetscBLASIntCast(size_of_constraint,&Blas_M);
2733: PetscBLASIntCast(size_of_constraint,&Blas_N);
2734: PetscBLASIntCast(primal_dofs,&Blas_K);
2735: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2736: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2737: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2738: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2739: PetscFPTrapPop();
2741: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2742: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2743: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2744: PetscBLASIntCast(size_of_constraint,&Blas_M);
2745: PetscBLASIntCast(primal_dofs,&Blas_N);
2746: PetscBLASIntCast(primal_dofs,&Blas_K);
2747: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2748: PetscBLASIntCast(primal_dofs,&Blas_LDB);
2749: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2750: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2751: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2752: PetscFPTrapPop();
2753: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2755: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2756: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2757: /* insert cols for primal dofs */
2758: for (j=0;j<primal_dofs;j++) {
2759: start_vals = &qr_basis[j*size_of_constraint];
2760: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2761: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2762: }
2763: /* insert cols for dual dofs */
2764: for (j=0,k=0;j<dual_dofs;k++) {
2765: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2766: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2767: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2768: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2769: j++;
2770: }
2771: }
2773: /* check change of basis */
2774: if (pcbddc->dbg_flag) {
2775: PetscInt ii,jj;
2776: PetscBool valid_qr=PETSC_TRUE;
2777: PetscBLASIntCast(primal_dofs,&Blas_M);
2778: PetscBLASIntCast(size_of_constraint,&Blas_N);
2779: PetscBLASIntCast(size_of_constraint,&Blas_K);
2780: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2781: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2782: PetscBLASIntCast(primal_dofs,&Blas_LDC);
2783: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2784: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2785: PetscFPTrapPop();
2786: for (jj=0;jj<size_of_constraint;jj++) {
2787: for (ii=0;ii<primal_dofs;ii++) {
2788: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2789: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2790: }
2791: }
2792: if (!valid_qr) {
2793: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
2794: for (jj=0;jj<size_of_constraint;jj++) {
2795: for (ii=0;ii<primal_dofs;ii++) {
2796: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2797: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2798: }
2799: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2800: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2801: }
2802: }
2803: }
2804: } else {
2805: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
2806: }
2807: }
2808: } else { /* simple transformation block */
2809: PetscInt row,col;
2810: PetscScalar val,norm;
2812: PetscBLASIntCast(size_of_constraint,&Blas_N);
2813: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2814: for (j=0;j<size_of_constraint;j++) {
2815: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2816: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2817: if (!PetscBTLookup(is_primal,row_B)) {
2818: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2819: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2820: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
2821: } else {
2822: for (k=0;k<size_of_constraint;k++) {
2823: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2824: if (row != col) {
2825: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2826: } else {
2827: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2828: }
2829: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2830: }
2831: }
2832: }
2833: if (pcbddc->dbg_flag) {
2834: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
2835: }
2836: }
2837: } else {
2838: if (pcbddc->dbg_flag) {
2839: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
2840: }
2841: }
2842: }
2844: /* free workspace */
2845: if (qr_needed) {
2846: if (pcbddc->dbg_flag) {
2847: PetscFree(dbg_work);
2848: }
2849: PetscFree(trs_rhs);
2850: PetscFree(qr_tau);
2851: PetscFree(qr_work);
2852: PetscFree(gqr_work);
2853: PetscFree(qr_basis);
2854: }
2855: PetscBTDestroy(&is_primal);
2856: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2857: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2859: /* assembling of global change of variable */
2860: {
2861: Mat tmat;
2862: PetscInt bs;
2864: VecGetSize(pcis->vec1_global,&global_size);
2865: VecGetLocalSize(pcis->vec1_global,&local_size);
2866: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
2867: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
2868: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
2869: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
2870: MatGetBlockSize(pc->pmat,&bs);
2871: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
2872: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
2873: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
2874: MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
2875: MatDestroy(&tmat);
2876: VecSet(pcis->vec1_global,0.0);
2877: VecSet(pcis->vec1_N,1.0);
2878: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2879: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2880: VecReciprocal(pcis->vec1_global);
2881: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
2882: }
2883: /* check */
2884: if (pcbddc->dbg_flag) {
2885: PetscReal error;
2886: Vec x,x_change;
2888: VecDuplicate(pcis->vec1_global,&x);
2889: VecDuplicate(pcis->vec1_global,&x_change);
2890: VecSetRandom(x,NULL);
2891: VecCopy(x,pcis->vec1_global);
2892: VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2893: VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2894: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
2895: VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2896: VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2897: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
2898: VecAXPY(x,-1.0,x_change);
2899: VecNorm(x,NORM_INFINITY,&error);
2900: PetscViewerFlush(pcbddc->dbg_viewer);
2901: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);
2902: VecDestroy(&x);
2903: VecDestroy(&x_change);
2904: }
2906: /* adapt sub_schurs computed (if any) */
2907: if (pcbddc->use_deluxe_scaling) {
2908: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2909: if (sub_schurs->S_Ej_all) {
2910: Mat S_new,tmat;
2911: IS is_all_N;
2913: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
2914: MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
2915: ISDestroy(&is_all_N);
2916: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2917: MatDestroy(&sub_schurs->S_Ej_all);
2918: PetscObjectReference((PetscObject)S_new);
2919: sub_schurs->S_Ej_all = S_new;
2920: MatDestroy(&S_new);
2921: if (sub_schurs->sum_S_Ej_all) {
2922: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2923: MatDestroy(&sub_schurs->sum_S_Ej_all);
2924: PetscObjectReference((PetscObject)S_new);
2925: sub_schurs->sum_S_Ej_all = S_new;
2926: MatDestroy(&S_new);
2927: }
2928: MatDestroy(&tmat);
2929: }
2930: }
2931: MatDestroy(&localChangeOfBasisMatrix);
2932: } else if (pcbddc->user_ChangeOfBasisMatrix) {
2933: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2934: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2935: }
2937: /* set up change of basis context */
2938: if (pcbddc->ChangeOfBasisMatrix) {
2939: PCBDDCChange_ctx change_ctx;
2941: if (!pcbddc->new_global_mat) {
2942: PetscInt global_size,local_size;
2944: VecGetSize(pcis->vec1_global,&global_size);
2945: VecGetLocalSize(pcis->vec1_global,&local_size);
2946: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);
2947: MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);
2948: MatSetType(pcbddc->new_global_mat,MATSHELL);
2949: MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);
2950: MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);
2951: PetscNew(&change_ctx);
2952: MatShellSetContext(pcbddc->new_global_mat,change_ctx);
2953: } else {
2954: MatShellGetContext(pcbddc->new_global_mat,&change_ctx);
2955: MatDestroy(&change_ctx->global_change);
2956: VecDestroyVecs(2,&change_ctx->work);
2957: }
2958: if (!pcbddc->user_ChangeOfBasisMatrix) {
2959: PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);
2960: change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2961: } else {
2962: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2963: change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2964: }
2965: VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);
2966: MatSetUp(pcbddc->new_global_mat);
2967: MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2968: MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2969: }
2971: /* check if a new primal space has been introduced */
2972: pcbddc->new_primal_space_local = PETSC_TRUE;
2973: if (olocal_primal_size == pcbddc->local_primal_size) {
2974: PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2975: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2976: if (!pcbddc->new_primal_space_local) {
2977: PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2978: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2979: }
2980: }
2981: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
2982: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2983: MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2985: /* flush dbg viewer */
2986: if (pcbddc->dbg_flag) {
2987: PetscViewerFlush(pcbddc->dbg_viewer);
2988: }
2990: /* free workspace */
2991: PetscBTDestroy(&qr_needed_idx);
2992: PetscBTDestroy(&change_basis);
2993: if (!pcbddc->adaptive_selection) {
2994: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
2995: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
2996: } else {
2997: PetscFree5(pcbddc->adaptive_constraints_n,
2998: pcbddc->adaptive_constraints_idxs_ptr,
2999: pcbddc->adaptive_constraints_data_ptr,
3000: pcbddc->adaptive_constraints_idxs,
3001: pcbddc->adaptive_constraints_data);
3002: PetscFree(constraints_n);
3003: PetscFree(constraints_idxs_B);
3004: }
3005: return(0);
3006: }
3010: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3011: {
3012: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3013: PC_IS *pcis = (PC_IS*)pc->data;
3014: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3015: PetscInt ierr,i,vertex_size,N;
3016: PetscViewer viewer=pcbddc->dbg_viewer;
3019: /* Reset previously computed graph */
3020: PCBDDCGraphReset(pcbddc->mat_graph);
3021: /* Init local Graph struct */
3022: MatGetSize(pc->pmat,&N,NULL);
3023: PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);
3025: /* Check validity of the csr graph passed in by the user */
3026: if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3027: PCBDDCGraphResetCSR(pcbddc->mat_graph);
3028: }
3030: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3031: if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3032: PetscInt *xadj,*adjncy;
3033: PetscInt nvtxs;
3034: PetscBool flg_row=PETSC_FALSE;
3036: if (pcbddc->use_local_adj) {
3038: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3039: if (flg_row) {
3040: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
3041: pcbddc->computed_rowadj = PETSC_TRUE;
3042: }
3043: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3044: } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3045: IS is_dummy;
3046: ISLocalToGlobalMapping l2gmap_dummy;
3047: PetscInt j,sum;
3048: PetscInt *cxadj,*cadjncy;
3049: const PetscInt *idxs;
3050: PCBDDCGraph graph;
3051: PetscBT is_on_boundary;
3053: ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);
3054: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
3055: ISDestroy(&is_dummy);
3056: PCBDDCGraphCreate(&graph);
3057: PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);
3058: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
3059: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3060: if (flg_row) {
3061: graph->xadj = xadj;
3062: graph->adjncy = adjncy;
3063: }
3064: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
3065: PCBDDCGraphComputeConnectedComponents(graph);
3066: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3068: if (pcbddc->dbg_flag) {
3069: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);
3070: for (i=0;i<graph->ncc;i++) {
3071: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);
3072: }
3073: }
3075: PetscBTCreate(pcis->n,&is_on_boundary);
3076: ISGetIndices(pcis->is_B_local,&idxs);
3077: for (i=0;i<pcis->n_B;i++) {
3078: PetscBTSet(is_on_boundary,idxs[i]);
3079: }
3080: ISRestoreIndices(pcis->is_B_local,&idxs);
3082: PetscCalloc1(pcis->n+1,&cxadj);
3083: sum = 0;
3084: for (i=0;i<graph->ncc;i++) {
3085: PetscInt sizecc = 0;
3086: for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3087: if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3088: sizecc++;
3089: }
3090: }
3091: for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3092: if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3093: cxadj[graph->queue[j]] = sizecc;
3094: }
3095: }
3096: sum += sizecc*sizecc;
3097: }
3098: PetscMalloc1(sum,&cadjncy);
3099: sum = 0;
3100: for (i=0;i<pcis->n;i++) {
3101: PetscInt temp = cxadj[i];
3102: cxadj[i] = sum;
3103: sum += temp;
3104: }
3105: cxadj[pcis->n] = sum;
3106: for (i=0;i<graph->ncc;i++) {
3107: for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3108: if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3109: PetscInt k,sizecc = 0;
3110: for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3111: if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3112: cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3113: sizecc++;
3114: }
3115: }
3116: }
3117: }
3118: }
3119: if (sum) {
3120: PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);
3121: } else {
3122: PetscFree(cxadj);
3123: PetscFree(cadjncy);
3124: }
3125: graph->xadj = 0;
3126: graph->adjncy = 0;
3127: PCBDDCGraphDestroy(&graph);
3128: PetscBTDestroy(&is_on_boundary);
3129: }
3130: }
3131: if (pcbddc->dbg_flag) {
3132: PetscViewerFlush(pcbddc->dbg_viewer);
3133: }
3135: /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3136: vertex_size = 1;
3137: if (pcbddc->user_provided_isfordofs) {
3138: if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3139: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
3140: for (i=0;i<pcbddc->n_ISForDofs;i++) {
3141: PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
3142: ISDestroy(&pcbddc->ISForDofs[i]);
3143: }
3144: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3145: pcbddc->n_ISForDofs = 0;
3146: PetscFree(pcbddc->ISForDofs);
3147: }
3148: /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3149: MatGetBlockSize(matis->A,&vertex_size);
3150: } else {
3151: if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3152: MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);
3153: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
3154: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3155: ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
3156: }
3157: }
3158: }
3160: /* Setup of Graph */
3161: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3162: PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
3163: }
3164: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3165: PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
3166: }
3167: PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3169: /* Graph's connected components analysis */
3170: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
3172: /* print some info to stdout */
3173: if (pcbddc->dbg_flag) {
3174: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3175: }
3177: /* mark topography has done */
3178: pcbddc->recompute_topography = PETSC_FALSE;
3179: return(0);
3180: }
3182: /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3185: PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3186: {
3187: PetscSF sf;
3188: PetscLayout map;
3189: const PetscInt *idxs;
3190: PetscInt *leaf_data,*root_data,*gidxs;
3191: PetscInt N,n,i,lbounds[2],gbounds[2],Nl;
3192: PetscInt n_n,nlocals,start,first_index;
3193: PetscMPIInt commsize;
3194: PetscBool first_found;
3198: ISGetLocalSize(subset,&n);
3199: if (subset_mult) {
3201: ISGetLocalSize(subset,&i);
3202: if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3203: }
3204: /* create workspace layout for computing global indices of subset */
3205: ISGetIndices(subset,&idxs);
3206: lbounds[0] = lbounds[1] = 0;
3207: for (i=0;i<n;i++) {
3208: if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3209: else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3210: }
3211: lbounds[0] = -lbounds[0];
3212: MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));
3213: gbounds[0] = -gbounds[0];
3214: N = gbounds[1] - gbounds[0] + 1;
3215: PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);
3216: PetscLayoutSetBlockSize(map,1);
3217: PetscLayoutSetSize(map,N);
3218: PetscLayoutSetUp(map);
3219: PetscLayoutGetLocalSize(map,&Nl);
3221: /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3222: PetscMalloc2(n,&leaf_data,Nl,&root_data);
3223: if (subset_mult) {
3224: const PetscInt* idxs_mult;
3226: ISGetIndices(subset_mult,&idxs_mult);
3227: PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));
3228: ISRestoreIndices(subset_mult,&idxs_mult);
3229: } else {
3230: for (i=0;i<n;i++) leaf_data[i] = 1;
3231: }
3232: /* local size of new subset */
3233: n_n = 0;
3234: for (i=0;i<n;i++) n_n += leaf_data[i];
3236: /* global indexes in layout */
3237: PetscMalloc1(n_n,&gidxs); /* allocating possibly extra space in gidxs which will be used later */
3238: for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3239: ISRestoreIndices(subset,&idxs);
3240: PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);
3241: PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);
3242: PetscLayoutDestroy(&map);
3244: /* reduce from leaves to roots */
3245: PetscMemzero(root_data,Nl*sizeof(PetscInt));
3246: PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);
3247: PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);
3249: /* count indexes in local part of layout */
3250: nlocals = 0;
3251: first_index = -1;
3252: first_found = PETSC_FALSE;
3253: for (i=0;i<Nl;i++) {
3254: if (!first_found && root_data[i]) {
3255: first_found = PETSC_TRUE;
3256: first_index = i;
3257: }
3258: nlocals += root_data[i];
3259: }
3261: /* cumulative of number of indexes and size of subset without holes */
3262: #if defined(PETSC_HAVE_MPI_EXSCAN)
3263: start = 0;
3264: MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3265: #else
3266: MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3267: start = start-nlocals;
3268: #endif
3270: if (N_n) { /* compute total size of new subset if requested */
3271: *N_n = start + nlocals;
3272: MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);
3273: MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));
3274: }
3276: /* adapt root data with cumulative */
3277: if (first_found) {
3278: PetscInt old_index;
3280: root_data[first_index] += start;
3281: old_index = first_index;
3282: for (i=first_index+1;i<Nl;i++) {
3283: if (root_data[i]) {
3284: root_data[i] += root_data[old_index];
3285: old_index = i;
3286: }
3287: }
3288: }
3290: /* from roots to leaves */
3291: PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);
3292: PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);
3293: PetscSFDestroy(&sf);
3295: /* create new IS with global indexes without holes */
3296: if (subset_mult) {
3297: const PetscInt* idxs_mult;
3298: PetscInt cum;
3300: cum = 0;
3301: ISGetIndices(subset_mult,&idxs_mult);
3302: for (i=0;i<n;i++) {
3303: PetscInt j;
3304: for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3305: }
3306: ISRestoreIndices(subset_mult,&idxs_mult);
3307: } else {
3308: for (i=0;i<n;i++) {
3309: gidxs[i] = leaf_data[i]-1;
3310: }
3311: }
3312: ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);
3313: PetscFree2(leaf_data,root_data);
3314: return(0);
3315: }
3319: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3320: {
3321: PetscInt i,j;
3322: PetscScalar *alphas;
3326: /* this implements stabilized Gram-Schmidt */
3327: PetscMalloc1(n,&alphas);
3328: for (i=0;i<n;i++) {
3329: VecNormalize(vecs[i],NULL);
3330: if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
3331: for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
3332: }
3333: PetscFree(alphas);
3334: return(0);
3335: }
3339: PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3340: {
3341: Mat_IS *matis;
3342: IS ranks_send_to;
3343: PetscInt n_neighs,*neighs,*n_shared,**shared;
3344: PetscMPIInt size,rank,color;
3345: PetscInt *xadj,*adjncy;
3346: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3347: PetscInt i,local_size,threshold=0;
3348: PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3349: PetscSubcomm subcomm;
3353: PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);
3354: PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
3355: PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);
3357: /* Get info on mapping */
3358: matis = (Mat_IS*)(mat->data);
3359: ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);
3360: ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
3362: /* build local CSR graph of subdomains' connectivity */
3363: PetscMalloc1(2,&xadj);
3364: xadj[0] = 0;
3365: xadj[1] = PetscMax(n_neighs-1,0);
3366: PetscMalloc1(xadj[1],&adjncy);
3367: PetscMalloc1(xadj[1],&adjncy_wgt);
3369: if (threshold) {
3370: PetscInt xadj_count = 0;
3371: for (i=1;i<n_neighs;i++) {
3372: if (n_shared[i] > threshold) {
3373: adjncy[xadj_count] = neighs[i];
3374: adjncy_wgt[xadj_count] = n_shared[i];
3375: xadj_count++;
3376: }
3377: }
3378: xadj[1] = xadj_count;
3379: } else {
3380: if (xadj[1]) {
3381: PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
3382: PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
3383: }
3384: }
3385: ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
3386: if (use_square) {
3387: for (i=0;i<xadj[1];i++) {
3388: adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3389: }
3390: }
3391: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
3393: PetscMalloc1(1,&ranks_send_to_idx);
3395: /*
3396: Restrict work on active processes only.
3397: */
3398: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
3399: PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
3400: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
3401: PetscMPIIntCast(!local_size,&color);
3402: PetscSubcommSetTypeGeneral(subcomm,color,rank);
3403: if (color) {
3404: PetscFree(xadj);
3405: PetscFree(adjncy);
3406: PetscFree(adjncy_wgt);
3407: } else {
3408: Mat subdomain_adj;
3409: IS new_ranks,new_ranks_contig;
3410: MatPartitioning partitioner;
3411: PetscInt prank,rstart=0,rend=0;
3412: PetscInt *is_indices,*oldranks;
3413: PetscBool aggregate;
3415: MPI_Comm_size(PetscSubcommChild(subcomm),&size);
3416: PetscMalloc1(size,&oldranks);
3417: prank = rank;
3418: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));
3419: /*
3420: for (i=0;i<size;i++) {
3421: PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3422: }
3423: */
3424: for (i=0;i<xadj[1];i++) {
3425: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
3426: }
3427: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
3428: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3429: if (aggregate) {
3430: PetscInt lrows,row,ncols,*cols;
3431: PetscMPIInt nrank;
3432: PetscScalar *vals;
3434: MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);
3435: lrows = 0;
3436: if (nrank<redprocs) {
3437: lrows = size/redprocs;
3438: if (nrank<size%redprocs) lrows++;
3439: }
3440: MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
3441: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
3442: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3443: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
3444: row = nrank;
3445: ncols = xadj[1]-xadj[0];
3446: cols = adjncy;
3447: PetscMalloc1(ncols,&vals);
3448: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3449: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
3450: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
3451: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
3452: PetscFree(xadj);
3453: PetscFree(adjncy);
3454: PetscFree(adjncy_wgt);
3455: PetscFree(vals);
3456: } else {
3457: MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
3458: }
3459: /* MatView(subdomain_adj,0); */
3461: /* Partition */
3462: MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);
3463: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
3464: if (use_vwgt) {
3465: PetscMalloc1(1,&v_wgt);
3466: v_wgt[0] = local_size;
3467: MatPartitioningSetVertexWeights(partitioner,v_wgt);
3468: }
3469: n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3470: MatPartitioningSetNParts(partitioner,n_subdomains);
3471: MatPartitioningSetFromOptions(partitioner);
3472: MatPartitioningApply(partitioner,&new_ranks);
3473: /* MatPartitioningView(partitioner,0); */
3475: /* renumber new_ranks to avoid "holes" in new set of processors */
3476: PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);
3477: ISDestroy(&new_ranks);
3478: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3479: if (!redprocs) {
3480: ranks_send_to_idx[0] = oldranks[is_indices[0]];
3481: } else {
3482: PetscInt idxs[1];
3483: PetscMPIInt tag;
3484: MPI_Request *reqs;
3486: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
3487: PetscMalloc1(rend-rstart,&reqs);
3488: for (i=rstart;i<rend;i++) {
3489: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);
3490: }
3491: MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);
3492: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
3493: PetscFree(reqs);
3494: ranks_send_to_idx[0] = oldranks[idxs[0]];
3495: }
3496: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3497: /* clean up */
3498: PetscFree(oldranks);
3499: ISDestroy(&new_ranks_contig);
3500: MatDestroy(&subdomain_adj);
3501: MatPartitioningDestroy(&partitioner);
3502: }
3503: PetscSubcommDestroy(&subcomm);
3505: /* assemble parallel IS for sends */
3506: i = 1;
3507: if (color) i=0;
3508: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
3509: /* get back IS */
3510: *is_sends = ranks_send_to;
3511: return(0);
3512: }
3514: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3518: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3519: {
3520: Mat local_mat;
3521: Mat_IS *matis;
3522: IS is_sends_internal;
3523: PetscInt rows,cols,new_local_rows;
3524: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3525: PetscBool ismatis,isdense,newisdense,destroy_mat;
3526: ISLocalToGlobalMapping l2gmap;
3527: PetscInt* l2gmap_indices;
3528: const PetscInt* is_indices;
3529: MatType new_local_type;
3530: /* buffers */
3531: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3532: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3533: PetscInt *recv_buffer_idxs_local;
3534: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3535: /* MPI */
3536: MPI_Comm comm,comm_n;
3537: PetscSubcomm subcomm;
3538: PetscMPIInt n_sends,n_recvs,commsize;
3539: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3540: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3541: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3542: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3543: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3544: PetscErrorCode ierr;
3547: /* TODO: add missing checks */
3552: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
3553: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3554: MatISGetLocalMat(mat,&local_mat);
3555: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
3556: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3557: MatGetSize(local_mat,&rows,&cols);
3558: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3559: if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3560: PetscInt mrows,mcols,mnrows,mncols;
3561: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
3562: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3563: MatGetSize(mat,&mrows,&mcols);
3564: MatGetSize(*mat_n,&mnrows,&mncols);
3565: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3566: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3567: }
3568: MatGetBlockSize(local_mat,&bs);
3570: /* prepare IS for sending if not provided */
3571: if (!is_sends) {
3572: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3573: MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);
3574: } else {
3575: PetscObjectReference((PetscObject)is_sends);
3576: is_sends_internal = is_sends;
3577: }
3579: /* get pointer of MATIS data */
3580: matis = (Mat_IS*)mat->data;
3582: /* get comm */
3583: PetscObjectGetComm((PetscObject)mat,&comm);
3585: /* compute number of sends */
3586: ISGetLocalSize(is_sends_internal,&i);
3587: PetscMPIIntCast(i,&n_sends);
3589: /* compute number of receives */
3590: MPI_Comm_size(comm,&commsize);
3591: PetscMalloc1(commsize,&iflags);
3592: PetscMemzero(iflags,commsize*sizeof(*iflags));
3593: ISGetIndices(is_sends_internal,&is_indices);
3594: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3595: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
3596: PetscFree(iflags);
3598: /* restrict comm if requested */
3599: subcomm = 0;
3600: destroy_mat = PETSC_FALSE;
3601: if (restrict_comm) {
3602: PetscMPIInt color,subcommsize;
3604: color = 0;
3605: if (restrict_full) {
3606: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3607: } else {
3608: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3609: }
3610: MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
3611: subcommsize = commsize - subcommsize;
3612: /* check if reuse has been requested */
3613: if (reuse == MAT_REUSE_MATRIX) {
3614: if (*mat_n) {
3615: PetscMPIInt subcommsize2;
3616: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
3617: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3618: comm_n = PetscObjectComm((PetscObject)*mat_n);
3619: } else {
3620: comm_n = PETSC_COMM_SELF;
3621: }
3622: } else { /* MAT_INITIAL_MATRIX */
3623: PetscMPIInt rank;
3625: MPI_Comm_rank(comm,&rank);
3626: PetscSubcommCreate(comm,&subcomm);
3627: PetscSubcommSetNumber(subcomm,2);
3628: PetscSubcommSetTypeGeneral(subcomm,color,rank);
3629: comm_n = PetscSubcommChild(subcomm);
3630: }
3631: /* flag to destroy *mat_n if not significative */
3632: if (color) destroy_mat = PETSC_TRUE;
3633: } else {
3634: comm_n = comm;
3635: }
3637: /* prepare send/receive buffers */
3638: PetscMalloc1(commsize,&ilengths_idxs);
3639: PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
3640: PetscMalloc1(commsize,&ilengths_vals);
3641: PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
3642: if (nis) {
3643: PetscCalloc1(commsize,&ilengths_idxs_is);
3644: }
3646: /* Get data from local matrices */
3647: if (!isdense) {
3648: SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3649: /* TODO: See below some guidelines on how to prepare the local buffers */
3650: /*
3651: send_buffer_vals should contain the raw values of the local matrix
3652: send_buffer_idxs should contain:
3653: - MatType_PRIVATE type
3654: - PetscInt size_of_l2gmap
3655: - PetscInt global_row_indices[size_of_l2gmap]
3656: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3657: */
3658: } else {
3659: MatDenseGetArray(local_mat,&send_buffer_vals);
3660: ISLocalToGlobalMappingGetSize(matis->mapping,&i);
3661: PetscMalloc1(i+2,&send_buffer_idxs);
3662: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3663: send_buffer_idxs[1] = i;
3664: ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3665: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
3666: ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3667: PetscMPIIntCast(i,&len);
3668: for (i=0;i<n_sends;i++) {
3669: ilengths_vals[is_indices[i]] = len*len;
3670: ilengths_idxs[is_indices[i]] = len+2;
3671: }
3672: }
3673: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
3674: /* additional is (if any) */
3675: if (nis) {
3676: PetscMPIInt psum;
3677: PetscInt j;
3678: for (j=0,psum=0;j<nis;j++) {
3679: PetscInt plen;
3680: ISGetLocalSize(isarray[j],&plen);
3681: PetscMPIIntCast(plen,&len);
3682: psum += len+1; /* indices + lenght */
3683: }
3684: PetscMalloc1(psum,&send_buffer_idxs_is);
3685: for (j=0,psum=0;j<nis;j++) {
3686: PetscInt plen;
3687: const PetscInt *is_array_idxs;
3688: ISGetLocalSize(isarray[j],&plen);
3689: send_buffer_idxs_is[psum] = plen;
3690: ISGetIndices(isarray[j],&is_array_idxs);
3691: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
3692: ISRestoreIndices(isarray[j],&is_array_idxs);
3693: psum += plen+1; /* indices + lenght */
3694: }
3695: for (i=0;i<n_sends;i++) {
3696: ilengths_idxs_is[is_indices[i]] = psum;
3697: }
3698: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
3699: }
3701: buf_size_idxs = 0;
3702: buf_size_vals = 0;
3703: buf_size_idxs_is = 0;
3704: for (i=0;i<n_recvs;i++) {
3705: buf_size_idxs += (PetscInt)olengths_idxs[i];
3706: buf_size_vals += (PetscInt)olengths_vals[i];
3707: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3708: }
3709: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
3710: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
3711: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
3713: /* get new tags for clean communications */
3714: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
3715: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
3716: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
3718: /* allocate for requests */
3719: PetscMalloc1(n_sends,&send_req_idxs);
3720: PetscMalloc1(n_sends,&send_req_vals);
3721: PetscMalloc1(n_sends,&send_req_idxs_is);
3722: PetscMalloc1(n_recvs,&recv_req_idxs);
3723: PetscMalloc1(n_recvs,&recv_req_vals);
3724: PetscMalloc1(n_recvs,&recv_req_idxs_is);
3726: /* communications */
3727: ptr_idxs = recv_buffer_idxs;
3728: ptr_vals = recv_buffer_vals;
3729: ptr_idxs_is = recv_buffer_idxs_is;
3730: for (i=0;i<n_recvs;i++) {
3731: source_dest = onodes[i];
3732: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
3733: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
3734: ptr_idxs += olengths_idxs[i];
3735: ptr_vals += olengths_vals[i];
3736: if (nis) {
3737: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
3738: ptr_idxs_is += olengths_idxs_is[i];
3739: }
3740: }
3741: for (i=0;i<n_sends;i++) {
3742: PetscMPIIntCast(is_indices[i],&source_dest);
3743: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
3744: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
3745: if (nis) {
3746: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
3747: }
3748: }
3749: ISRestoreIndices(is_sends_internal,&is_indices);
3750: ISDestroy(&is_sends_internal);
3752: /* assemble new l2g map */
3753: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
3754: ptr_idxs = recv_buffer_idxs;
3755: new_local_rows = 0;
3756: for (i=0;i<n_recvs;i++) {
3757: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3758: ptr_idxs += olengths_idxs[i];
3759: }
3760: PetscMalloc1(new_local_rows,&l2gmap_indices);
3761: ptr_idxs = recv_buffer_idxs;
3762: new_local_rows = 0;
3763: for (i=0;i<n_recvs;i++) {
3764: PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
3765: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3766: ptr_idxs += olengths_idxs[i];
3767: }
3768: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
3769: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
3770: PetscFree(l2gmap_indices);
3772: /* infer new local matrix type from received local matrices type */
3773: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3774: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3775: if (n_recvs) {
3776: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3777: ptr_idxs = recv_buffer_idxs;
3778: for (i=0;i<n_recvs;i++) {
3779: if ((PetscInt)new_local_type_private != *ptr_idxs) {
3780: new_local_type_private = MATAIJ_PRIVATE;
3781: break;
3782: }
3783: ptr_idxs += olengths_idxs[i];
3784: }
3785: switch (new_local_type_private) {
3786: case MATDENSE_PRIVATE:
3787: if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3788: new_local_type = MATSEQAIJ;
3789: bs = 1;
3790: } else { /* if I receive only 1 dense matrix */
3791: new_local_type = MATSEQDENSE;
3792: bs = 1;
3793: }
3794: break;
3795: case MATAIJ_PRIVATE:
3796: new_local_type = MATSEQAIJ;
3797: bs = 1;
3798: break;
3799: case MATBAIJ_PRIVATE:
3800: new_local_type = MATSEQBAIJ;
3801: break;
3802: case MATSBAIJ_PRIVATE:
3803: new_local_type = MATSEQSBAIJ;
3804: break;
3805: default:
3806: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3807: break;
3808: }
3809: } else { /* by default, new_local_type is seqdense */
3810: new_local_type = MATSEQDENSE;
3811: bs = 1;
3812: }
3814: /* create MATIS object if needed */
3815: if (reuse == MAT_INITIAL_MATRIX) {
3816: MatGetSize(mat,&rows,&cols);
3817: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);
3818: } else {
3819: /* it also destroys the local matrices */
3820: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
3821: }
3822: MatISGetLocalMat(*mat_n,&local_mat);
3823: MatSetType(local_mat,new_local_type);
3825: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
3827: /* Global to local map of received indices */
3828: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
3829: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
3830: ISLocalToGlobalMappingDestroy(&l2gmap);
3832: /* restore attributes -> type of incoming data and its size */
3833: buf_size_idxs = 0;
3834: for (i=0;i<n_recvs;i++) {
3835: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3836: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3837: buf_size_idxs += (PetscInt)olengths_idxs[i];
3838: }
3839: PetscFree(recv_buffer_idxs);
3841: /* set preallocation */
3842: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
3843: if (!newisdense) {
3844: PetscInt *new_local_nnz=0;
3846: ptr_vals = recv_buffer_vals;
3847: ptr_idxs = recv_buffer_idxs_local;
3848: if (n_recvs) {
3849: PetscCalloc1(new_local_rows,&new_local_nnz);
3850: }
3851: for (i=0;i<n_recvs;i++) {
3852: PetscInt j;
3853: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3854: for (j=0;j<*(ptr_idxs+1);j++) {
3855: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3856: }
3857: } else {
3858: /* TODO */
3859: }
3860: ptr_idxs += olengths_idxs[i];
3861: }
3862: if (new_local_nnz) {
3863: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3864: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
3865: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3866: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3867: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3868: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3869: } else {
3870: MatSetUp(local_mat);
3871: }
3872: PetscFree(new_local_nnz);
3873: } else {
3874: MatSetUp(local_mat);
3875: }
3877: /* set values */
3878: ptr_vals = recv_buffer_vals;
3879: ptr_idxs = recv_buffer_idxs_local;
3880: for (i=0;i<n_recvs;i++) {
3881: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3882: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3883: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3884: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3885: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3886: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3887: } else {
3888: /* TODO */
3889: }
3890: ptr_idxs += olengths_idxs[i];
3891: ptr_vals += olengths_vals[i];
3892: }
3893: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3894: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3895: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
3896: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
3897: PetscFree(recv_buffer_vals);
3898: PetscFree(recv_buffer_idxs_local);
3900: #if 0
3901: if (!restrict_comm) { /* check */
3902: Vec lvec,rvec;
3903: PetscReal infty_error;
3905: MatCreateVecs(mat,&rvec,&lvec);
3906: VecSetRandom(rvec,NULL);
3907: MatMult(mat,rvec,lvec);
3908: VecScale(lvec,-1.0);
3909: MatMultAdd(*mat_n,rvec,lvec,lvec);
3910: VecNorm(lvec,NORM_INFINITY,&infty_error);
3911: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3912: VecDestroy(&rvec);
3913: VecDestroy(&lvec);
3914: }
3915: #endif
3917: /* assemble new additional is (if any) */
3918: if (nis) {
3919: PetscInt **temp_idxs,*count_is,j,psum;
3921: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
3922: PetscCalloc1(nis,&count_is);
3923: ptr_idxs = recv_buffer_idxs_is;
3924: psum = 0;
3925: for (i=0;i<n_recvs;i++) {
3926: for (j=0;j<nis;j++) {
3927: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3928: count_is[j] += plen; /* increment counting of buffer for j-th IS */
3929: psum += plen;
3930: ptr_idxs += plen+1; /* shift pointer to received data */
3931: }
3932: }
3933: PetscMalloc1(nis,&temp_idxs);
3934: PetscMalloc1(psum,&temp_idxs[0]);
3935: for (i=1;i<nis;i++) {
3936: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3937: }
3938: PetscMemzero(count_is,nis*sizeof(PetscInt));
3939: ptr_idxs = recv_buffer_idxs_is;
3940: for (i=0;i<n_recvs;i++) {
3941: for (j=0;j<nis;j++) {
3942: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3943: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
3944: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3945: ptr_idxs += plen+1; /* shift pointer to received data */
3946: }
3947: }
3948: for (i=0;i<nis;i++) {
3949: ISDestroy(&isarray[i]);
3950: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
3951: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
3952: }
3953: PetscFree(count_is);
3954: PetscFree(temp_idxs[0]);
3955: PetscFree(temp_idxs);
3956: }
3957: /* free workspace */
3958: PetscFree(recv_buffer_idxs_is);
3959: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3960: PetscFree(send_buffer_idxs);
3961: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3962: if (isdense) {
3963: MatISGetLocalMat(mat,&local_mat);
3964: MatDenseRestoreArray(local_mat,&send_buffer_vals);
3965: } else {
3966: /* PetscFree(send_buffer_vals); */
3967: }
3968: if (nis) {
3969: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
3970: PetscFree(send_buffer_idxs_is);
3971: }
3972: PetscFree(recv_req_idxs);
3973: PetscFree(recv_req_vals);
3974: PetscFree(recv_req_idxs_is);
3975: PetscFree(send_req_idxs);
3976: PetscFree(send_req_vals);
3977: PetscFree(send_req_idxs_is);
3978: PetscFree(ilengths_vals);
3979: PetscFree(ilengths_idxs);
3980: PetscFree(olengths_vals);
3981: PetscFree(olengths_idxs);
3982: PetscFree(onodes);
3983: if (nis) {
3984: PetscFree(ilengths_idxs_is);
3985: PetscFree(olengths_idxs_is);
3986: PetscFree(onodes_is);
3987: }
3988: PetscSubcommDestroy(&subcomm);
3989: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3990: MatDestroy(mat_n);
3991: for (i=0;i<nis;i++) {
3992: ISDestroy(&isarray[i]);
3993: }
3994: *mat_n = NULL;
3995: }
3996: return(0);
3997: }
3999: /* temporary hack into ksp private data structure */
4000: #include <petsc/private/kspimpl.h>
4004: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4005: {
4006: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
4007: PC_IS *pcis = (PC_IS*)pc->data;
4008: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
4009: MatNullSpace CoarseNullSpace=NULL;
4010: ISLocalToGlobalMapping coarse_islg;
4011: IS coarse_is,*isarray;
4012: PetscInt i,im_active=-1,active_procs=-1;
4013: PetscInt nis,nisdofs,nisneu;
4014: PC pc_temp;
4015: PCType coarse_pc_type;
4016: KSPType coarse_ksp_type;
4017: PetscBool multilevel_requested,multilevel_allowed;
4018: PetscBool isredundant,isbddc,isnn,coarse_reuse;
4019: Mat t_coarse_mat_is;
4020: PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4021: PetscMPIInt all_procs;
4022: PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist;
4023: PetscBool compute_vecs = PETSC_FALSE;
4024: PetscScalar *array;
4025: PetscErrorCode ierr;
4028: /* Assign global numbering to coarse dofs */
4029: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4030: PetscInt ocoarse_size;
4031: compute_vecs = PETSC_TRUE;
4032: ocoarse_size = pcbddc->coarse_size;
4033: PetscFree(pcbddc->global_primal_indices);
4034: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
4035: /* see if we can avoid some work */
4036: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4037: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4038: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4039: PC pc;
4040: PetscBool isbddc;
4042: /* temporary workaround since PCBDDC does not have a reset method so far */
4043: KSPGetPC(pcbddc->coarse_ksp,&pc);
4044: PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);
4045: if (isbddc) {
4046: PCDestroy(&pc);
4047: }
4048: KSPReset(pcbddc->coarse_ksp);
4049: coarse_reuse = PETSC_FALSE;
4050: } else { /* we can safely reuse already computed coarse matrix */
4051: coarse_reuse = PETSC_TRUE;
4052: }
4053: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4054: coarse_reuse = PETSC_FALSE;
4055: }
4056: /* reset any subassembling information */
4057: ISDestroy(&pcbddc->coarse_subassembling);
4058: ISDestroy(&pcbddc->coarse_subassembling_init);
4059: } else { /* primal space is unchanged, so we can reuse coarse matrix */
4060: coarse_reuse = PETSC_TRUE;
4061: }
4063: /* count "active" (i.e. with positive local size) and "void" processes */
4064: im_active = !!(pcis->n);
4065: MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
4066: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);
4067: void_procs = all_procs-active_procs;
4068: csin_type_simple = PETSC_TRUE;
4069: redist = PETSC_FALSE;
4070: if (pcbddc->current_level && void_procs) {
4071: csin_ml = PETSC_TRUE;
4072: ncoarse_ml = void_procs;
4073: /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4074: if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4075: csin_ds = PETSC_TRUE;
4076: ncoarse_ds = pcbddc->redistribute_coarse;
4077: redist = PETSC_TRUE;
4078: } else {
4079: csin_ds = PETSC_TRUE;
4080: ncoarse_ds = active_procs;
4081: redist = PETSC_TRUE;
4082: }
4083: } else {
4084: csin_ml = PETSC_FALSE;
4085: ncoarse_ml = all_procs;
4086: if (void_procs) {
4087: csin_ds = PETSC_TRUE;
4088: ncoarse_ds = void_procs;
4089: csin_type_simple = PETSC_FALSE;
4090: } else {
4091: if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4092: csin_ds = PETSC_TRUE;
4093: ncoarse_ds = pcbddc->redistribute_coarse;
4094: redist = PETSC_TRUE;
4095: } else {
4096: csin_ds = PETSC_FALSE;
4097: ncoarse_ds = all_procs;
4098: }
4099: }
4100: }
4102: /*
4103: test if we can go multilevel: three conditions must be satisfied:
4104: - we have not exceeded the number of levels requested
4105: - we can actually subassemble the active processes
4106: - we can find a suitable number of MPI processes where we can place the subassembled problem
4107: */
4108: multilevel_allowed = PETSC_FALSE;
4109: multilevel_requested = PETSC_FALSE;
4110: if (pcbddc->current_level < pcbddc->max_levels) {
4111: multilevel_requested = PETSC_TRUE;
4112: if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4113: multilevel_allowed = PETSC_FALSE;
4114: } else {
4115: multilevel_allowed = PETSC_TRUE;
4116: }
4117: }
4118: /* determine number of process partecipating to coarse solver */
4119: if (multilevel_allowed) {
4120: ncoarse = ncoarse_ml;
4121: csin = csin_ml;
4122: redist = PETSC_FALSE;
4123: } else {
4124: ncoarse = ncoarse_ds;
4125: csin = csin_ds;
4126: }
4128: /* creates temporary l2gmap and IS for coarse indexes */
4129: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
4130: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
4132: /* creates temporary MATIS object for coarse matrix */
4133: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
4134: MatDenseGetArray(coarse_submat_dense,&array);
4135: PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
4136: MatDenseRestoreArray(coarse_submat_dense,&array);
4137: #if 0
4138: {
4139: PetscViewer viewer;
4140: char filename[256];
4141: sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4142: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4143: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4144: MatView(coarse_submat_dense,viewer);
4145: PetscViewerDestroy(&viewer);
4146: }
4147: #endif
4148: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);
4149: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
4150: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4151: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4152: MatDestroy(&coarse_submat_dense);
4154: /* compute dofs splitting and neumann boundaries for coarse dofs */
4155: if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4156: PetscInt *tidxs,*tidxs2,nout,tsize,i;
4157: const PetscInt *idxs;
4158: ISLocalToGlobalMapping tmap;
4160: /* create map between primal indices (in local representative ordering) and local primal numbering */
4161: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
4162: /* allocate space for temporary storage */
4163: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
4164: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
4165: /* allocate for IS array */
4166: nisdofs = pcbddc->n_ISForDofsLocal;
4167: nisneu = !!pcbddc->NeumannBoundariesLocal;
4168: nis = nisdofs + nisneu;
4169: PetscMalloc1(nis,&isarray);
4170: /* dofs splitting */
4171: for (i=0;i<nisdofs;i++) {
4172: /* ISView(pcbddc->ISForDofsLocal[i],0); */
4173: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
4174: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
4175: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4176: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
4177: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4178: ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
4179: /* ISView(isarray[i],0); */
4180: }
4181: /* neumann boundaries */
4182: if (pcbddc->NeumannBoundariesLocal) {
4183: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
4184: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
4185: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4186: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4187: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4188: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4189: ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
4190: /* ISView(isarray[nisdofs],0); */
4191: }
4192: /* free memory */
4193: PetscFree(tidxs);
4194: PetscFree(tidxs2);
4195: ISLocalToGlobalMappingDestroy(&tmap);
4196: } else {
4197: nis = 0;
4198: nisdofs = 0;
4199: nisneu = 0;
4200: isarray = NULL;
4201: }
4202: /* destroy no longer needed map */
4203: ISLocalToGlobalMappingDestroy(&coarse_islg);
4205: /* restrict on coarse candidates (if needed) */
4206: coarse_mat_is = NULL;
4207: if (csin) {
4208: if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4209: if (redist) {
4210: PetscMPIInt rank;
4211: PetscInt spc,n_spc_p1,dest[1],destsize;
4213: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4214: spc = active_procs/ncoarse;
4215: n_spc_p1 = active_procs%ncoarse;
4216: if (im_active) {
4217: destsize = 1;
4218: if (rank > n_spc_p1*(spc+1)-1) {
4219: dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4220: } else {
4221: dest[0] = rank/(spc+1);
4222: }
4223: } else {
4224: destsize = 0;
4225: }
4226: ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4227: } else if (csin_type_simple) {
4228: PetscMPIInt rank;
4229: PetscInt issize,isidx;
4231: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4232: if (im_active) {
4233: issize = 1;
4234: isidx = (PetscInt)rank;
4235: } else {
4236: issize = 0;
4237: isidx = -1;
4238: }
4239: ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4240: } else { /* get a suitable subassembling pattern from MATIS code */
4241: MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);
4242: }
4244: /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4245: if (!redist || ncoarse <= void_procs) {
4246: PetscInt ncoarse_cand,tissize,*nisindices;
4247: PetscInt *coarse_candidates;
4248: const PetscInt* tisindices;
4250: /* get coarse candidates' ranks in pc communicator */
4251: PetscMalloc1(all_procs,&coarse_candidates);
4252: MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));
4253: for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4254: if (!coarse_candidates[i]) {
4255: coarse_candidates[ncoarse_cand++]=i;
4256: }
4257: }
4258: if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4261: if (pcbddc->dbg_flag) {
4262: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4263: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");
4264: ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4265: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");
4266: for (i=0;i<ncoarse_cand;i++) {
4267: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);
4268: }
4269: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");
4270: PetscViewerFlush(pcbddc->dbg_viewer);
4271: }
4272: /* shift the pattern on coarse candidates */
4273: ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);
4274: ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);
4275: PetscMalloc1(tissize,&nisindices);
4276: for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4277: ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);
4278: ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);
4279: PetscFree(coarse_candidates);
4280: }
4281: if (pcbddc->dbg_flag) {
4282: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4283: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");
4284: ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4285: PetscViewerFlush(pcbddc->dbg_viewer);
4286: }
4287: }
4288: /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4289: if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4290: MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4291: } else { /* this is the last level, so use just receiving processes in subcomm */
4292: MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4293: }
4294: } else {
4295: if (pcbddc->dbg_flag) {
4296: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4297: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");
4298: PetscViewerFlush(pcbddc->dbg_viewer);
4299: }
4300: PetscObjectReference((PetscObject)t_coarse_mat_is);
4301: coarse_mat_is = t_coarse_mat_is;
4302: }
4304: /* create local to global scatters for coarse problem */
4305: if (compute_vecs) {
4306: PetscInt lrows;
4307: VecDestroy(&pcbddc->coarse_vec);
4308: if (coarse_mat_is) {
4309: MatGetLocalSize(coarse_mat_is,&lrows,NULL);
4310: } else {
4311: lrows = 0;
4312: }
4313: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
4314: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
4315: VecSetType(pcbddc->coarse_vec,VECSTANDARD);
4316: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
4317: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
4318: }
4319: ISDestroy(&coarse_is);
4320: MatDestroy(&t_coarse_mat_is);
4322: /* set defaults for coarse KSP and PC */
4323: if (multilevel_allowed) {
4324: coarse_ksp_type = KSPRICHARDSON;
4325: coarse_pc_type = PCBDDC;
4326: } else {
4327: coarse_ksp_type = KSPPREONLY;
4328: coarse_pc_type = PCREDUNDANT;
4329: }
4331: /* print some info if requested */
4332: if (pcbddc->dbg_flag) {
4333: if (!multilevel_allowed) {
4334: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4335: if (multilevel_requested) {
4336: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
4337: } else if (pcbddc->max_levels) {
4338: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
4339: }
4340: PetscViewerFlush(pcbddc->dbg_viewer);
4341: }
4342: }
4344: /* create the coarse KSP object only once with defaults */
4345: if (coarse_mat_is) {
4346: MatReuse coarse_mat_reuse;
4347: PetscViewer dbg_viewer = NULL;
4348: if (pcbddc->dbg_flag) {
4349: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4350: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
4351: }
4352: if (!pcbddc->coarse_ksp) {
4353: char prefix[256],str_level[16];
4354: size_t len;
4355: KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);
4356: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4357: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
4358: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
4359: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);
4360: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
4361: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
4362: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4363: PCSetType(pc_temp,coarse_pc_type);
4364: /* prefix */
4365: PetscStrcpy(prefix,"");
4366: PetscStrcpy(str_level,"");
4367: if (!pcbddc->current_level) {
4368: PetscStrcpy(prefix,((PetscObject)pc)->prefix);
4369: PetscStrcat(prefix,"pc_bddc_coarse_");
4370: } else {
4371: PetscStrlen(((PetscObject)pc)->prefix,&len);
4372: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4373: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4374: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
4375: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4376: PetscStrcat(prefix,str_level);
4377: }
4378: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
4379: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4380: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
4381: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
4382: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
4383: /* allow user customization */
4384: KSPSetFromOptions(pcbddc->coarse_ksp);
4385: }
4386: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4387: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4388: if (nisdofs) {
4389: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
4390: for (i=0;i<nisdofs;i++) {
4391: ISDestroy(&isarray[i]);
4392: }
4393: }
4394: if (nisneu) {
4395: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
4396: ISDestroy(&isarray[nisdofs]);
4397: }
4399: /* get some info after set from options */
4400: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
4401: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
4402: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
4403: if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4404: PCSetType(pc_temp,coarse_pc_type);
4405: isbddc = PETSC_FALSE;
4406: }
4407: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4408: if (isredundant) {
4409: KSP inner_ksp;
4410: PC inner_pc;
4411: PCRedundantGetKSP(pc_temp,&inner_ksp);
4412: KSPGetPC(inner_ksp,&inner_pc);
4413: PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
4414: }
4416: /* assemble coarse matrix */
4417: if (coarse_reuse) {
4418: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
4419: PetscObjectReference((PetscObject)coarse_mat);
4420: coarse_mat_reuse = MAT_REUSE_MATRIX;
4421: } else {
4422: coarse_mat_reuse = MAT_INITIAL_MATRIX;
4423: }
4424: if (isbddc || isnn) {
4425: if (pcbddc->coarsening_ratio > 1) {
4426: if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4427: MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);
4428: if (pcbddc->dbg_flag) {
4429: PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");
4430: PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");
4431: ISView(pcbddc->coarse_subassembling,dbg_viewer);
4432: PetscViewerFlush(dbg_viewer);
4433: }
4434: }
4435: MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);
4436: } else {
4437: PetscObjectReference((PetscObject)coarse_mat_is);
4438: coarse_mat = coarse_mat_is;
4439: }
4440: } else {
4441: MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
4442: }
4443: MatDestroy(&coarse_mat_is);
4445: /* propagate symmetry info of coarse matrix */
4446: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
4447: if (pc->pmat->symmetric_set) {
4448: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
4449: }
4450: if (pc->pmat->hermitian_set) {
4451: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
4452: }
4453: if (pc->pmat->spd_set) {
4454: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
4455: }
4456: /* set operators */
4457: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
4458: if (pcbddc->dbg_flag) {
4459: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
4460: }
4461: } else { /* processes non partecipating to coarse solver (if any) */
4462: coarse_mat = 0;
4463: }
4464: PetscFree(isarray);
4465: #if 0
4466: {
4467: PetscViewer viewer;
4468: char filename[256];
4469: sprintf(filename,"coarse_mat.m");
4470: PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);
4471: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4472: MatView(coarse_mat,viewer);
4473: PetscViewerDestroy(&viewer);
4474: }
4475: #endif
4477: /* Compute coarse null space (special handling by BDDC only) */
4478: #if 0
4479: if (pcbddc->NullSpace) {
4480: PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
4481: }
4482: #endif
4484: if (pcbddc->coarse_ksp) {
4485: Vec crhs,csol;
4486: PetscBool ispreonly;
4488: if (CoarseNullSpace) {
4489: if (isbddc) {
4490: PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
4491: } else {
4492: MatSetNullSpace(coarse_mat,CoarseNullSpace);
4493: }
4494: }
4495: /* setup coarse ksp */
4496: KSPSetUp(pcbddc->coarse_ksp);
4497: KSPGetSolution(pcbddc->coarse_ksp,&csol);
4498: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
4499: /* hack */
4500: if (!csol) {
4501: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
4502: }
4503: if (!crhs) {
4504: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
4505: }
4506: /* Check coarse problem if in debug mode or if solving with an iterative method */
4507: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
4508: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4509: KSP check_ksp;
4510: KSPType check_ksp_type;
4511: PC check_pc;
4512: Vec check_vec,coarse_vec;
4513: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4514: PetscInt its;
4515: PetscBool compute_eigs;
4516: PetscReal *eigs_r,*eigs_c;
4517: PetscInt neigs;
4518: const char *prefix;
4520: /* Create ksp object suitable for estimation of extreme eigenvalues */
4521: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
4522: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4523: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
4524: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
4525: if (ispreonly) {
4526: check_ksp_type = KSPPREONLY;
4527: compute_eigs = PETSC_FALSE;
4528: } else {
4529: check_ksp_type = KSPGMRES;
4530: compute_eigs = PETSC_TRUE;
4531: }
4532: KSPSetType(check_ksp,check_ksp_type);
4533: KSPSetComputeSingularValues(check_ksp,compute_eigs);
4534: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
4535: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
4536: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
4537: KSPSetOptionsPrefix(check_ksp,prefix);
4538: KSPAppendOptionsPrefix(check_ksp,"check_");
4539: KSPSetFromOptions(check_ksp);
4540: KSPSetUp(check_ksp);
4541: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
4542: KSPSetPC(check_ksp,check_pc);
4543: /* create random vec */
4544: KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);
4545: VecDuplicate(coarse_vec,&check_vec);
4546: VecSetRandom(check_vec,NULL);
4547: if (CoarseNullSpace) {
4548: MatNullSpaceRemove(CoarseNullSpace,check_vec);
4549: }
4550: MatMult(coarse_mat,check_vec,coarse_vec);
4551: /* solve coarse problem */
4552: KSPSolve(check_ksp,coarse_vec,coarse_vec);
4553: if (CoarseNullSpace) {
4554: MatNullSpaceRemove(CoarseNullSpace,coarse_vec);
4555: }
4556: /* set eigenvalue estimation if preonly has not been requested */
4557: if (compute_eigs) {
4558: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
4559: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
4560: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
4561: lambda_max = eigs_r[neigs-1];
4562: lambda_min = eigs_r[0];
4563: if (pcbddc->use_coarse_estimates) {
4564: if (lambda_max>lambda_min) {
4565: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
4566: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
4567: }
4568: }
4569: }
4571: /* check coarse problem residual error */
4572: if (pcbddc->dbg_flag) {
4573: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4574: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
4575: VecAXPY(check_vec,-1.0,coarse_vec);
4576: VecNorm(check_vec,NORM_INFINITY,&infty_error);
4577: MatMult(coarse_mat,check_vec,coarse_vec);
4578: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
4579: VecDestroy(&check_vec);
4580: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
4581: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
4582: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
4583: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
4584: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
4585: if (compute_eigs) {
4586: PetscReal lambda_max_s,lambda_min_s;
4587: KSPGetType(check_ksp,&check_ksp_type);
4588: KSPGetIterationNumber(check_ksp,&its);
4589: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
4590: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
4591: for (i=0;i<neigs;i++) {
4592: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
4593: }
4594: }
4595: PetscViewerFlush(dbg_viewer);
4596: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
4597: }
4598: KSPDestroy(&check_ksp);
4599: if (compute_eigs) {
4600: PetscFree(eigs_r);
4601: PetscFree(eigs_c);
4602: }
4603: }
4604: }
4605: /* print additional info */
4606: if (pcbddc->dbg_flag) {
4607: /* waits until all processes reaches this point */
4608: PetscBarrier((PetscObject)pc);
4609: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
4610: PetscViewerFlush(pcbddc->dbg_viewer);
4611: }
4613: /* free memory */
4614: MatNullSpaceDestroy(&CoarseNullSpace);
4615: MatDestroy(&coarse_mat);
4616: return(0);
4617: }
4621: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4622: {
4623: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4624: PC_IS* pcis = (PC_IS*)pc->data;
4625: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4626: IS subset,subset_mult,subset_n;
4627: PetscInt local_size,coarse_size=0;
4628: PetscInt *local_primal_indices=NULL;
4629: const PetscInt *t_local_primal_indices;
4633: /* Compute global number of coarse dofs */
4634: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4635: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4636: }
4637: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
4638: ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);
4639: ISDestroy(&subset_n);
4640: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
4641: PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);
4642: ISDestroy(&subset);
4643: ISDestroy(&subset_mult);
4644: ISGetLocalSize(subset_n,&local_size);
4645: if (local_size != pcbddc->local_primal_size) {
4646: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4647: }
4648: PetscMalloc1(local_size,&local_primal_indices);
4649: ISGetIndices(subset_n,&t_local_primal_indices);
4650: PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
4651: ISRestoreIndices(subset_n,&t_local_primal_indices);
4652: ISDestroy(&subset_n);
4654: /* check numbering */
4655: if (pcbddc->dbg_flag) {
4656: PetscScalar coarsesum,*array;
4657: PetscInt i;
4658: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4660: PetscViewerFlush(pcbddc->dbg_viewer);
4661: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4662: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
4663: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
4664: VecSet(pcis->vec1_N,0.0);
4665: for (i=0;i<pcbddc->local_primal_size;i++) {
4666: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
4667: }
4668: VecAssemblyBegin(pcis->vec1_N);
4669: VecAssemblyEnd(pcis->vec1_N);
4670: VecSet(pcis->vec1_global,0.0);
4671: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4672: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4673: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4674: VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4675: VecGetArray(pcis->vec1_N,&array);
4676: for (i=0;i<pcis->n;i++) {
4677: if (array[i] == 1.0) {
4678: set_error = PETSC_TRUE;
4679: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
4680: }
4681: }
4682: MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
4683: PetscViewerFlush(pcbddc->dbg_viewer);
4684: for (i=0;i<pcis->n;i++) {
4685: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4686: }
4687: VecRestoreArray(pcis->vec1_N,&array);
4688: VecSet(pcis->vec1_global,0.0);
4689: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4690: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4691: VecSum(pcis->vec1_global,&coarsesum);
4692: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
4693: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4694: PetscInt *gidxs;
4696: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
4697: ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
4698: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
4699: PetscViewerFlush(pcbddc->dbg_viewer);
4700: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
4701: for (i=0;i<pcbddc->local_primal_size;i++) {
4702: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4703: }
4704: PetscViewerFlush(pcbddc->dbg_viewer);
4705: PetscFree(gidxs);
4706: }
4707: PetscViewerFlush(pcbddc->dbg_viewer);
4708: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4709: }
4710: /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
4711: /* get back data */
4712: *coarse_size_n = coarse_size;
4713: *local_primal_indices_n = local_primal_indices;
4714: return(0);
4715: }
4719: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4720: {
4721: IS localis_t;
4722: PetscInt i,lsize,*idxs,n;
4723: PetscScalar *vals;
4727: /* get indices in local ordering exploiting local to global map */
4728: ISGetLocalSize(globalis,&lsize);
4729: PetscMalloc1(lsize,&vals);
4730: for (i=0;i<lsize;i++) vals[i] = 1.0;
4731: ISGetIndices(globalis,(const PetscInt**)&idxs);
4732: VecSet(gwork,0.0);
4733: VecSet(lwork,0.0);
4734: if (idxs) { /* multilevel guard */
4735: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
4736: }
4737: VecAssemblyBegin(gwork);
4738: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
4739: PetscFree(vals);
4740: VecAssemblyEnd(gwork);
4741: /* now compute set in local ordering */
4742: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4743: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4744: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
4745: VecGetSize(lwork,&n);
4746: for (i=0,lsize=0;i<n;i++) {
4747: if (PetscRealPart(vals[i]) > 0.5) {
4748: lsize++;
4749: }
4750: }
4751: PetscMalloc1(lsize,&idxs);
4752: for (i=0,lsize=0;i<n;i++) {
4753: if (PetscRealPart(vals[i]) > 0.5) {
4754: idxs[lsize++] = i;
4755: }
4756: }
4757: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
4758: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
4759: *localis = localis_t;
4760: return(0);
4761: }
4763: /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4766: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4767: {
4768: PCBDDCChange_ctx change_ctx;
4769: PetscErrorCode ierr;
4772: MatShellGetContext(A,&change_ctx);
4773: MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4774: MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4775: MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4776: return(0);
4777: }
4781: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4782: {
4783: PCBDDCChange_ctx change_ctx;
4784: PetscErrorCode ierr;
4787: MatShellGetContext(A,&change_ctx);
4788: MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4789: MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4790: MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4791: return(0);
4792: }
4796: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4797: {
4798: PC_IS *pcis=(PC_IS*)pc->data;
4799: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
4800: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
4801: Mat S_j;
4802: PetscInt *used_xadj,*used_adjncy;
4803: PetscBool free_used_adj;
4804: PetscErrorCode ierr;
4807: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4808: free_used_adj = PETSC_FALSE;
4809: if (pcbddc->sub_schurs_layers == -1) {
4810: used_xadj = NULL;
4811: used_adjncy = NULL;
4812: } else {
4813: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4814: used_xadj = pcbddc->mat_graph->xadj;
4815: used_adjncy = pcbddc->mat_graph->adjncy;
4816: } else if (pcbddc->computed_rowadj) {
4817: used_xadj = pcbddc->mat_graph->xadj;
4818: used_adjncy = pcbddc->mat_graph->adjncy;
4819: } else {
4820: PetscBool flg_row=PETSC_FALSE;
4821: const PetscInt *xadj,*adjncy;
4822: PetscInt nvtxs;
4824: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4825: if (flg_row) {
4826: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
4827: PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
4828: PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
4829: free_used_adj = PETSC_TRUE;
4830: } else {
4831: pcbddc->sub_schurs_layers = -1;
4832: used_xadj = NULL;
4833: used_adjncy = NULL;
4834: }
4835: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4836: }
4837: }
4839: /* setup sub_schurs data */
4840: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
4841: if (!sub_schurs->use_mumps) {
4842: /* pcbddc->ksp_D up to date only if not using MUMPS */
4843: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
4844: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);
4845: } else {
4846: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4847: PetscBool isseqaij;
4848: if (!pcbddc->use_vertices && reuse_solvers) {
4849: PetscInt n_vertices;
4851: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
4852: reuse_solvers = (PetscBool)!n_vertices;
4853: }
4854: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
4855: if (!isseqaij) {
4856: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4857: if (matis->A == pcbddc->local_mat) {
4858: MatDestroy(&pcbddc->local_mat);
4859: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
4860: } else {
4861: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);
4862: }
4863: }
4864: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);
4865: }
4866: MatDestroy(&S_j);
4868: /* free adjacency */
4869: if (free_used_adj) {
4870: PetscFree2(used_xadj,used_adjncy);
4871: }
4872: return(0);
4873: }
4877: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4878: {
4879: PC_IS *pcis=(PC_IS*)pc->data;
4880: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
4881: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
4882: PCBDDCGraph graph;
4883: PetscErrorCode ierr;
4886: /* attach interface graph for determining subsets */
4887: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4888: IS verticesIS,verticescomm;
4889: PetscInt vsize,*idxs;
4891: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
4892: ISGetSize(verticesIS,&vsize);
4893: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
4894: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
4895: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
4896: ISDestroy(&verticesIS);
4897: PCBDDCGraphCreate(&graph);
4898: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);
4899: PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
4900: ISDestroy(&verticescomm);
4901: PCBDDCGraphComputeConnectedComponents(graph);
4902: /*
4903: if (pcbddc->dbg_flag) {
4904: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
4905: }
4906: */
4907: } else {
4908: graph = pcbddc->mat_graph;
4909: }
4911: /* sub_schurs init */
4912: PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);
4914: /* free graph struct */
4915: if (pcbddc->sub_schurs_rebuild) {
4916: PCBDDCGraphDestroy(&graph);
4917: }
4918: return(0);
4919: }