Actual source code: bddcprivate.c
petsc-3.7.3 2016-08-01
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
5: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
10: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11: {
12: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
13: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
15: PetscBLASInt *B_iwork,*B_ifail;
16: PetscScalar *work,lwork;
17: PetscScalar *St,*S,*eigv;
18: PetscScalar *Sarray,*Starray;
19: PetscReal *eigs,thresh;
20: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21: PetscBool allocated_S_St;
22: #if defined(PETSC_USE_COMPLEX)
23: PetscReal *rwork;
24: #endif
25: PetscErrorCode ierr;
28: if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30: if (pcbddc->dbg_flag) {
31: PetscViewerFlush(pcbddc->dbg_viewer);
32: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
33: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
34: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
35: }
37: if (pcbddc->dbg_flag) {
38: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
39: }
41: if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
43: /* max size of subsets */
44: mss = 0;
45: for (i=0;i<sub_schurs->n_subs;i++) {
46: PetscInt subset_size;
48: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
49: mss = PetscMax(mss,subset_size);
50: }
52: /* min/max and threshold */
53: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
54: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
55: nmax = PetscMax(nmin,nmax);
56: allocated_S_St = PETSC_FALSE;
57: if (nmin) {
58: allocated_S_St = PETSC_TRUE;
59: }
61: /* allocate lapack workspace */
62: cum = cum2 = 0;
63: maxneigs = 0;
64: for (i=0;i<sub_schurs->n_subs;i++) {
65: PetscInt n,subset_size;
67: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
68: n = PetscMin(subset_size,nmax);
69: cum += subset_size;
70: cum2 += subset_size*n;
71: maxneigs = PetscMax(maxneigs,n);
72: }
73: if (mss) {
74: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
75: PetscBLASInt B_itype = 1;
76: PetscBLASInt B_N = mss;
77: PetscReal zero = 0.0;
78: PetscReal eps = 0.0; /* dlamch? */
80: B_lwork = -1;
81: S = NULL;
82: St = NULL;
83: eigs = NULL;
84: eigv = NULL;
85: B_iwork = NULL;
86: B_ifail = NULL;
87: #if defined(PETSC_USE_COMPLEX)
88: rwork = NULL;
89: #endif
90: thresh = 1.0;
91: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
92: #if defined(PETSC_USE_COMPLEX)
93: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
94: #else
95: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
96: #endif
97: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
98: PetscFPTrapPop();
99: } else {
100: /* TODO */
101: }
102: } else {
103: lwork = 0;
104: }
106: nv = 0;
107: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
108: ISGetLocalSize(sub_schurs->is_vertices,&nv);
109: }
110: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
111: if (allocated_S_St) {
112: PetscMalloc2(mss*mss,&S,mss*mss,&St);
113: }
114: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
115: #if defined(PETSC_USE_COMPLEX)
116: PetscMalloc1(7*mss,&rwork);
117: #endif
118: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
119: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
120: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
121: nv+cum,&pcbddc->adaptive_constraints_idxs,
122: nv+cum2,&pcbddc->adaptive_constraints_data);
123: PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));
125: maxneigs = 0;
126: cum = cumarray = 0;
127: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
128: pcbddc->adaptive_constraints_data_ptr[0] = 0;
129: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
130: const PetscInt *idxs;
132: ISGetIndices(sub_schurs->is_vertices,&idxs);
133: for (cum=0;cum<nv;cum++) {
134: pcbddc->adaptive_constraints_n[cum] = 1;
135: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
136: pcbddc->adaptive_constraints_data[cum] = 1.0;
137: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
138: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
139: }
140: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
141: }
143: if (mss) { /* multilevel */
144: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
145: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
146: }
148: for (i=0;i<sub_schurs->n_subs;i++) {
150: const PetscInt *idxs;
151: PetscReal infty = PETSC_MAX_REAL;
152: PetscInt j,subset_size,eigs_start = 0;
153: PetscBLASInt B_N;
154: PetscBool same_data = PETSC_FALSE;
156: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
157: PetscBLASIntCast(subset_size,&B_N);
158: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
159: if (sub_schurs->is_hermitian) {
160: PetscInt j,k;
161: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
162: PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
163: PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
164: }
165: for (j=0;j<subset_size;j++) {
166: for (k=j;k<subset_size;k++) {
167: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
168: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
169: }
170: }
171: } else {
172: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
173: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
174: }
175: } else {
176: S = Sarray + cumarray;
177: St = Starray + cumarray;
178: }
180: ISGetIndices(sub_schurs->is_subs[i],&idxs);
181: /* see if we can save some work */
182: if (sub_schurs->n_subs == 1) {
183: PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
184: }
186: if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
187: B_neigs = 0;
188: } else {
189: /* Threshold: this is an heuristic for edges */
190: thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
192: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
193: PetscBLASInt B_itype = 1;
194: PetscBLASInt B_IL, B_IU;
195: PetscReal eps = -1.0; /* dlamch? */
196: PetscInt nmin_s;
198: /* ask for eigenvalues larger than thresh */
199: if (pcbddc->dbg_flag) {
200: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
201: }
202: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
203: #if defined(PETSC_USE_COMPLEX)
204: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
205: #else
206: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
207: #endif
208: PetscFPTrapPop();
209: if (B_ierr) {
210: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
211: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
212: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
213: }
215: if (B_neigs > nmax) {
216: if (pcbddc->dbg_flag) {
217: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
218: }
219: eigs_start = B_neigs -nmax;
220: B_neigs = nmax;
221: }
223: nmin_s = PetscMin(nmin,B_N);
224: if (B_neigs < nmin_s) {
225: PetscBLASInt B_neigs2;
227: B_IU = B_N - B_neigs;
228: B_IL = B_N - nmin_s + 1;
229: if (pcbddc->dbg_flag) {
230: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
231: }
232: if (sub_schurs->is_hermitian) {
233: PetscInt j;
234: for (j=0;j<subset_size;j++) {
235: PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
236: }
237: for (j=0;j<subset_size;j++) {
238: PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
239: }
240: } else {
241: PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
242: PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
243: }
244: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
245: #if defined(PETSC_USE_COMPLEX)
246: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
247: #else
248: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
249: #endif
250: PetscFPTrapPop();
251: B_neigs += B_neigs2;
252: }
253: if (B_ierr) {
254: if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
255: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
257: }
258: if (pcbddc->dbg_flag) {
259: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
260: for (j=0;j<B_neigs;j++) {
261: if (eigs[j] == 0.0) {
262: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
263: } else {
264: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
265: }
266: }
267: }
268: } else {
269: /* TODO */
270: }
271: }
272: maxneigs = PetscMax(B_neigs,maxneigs);
273: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
274: if (B_neigs) {
275: PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));
277: if (pcbddc->dbg_flag > 1) {
278: PetscInt ii;
279: for (ii=0;ii<B_neigs;ii++) {
280: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
281: for (j=0;j<B_N;j++) {
282: #if defined(PETSC_USE_COMPLEX)
283: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
284: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
286: #else
287: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
288: #endif
289: }
290: }
291: }
292: #if 0
293: for (j=0;j<B_neigs;j++) {
294: PetscBLASInt Blas_N,Blas_one = 1.0;
295: PetscScalar norm;
296: PetscBLASIntCast(subset_size,&Blas_N);
297: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
298: &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
299: if (pcbddc->adaptive_constraints_data[cum] > 0.0) {
300: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
301: } else {
302: norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
303: }
304: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
305: }
306: #endif
307: PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
308: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
309: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
310: cum++;
311: }
312: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
313: /* shift for next computation */
314: cumarray += subset_size*subset_size;
315: }
316: if (pcbddc->dbg_flag) {
317: PetscViewerFlush(pcbddc->dbg_viewer);
318: }
320: if (mss) {
321: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
322: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
323: /* destroy matrices (junk) */
324: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
325: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
326: }
327: if (allocated_S_St) {
328: PetscFree2(S,St);
329: }
330: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
331: #if defined(PETSC_USE_COMPLEX)
332: PetscFree(rwork);
333: #endif
334: if (pcbddc->dbg_flag) {
335: PetscInt maxneigs_r;
336: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
337: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
338: }
339: return(0);
340: }
344: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
345: {
346: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
347: PetscScalar *coarse_submat_vals;
351: /* Setup local scatters R_to_B and (optionally) R_to_D */
352: /* PCBDDCSetUpLocalWorkVectors should be called first! */
353: PCBDDCSetUpLocalScatters(pc);
355: /* Setup local neumann solver ksp_R */
356: /* PCBDDCSetUpLocalScatters should be called first! */
357: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
359: /* Change global null space passed in by the user if change of basis has been requested */
360: if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
361: PCBDDCNullSpaceAdaptGlobal(pc);
362: }
364: /*
365: Setup local correction and local part of coarse basis.
366: Gives back the dense local part of the coarse matrix in column major ordering
367: */
368: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
370: /* Compute total number of coarse nodes and setup coarse solver */
371: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
373: /* free */
374: PetscFree(coarse_submat_vals);
375: return(0);
376: }
380: PetscErrorCode PCBDDCResetCustomization(PC pc)
381: {
382: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
386: PCBDDCGraphResetCSR(pcbddc->mat_graph);
387: ISDestroy(&pcbddc->user_primal_vertices);
388: MatNullSpaceDestroy(&pcbddc->NullSpace);
389: ISDestroy(&pcbddc->NeumannBoundaries);
390: ISDestroy(&pcbddc->NeumannBoundariesLocal);
391: ISDestroy(&pcbddc->DirichletBoundaries);
392: MatNullSpaceDestroy(&pcbddc->onearnullspace);
393: PetscFree(pcbddc->onearnullvecs_state);
394: ISDestroy(&pcbddc->DirichletBoundariesLocal);
395: PCBDDCSetDofsSplitting(pc,0,NULL);
396: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
397: return(0);
398: }
402: PetscErrorCode PCBDDCResetTopography(PC pc)
403: {
404: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
408: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
409: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
410: MatDestroy(&pcbddc->ConstraintMatrix);
411: PCBDDCGraphReset(pcbddc->mat_graph);
412: PCBDDCSubSchursReset(pcbddc->sub_schurs);
413: return(0);
414: }
418: PetscErrorCode PCBDDCResetSolvers(PC pc)
419: {
420: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
421: PetscScalar *array;
425: VecDestroy(&pcbddc->coarse_vec);
426: if (pcbddc->coarse_phi_B) {
427: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
428: PetscFree(array);
429: }
430: MatDestroy(&pcbddc->coarse_phi_B);
431: MatDestroy(&pcbddc->coarse_phi_D);
432: MatDestroy(&pcbddc->coarse_psi_B);
433: MatDestroy(&pcbddc->coarse_psi_D);
434: VecDestroy(&pcbddc->vec1_P);
435: VecDestroy(&pcbddc->vec1_C);
436: MatDestroy(&pcbddc->local_auxmat2);
437: MatDestroy(&pcbddc->local_auxmat1);
438: VecDestroy(&pcbddc->vec1_R);
439: VecDestroy(&pcbddc->vec2_R);
440: ISDestroy(&pcbddc->is_R_local);
441: VecScatterDestroy(&pcbddc->R_to_B);
442: VecScatterDestroy(&pcbddc->R_to_D);
443: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
444: KSPDestroy(&pcbddc->ksp_D);
445: KSPDestroy(&pcbddc->ksp_R);
446: KSPDestroy(&pcbddc->coarse_ksp);
447: MatDestroy(&pcbddc->local_mat);
448: PetscFree(pcbddc->primal_indices_local_idxs);
449: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
450: PetscFree(pcbddc->global_primal_indices);
451: ISDestroy(&pcbddc->coarse_subassembling);
452: ISDestroy(&pcbddc->coarse_subassembling_init);
453: return(0);
454: }
458: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
459: {
460: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
461: PC_IS *pcis = (PC_IS*)pc->data;
462: VecType impVecType;
463: PetscInt n_constraints,n_R,old_size;
467: if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
468: /* get sizes */
469: n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
470: n_R = pcis->n-pcbddc->n_vertices;
471: VecGetType(pcis->vec1_N,&impVecType);
472: /* local work vectors (try to avoid unneeded work)*/
473: /* R nodes */
474: old_size = -1;
475: if (pcbddc->vec1_R) {
476: VecGetSize(pcbddc->vec1_R,&old_size);
477: }
478: if (n_R != old_size) {
479: VecDestroy(&pcbddc->vec1_R);
480: VecDestroy(&pcbddc->vec2_R);
481: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
482: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
483: VecSetType(pcbddc->vec1_R,impVecType);
484: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
485: }
486: /* local primal dofs */
487: old_size = -1;
488: if (pcbddc->vec1_P) {
489: VecGetSize(pcbddc->vec1_P,&old_size);
490: }
491: if (pcbddc->local_primal_size != old_size) {
492: VecDestroy(&pcbddc->vec1_P);
493: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
494: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
495: VecSetType(pcbddc->vec1_P,impVecType);
496: }
497: /* local explicit constraints */
498: old_size = -1;
499: if (pcbddc->vec1_C) {
500: VecGetSize(pcbddc->vec1_C,&old_size);
501: }
502: if (n_constraints && n_constraints != old_size) {
503: VecDestroy(&pcbddc->vec1_C);
504: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
505: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
506: VecSetType(pcbddc->vec1_C,impVecType);
507: }
508: return(0);
509: }
513: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
514: {
515: PetscErrorCode ierr;
516: /* pointers to pcis and pcbddc */
517: PC_IS* pcis = (PC_IS*)pc->data;
518: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
519: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
520: /* submatrices of local problem */
521: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
522: /* submatrices of local coarse problem */
523: Mat S_VV,S_CV,S_VC,S_CC;
524: /* working matrices */
525: Mat C_CR;
526: /* additional working stuff */
527: PC pc_R;
528: Mat F;
529: PetscBool isLU,isCHOL,isILU;
531: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
532: PetscScalar *work;
533: PetscInt *idx_V_B;
534: PetscInt n,n_vertices,n_constraints;
535: PetscInt i,n_R,n_D,n_B;
536: PetscBool unsymmetric_check;
537: /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
538: MatType impMatType;
539: /* some shortcuts to scalars */
540: PetscScalar one=1.0,m_one=-1.0;
543: n_vertices = pcbddc->n_vertices;
544: n_constraints = pcbddc->local_primal_size-n_vertices;
545: /* Set Non-overlapping dimensions */
546: n_B = pcis->n_B;
547: n_D = pcis->n - n_B;
548: n_R = pcis->n - n_vertices;
550: /* Set types for local objects needed by BDDC precondtioner */
551: impMatType = MATSEQDENSE;
553: /* vertices in boundary numbering */
554: PetscMalloc1(n_vertices,&idx_V_B);
555: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
556: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);
558: /* Subdomain contribution (Non-overlapping) to coarse matrix */
559: PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
560: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
561: MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
562: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
563: MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
564: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
565: MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
566: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
567: MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);
569: unsymmetric_check = PETSC_FALSE;
570: /* allocate workspace */
571: n = 0;
572: if (n_constraints) {
573: n += n_R*n_constraints;
574: }
575: if (n_vertices) {
576: n = PetscMax(2*n_R*n_vertices,n);
577: n = PetscMax((n_R+n_B)*n_vertices,n);
578: }
579: if (!pcbddc->symmetric_primal) {
580: n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
581: unsymmetric_check = PETSC_TRUE;
582: }
583: PetscMalloc1(n,&work);
585: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
586: KSPGetPC(pcbddc->ksp_R,&pc_R);
587: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
588: PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
589: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
590: if (isLU || isILU || isCHOL) {
591: PCFactorGetMatrix(pc_R,&F);
592: } else if (sub_schurs->reuse_mumps) {
593: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
594: MatFactorType type;
596: F = reuse_mumps->F;
597: MatGetFactorType(F,&type);
598: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
599: } else {
600: F = NULL;
601: }
603: /* Precompute stuffs needed for preprocessing and application of BDDC*/
604: if (n_constraints) {
605: Mat M1,M2,M3;
606: Mat auxmat;
607: IS is_aux;
608: PetscScalar *array,*array2;
610: MatDestroy(&pcbddc->local_auxmat1);
611: MatDestroy(&pcbddc->local_auxmat2);
613: /* Extract constraints on R nodes: C_{CR} */
614: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
615: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
616: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);
618: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
619: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
620: PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));
621: for (i=0;i<n_constraints;i++) {
622: const PetscScalar *row_cmat_values;
623: const PetscInt *row_cmat_indices;
624: PetscInt size_of_constraint,j;
626: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
627: for (j=0;j<size_of_constraint;j++) {
628: work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
629: }
630: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
631: }
632: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);
633: if (F) {
634: Mat B;
636: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
637: MatMatSolve(F,B,local_auxmat2_R);
638: MatDestroy(&B);
639: } else {
640: PetscScalar *marr;
642: MatDenseGetArray(local_auxmat2_R,&marr);
643: for (i=0;i<n_constraints;i++) {
644: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
645: VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);
646: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
647: VecResetArray(pcbddc->vec1_R);
648: VecResetArray(pcbddc->vec2_R);
649: }
650: MatDenseRestoreArray(local_auxmat2_R,&marr);
651: }
652: if (!pcbddc->switch_static) {
653: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
654: MatDenseGetArray(pcbddc->local_auxmat2,&array);
655: MatDenseGetArray(local_auxmat2_R,&array2);
656: for (i=0;i<n_constraints;i++) {
657: VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);
658: VecPlaceArray(pcis->vec1_B,array+i*n_B);
659: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
660: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
661: VecResetArray(pcis->vec1_B);
662: VecResetArray(pcbddc->vec1_R);
663: }
664: MatDenseRestoreArray(local_auxmat2_R,&array2);
665: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
666: MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
667: } else {
668: PetscObjectReference((PetscObject)local_auxmat2_R);
669: pcbddc->local_auxmat2 = local_auxmat2_R;
670: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
671: }
672: ISDestroy(&is_aux);
673: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
674: MatScale(M3,m_one);
675: MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
676: MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
677: if (isCHOL) {
678: MatCholeskyFactor(M3,NULL,NULL);
679: } else {
680: MatLUFactor(M3,NULL,NULL,NULL);
681: }
682: VecSet(pcbddc->vec1_C,one);
683: MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
684: MatMatSolve(M3,M2,M1);
685: MatDestroy(&M2);
686: MatDestroy(&M3);
687: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
688: MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
689: MatDestroy(&auxmat);
690: MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
691: MatDestroy(&M1);
692: }
693: /* Get submatrices from subdomain matrix */
694: if (n_vertices) {
695: IS is_aux;
697: if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
698: IS tis;
700: ISDuplicate(pcbddc->is_R_local,&tis);
701: ISSort(tis);
702: ISComplement(tis,0,pcis->n,&is_aux);
703: ISDestroy(&tis);
704: } else {
705: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
706: }
707: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
708: MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
709: MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
710: ISDestroy(&is_aux);
711: }
713: /* Matrix of coarse basis functions (local) */
714: if (pcbddc->coarse_phi_B) {
715: PetscInt on_B,on_primal,on_D=n_D;
716: if (pcbddc->coarse_phi_D) {
717: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
718: }
719: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
720: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
721: PetscScalar *marray;
723: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
724: PetscFree(marray);
725: MatDestroy(&pcbddc->coarse_phi_B);
726: MatDestroy(&pcbddc->coarse_psi_B);
727: MatDestroy(&pcbddc->coarse_phi_D);
728: MatDestroy(&pcbddc->coarse_psi_D);
729: }
730: }
732: if (!pcbddc->coarse_phi_B) {
733: PetscScalar *marray;
735: n = n_B*pcbddc->local_primal_size;
736: if (pcbddc->switch_static || pcbddc->dbg_flag) {
737: n += n_D*pcbddc->local_primal_size;
738: }
739: if (!pcbddc->symmetric_primal) {
740: n *= 2;
741: }
742: PetscCalloc1(n,&marray);
743: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);
744: n = n_B*pcbddc->local_primal_size;
745: if (pcbddc->switch_static || pcbddc->dbg_flag) {
746: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);
747: n += n_D*pcbddc->local_primal_size;
748: }
749: if (!pcbddc->symmetric_primal) {
750: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);
751: if (pcbddc->switch_static || pcbddc->dbg_flag) {
752: n = n_B*pcbddc->local_primal_size;
753: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);
754: }
755: } else {
756: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
757: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
758: if (pcbddc->switch_static || pcbddc->dbg_flag) {
759: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
760: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
761: }
762: }
763: }
764: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
765: /* vertices */
766: if (n_vertices) {
768: MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);
770: if (n_R) {
771: Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
772: PetscBLASInt B_N,B_one = 1;
773: PetscScalar *x,*y;
774: PetscBool isseqaij;
776: MatScale(A_RV,m_one);
777: MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);
778: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
779: if (F) { /* TODO could be optimized for symmetric problems */
780: MatMatSolve(F,A_RV,A_RRmA_RV);
781: } else {
782: MatDenseGetArray(A_RV,&y);
783: for (i=0;i<n_vertices;i++) {
784: VecPlaceArray(pcbddc->vec1_R,y+i*n_R);
785: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
786: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
787: VecResetArray(pcbddc->vec1_R);
788: VecResetArray(pcbddc->vec2_R);
789: }
790: MatDenseRestoreArray(A_RV,&y);
791: }
792: MatDestroy(&A_RV);
793: /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
794: if (n_constraints) {
795: Mat B;
797: PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
798: for (i=0;i<n_vertices;i++) {
799: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
800: VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);
801: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
802: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
803: VecResetArray(pcis->vec1_B);
804: VecResetArray(pcbddc->vec1_R);
805: }
806: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);
807: MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
808: MatDestroy(&B);
809: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);
810: MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
811: MatScale(S_CV,m_one);
812: PetscBLASIntCast(n_R*n_vertices,&B_N);
813: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
814: MatDestroy(&B);
815: }
816: PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
817: if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
818: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
819: }
820: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
821: MatDestroy(&A_RRmA_RV);
822: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
823: MatDenseGetArray(A_VV,&x);
824: MatDenseGetArray(S_VVt,&y);
825: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
826: MatDenseRestoreArray(A_VV,&x);
827: MatDenseRestoreArray(S_VVt,&y);
828: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
829: MatDestroy(&S_VVt);
830: } else {
831: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
832: }
833: MatDestroy(&A_VV);
834: /* coarse basis functions */
835: for (i=0;i<n_vertices;i++) {
836: PetscScalar *y;
838: VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
839: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
840: VecPlaceArray(pcis->vec1_B,y+n_B*i);
841: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
842: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
843: y[n_B*i+idx_V_B[i]] = 1.0;
844: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
845: VecResetArray(pcis->vec1_B);
847: if (pcbddc->switch_static || pcbddc->dbg_flag) {
848: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
849: VecPlaceArray(pcis->vec1_D,y+n_D*i);
850: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
851: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
852: VecResetArray(pcis->vec1_D);
853: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
854: }
855: VecResetArray(pcbddc->vec1_R);
856: }
857: /* if n_R == 0 the object is not destroyed */
858: MatDestroy(&A_RV);
859: }
861: if (n_constraints) {
862: Mat B;
864: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
865: MatScale(S_CC,m_one);
866: MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
867: MatScale(S_CC,m_one);
868: if (n_vertices) {
869: if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
870: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
871: } else {
872: Mat S_VCt;
874: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
875: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
876: MatDestroy(&S_VCt);
877: }
878: }
879: MatDestroy(&B);
880: /* coarse basis functions */
881: for (i=0;i<n_constraints;i++) {
882: PetscScalar *y;
884: VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
885: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
886: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
887: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
888: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
889: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
890: VecResetArray(pcis->vec1_B);
891: if (pcbddc->switch_static || pcbddc->dbg_flag) {
892: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
893: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
894: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
895: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
896: VecResetArray(pcis->vec1_D);
897: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
898: }
899: VecResetArray(pcbddc->vec1_R);
900: }
901: }
902: if (n_constraints) {
903: MatDestroy(&local_auxmat2_R);
904: }
906: /* compute other basis functions for non-symmetric problems */
907: if (!pcbddc->symmetric_primal) {
909: if (n_constraints) {
910: Mat S_CCT,B_C;
912: /* this is a lazy thing */
913: MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);
914: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);
915: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
916: MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);
917: MatDestroy(&S_CCT);
918: if (n_vertices) {
919: Mat B_V,S_VCT;
921: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);
922: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
923: MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);
924: MatDestroy(&B_V);
925: MatDestroy(&S_VCT);
926: }
927: MatDestroy(&B_C);
928: } else { /* if there are no constraints, reset work */
929: PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));
930: }
931: if (n_vertices && n_R) {
932: Mat A_VRT;
933: PetscScalar *marray;
934: PetscBLASInt B_N,B_one = 1;
936: MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);
937: MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);
938: MatDenseGetArray(A_VRT,&marray);
939: PetscBLASIntCast(n_vertices*n_R,&B_N);
940: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
941: MatDenseRestoreArray(A_VRT,&marray);
942: MatDestroy(&A_VRT);
943: }
945: if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
946: for (i=0;i<pcbddc->local_primal_size;i++) {
947: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
948: VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
949: MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);
950: VecResetArray(pcbddc->vec1_R);
951: VecResetArray(pcbddc->vec2_R);
952: }
953: } else {
954: for (i=0;i<pcbddc->local_primal_size;i++) {
955: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
956: VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
957: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
958: VecResetArray(pcbddc->vec1_R);
959: VecResetArray(pcbddc->vec2_R);
960: }
961: }
962: /* coarse basis functions */
963: for (i=0;i<pcbddc->local_primal_size;i++) {
964: PetscScalar *y;
966: VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));
967: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
968: VecPlaceArray(pcis->vec1_B,y+n_B*i);
969: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
970: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
971: if (i<n_vertices) {
972: y[n_B*i+idx_V_B[i]] = 1.0;
973: }
974: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
975: VecResetArray(pcis->vec1_B);
977: if (pcbddc->switch_static || pcbddc->dbg_flag) {
978: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
979: VecPlaceArray(pcis->vec1_D,y+n_D*i);
980: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
981: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
982: VecResetArray(pcis->vec1_D);
983: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
984: }
985: VecResetArray(pcbddc->vec1_R);
986: }
987: }
988: /* free memory */
989: PetscFree(idx_V_B);
990: MatDestroy(&S_VV);
991: MatDestroy(&S_CV);
992: MatDestroy(&S_VC);
993: MatDestroy(&S_CC);
994: PetscFree(work);
995: if (n_vertices) {
996: MatDestroy(&A_VR);
997: }
998: if (n_constraints) {
999: MatDestroy(&C_CR);
1000: }
1001: /* Checking coarse_sub_mat and coarse basis functios */
1002: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1003: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1004: if (pcbddc->dbg_flag) {
1005: Mat coarse_sub_mat;
1006: Mat AUXMAT,TM1,TM2,TM3,TM4;
1007: Mat coarse_phi_D,coarse_phi_B;
1008: Mat coarse_psi_D,coarse_psi_B;
1009: Mat A_II,A_BB,A_IB,A_BI;
1010: Mat C_B,CPHI;
1011: IS is_dummy;
1012: Vec mones;
1013: MatType checkmattype=MATSEQAIJ;
1014: PetscReal real_value;
1016: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
1017: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
1018: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
1019: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
1020: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
1021: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
1022: if (unsymmetric_check) {
1023: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
1024: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
1025: }
1026: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
1028: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1029: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
1030: PetscViewerFlush(pcbddc->dbg_viewer);
1031: if (unsymmetric_check) {
1032: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1033: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
1034: MatDestroy(&AUXMAT);
1035: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1036: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
1037: MatDestroy(&AUXMAT);
1038: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1039: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1040: MatDestroy(&AUXMAT);
1041: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1042: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1043: MatDestroy(&AUXMAT);
1044: } else {
1045: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
1046: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
1047: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1048: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1049: MatDestroy(&AUXMAT);
1050: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1051: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1052: MatDestroy(&AUXMAT);
1053: }
1054: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
1055: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
1056: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
1057: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
1058: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
1059: MatNorm(TM1,NORM_FROBENIUS,&real_value);
1060: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1061: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
1063: /* check constraints */
1064: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);
1065: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1066: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
1067: MatCreateVecs(CPHI,&mones,NULL);
1068: VecSet(mones,-1.0);
1069: MatDiagonalSet(CPHI,mones,ADD_VALUES);
1070: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1071: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1072: if (unsymmetric_check) {
1073: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
1074: VecSet(mones,-1.0);
1075: MatDiagonalSet(CPHI,mones,ADD_VALUES);
1076: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1077: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1078: }
1079: MatDestroy(&C_B);
1080: MatDestroy(&CPHI);
1081: ISDestroy(&is_dummy);
1082: VecDestroy(&mones);
1084: PetscViewerFlush(pcbddc->dbg_viewer);
1085: MatDestroy(&A_II);
1086: MatDestroy(&A_BB);
1087: MatDestroy(&A_IB);
1088: MatDestroy(&A_BI);
1089: MatDestroy(&TM1);
1090: MatDestroy(&TM2);
1091: MatDestroy(&TM3);
1092: MatDestroy(&TM4);
1093: MatDestroy(&coarse_phi_D);
1094: MatDestroy(&coarse_phi_B);
1095: if (unsymmetric_check) {
1096: MatDestroy(&coarse_psi_D);
1097: MatDestroy(&coarse_psi_B);
1098: }
1099: MatDestroy(&coarse_sub_mat);
1100: }
1101: /* get back data */
1102: *coarse_submat_vals_n = coarse_submat_vals;
1103: return(0);
1104: }
1108: PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1109: {
1110: Mat *work_mat;
1111: IS isrow_s,iscol_s;
1112: PetscBool rsorted,csorted;
1113: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
1117: ISSorted(isrow,&rsorted);
1118: ISSorted(iscol,&csorted);
1119: ISGetLocalSize(isrow,&rsize);
1120: ISGetLocalSize(iscol,&csize);
1122: if (!rsorted) {
1123: const PetscInt *idxs;
1124: PetscInt *idxs_sorted,i;
1126: PetscMalloc1(rsize,&idxs_perm_r);
1127: PetscMalloc1(rsize,&idxs_sorted);
1128: for (i=0;i<rsize;i++) {
1129: idxs_perm_r[i] = i;
1130: }
1131: ISGetIndices(isrow,&idxs);
1132: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
1133: for (i=0;i<rsize;i++) {
1134: idxs_sorted[i] = idxs[idxs_perm_r[i]];
1135: }
1136: ISRestoreIndices(isrow,&idxs);
1137: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
1138: } else {
1139: PetscObjectReference((PetscObject)isrow);
1140: isrow_s = isrow;
1141: }
1143: if (!csorted) {
1144: if (isrow == iscol) {
1145: PetscObjectReference((PetscObject)isrow_s);
1146: iscol_s = isrow_s;
1147: } else {
1148: const PetscInt *idxs;
1149: PetscInt *idxs_sorted,i;
1151: PetscMalloc1(csize,&idxs_perm_c);
1152: PetscMalloc1(csize,&idxs_sorted);
1153: for (i=0;i<csize;i++) {
1154: idxs_perm_c[i] = i;
1155: }
1156: ISGetIndices(iscol,&idxs);
1157: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
1158: for (i=0;i<csize;i++) {
1159: idxs_sorted[i] = idxs[idxs_perm_c[i]];
1160: }
1161: ISRestoreIndices(iscol,&idxs);
1162: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
1163: }
1164: } else {
1165: PetscObjectReference((PetscObject)iscol);
1166: iscol_s = iscol;
1167: }
1169: MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
1171: if (!rsorted || !csorted) {
1172: Mat new_mat;
1173: IS is_perm_r,is_perm_c;
1175: if (!rsorted) {
1176: PetscInt *idxs_r,i;
1177: PetscMalloc1(rsize,&idxs_r);
1178: for (i=0;i<rsize;i++) {
1179: idxs_r[idxs_perm_r[i]] = i;
1180: }
1181: PetscFree(idxs_perm_r);
1182: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
1183: } else {
1184: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
1185: }
1186: ISSetPermutation(is_perm_r);
1188: if (!csorted) {
1189: if (isrow_s == iscol_s) {
1190: PetscObjectReference((PetscObject)is_perm_r);
1191: is_perm_c = is_perm_r;
1192: } else {
1193: PetscInt *idxs_c,i;
1194: PetscMalloc1(csize,&idxs_c);
1195: for (i=0;i<csize;i++) {
1196: idxs_c[idxs_perm_c[i]] = i;
1197: }
1198: PetscFree(idxs_perm_c);
1199: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
1200: }
1201: } else {
1202: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
1203: }
1204: ISSetPermutation(is_perm_c);
1206: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
1207: MatDestroy(&work_mat[0]);
1208: work_mat[0] = new_mat;
1209: ISDestroy(&is_perm_r);
1210: ISDestroy(&is_perm_c);
1211: }
1213: PetscObjectReference((PetscObject)work_mat[0]);
1214: *B = work_mat[0];
1215: MatDestroyMatrices(1,&work_mat);
1216: ISDestroy(&isrow_s);
1217: ISDestroy(&iscol_s);
1218: return(0);
1219: }
1223: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1224: {
1225: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1226: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1227: Mat new_mat;
1228: IS is_local,is_global;
1229: PetscInt local_size;
1230: PetscBool isseqaij;
1234: MatDestroy(&pcbddc->local_mat);
1235: MatGetSize(matis->A,&local_size,NULL);
1236: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
1237: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
1238: ISDestroy(&is_local);
1239: MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
1240: ISDestroy(&is_global);
1242: /* check */
1243: if (pcbddc->dbg_flag) {
1244: Vec x,x_change;
1245: PetscReal error;
1247: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
1248: VecSetRandom(x,NULL);
1249: MatMult(ChangeOfBasisMatrix,x,x_change);
1250: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1251: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1252: MatMult(new_mat,matis->x,matis->y);
1253: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1254: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1255: VecAXPY(x,-1.0,x_change);
1256: VecNorm(x,NORM_INFINITY,&error);
1257: PetscViewerFlush(pcbddc->dbg_viewer);
1258: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);
1259: VecDestroy(&x);
1260: VecDestroy(&x_change);
1261: }
1263: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1264: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
1265: if (isseqaij) {
1266: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1267: } else {
1268: Mat work_mat;
1269: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
1270: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1271: MatDestroy(&work_mat);
1272: }
1273: if (matis->A->symmetric_set) {
1274: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
1275: #if !defined(PETSC_USE_COMPLEX)
1276: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
1277: #endif
1278: }
1279: MatDestroy(&new_mat);
1280: return(0);
1281: }
1285: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1286: {
1287: PC_IS* pcis = (PC_IS*)(pc->data);
1288: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1289: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1290: PetscInt *idx_R_local=NULL;
1291: PetscInt n_vertices,i,j,n_R,n_D,n_B;
1292: PetscInt vbs,bs;
1293: PetscBT bitmask=NULL;
1294: PetscErrorCode ierr;
1297: /*
1298: No need to setup local scatters if
1299: - primal space is unchanged
1300: AND
1301: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1302: AND
1303: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1304: */
1305: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1306: return(0);
1307: }
1308: /* destroy old objects */
1309: ISDestroy(&pcbddc->is_R_local);
1310: VecScatterDestroy(&pcbddc->R_to_B);
1311: VecScatterDestroy(&pcbddc->R_to_D);
1312: /* Set Non-overlapping dimensions */
1313: n_B = pcis->n_B;
1314: n_D = pcis->n - n_B;
1315: n_vertices = pcbddc->n_vertices;
1317: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1319: /* create auxiliary bitmask and allocate workspace */
1320: if (!sub_schurs->reuse_mumps) {
1321: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
1322: PetscBTCreate(pcis->n,&bitmask);
1323: for (i=0;i<n_vertices;i++) {
1324: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
1325: }
1327: for (i=0, n_R=0; i<pcis->n; i++) {
1328: if (!PetscBTLookup(bitmask,i)) {
1329: idx_R_local[n_R++] = i;
1330: }
1331: }
1332: } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1333: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1335: ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1336: ISGetLocalSize(reuse_mumps->is_R,&n_R);
1337: }
1339: /* Block code */
1340: vbs = 1;
1341: MatGetBlockSize(pcbddc->local_mat,&bs);
1342: if (bs>1 && !(n_vertices%bs)) {
1343: PetscBool is_blocked = PETSC_TRUE;
1344: PetscInt *vary;
1345: if (!sub_schurs->reuse_mumps) {
1346: PetscMalloc1(pcis->n/bs,&vary);
1347: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
1348: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1349: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1350: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1351: for (i=0; i<pcis->n/bs; i++) {
1352: if (vary[i]!=0 && vary[i]!=bs) {
1353: is_blocked = PETSC_FALSE;
1354: break;
1355: }
1356: }
1357: PetscFree(vary);
1358: } else {
1359: /* Verify directly the R set */
1360: for (i=0; i<n_R/bs; i++) {
1361: PetscInt j,node=idx_R_local[bs*i];
1362: for (j=1; j<bs; j++) {
1363: if (node != idx_R_local[bs*i+j]-j) {
1364: is_blocked = PETSC_FALSE;
1365: break;
1366: }
1367: }
1368: }
1369: }
1370: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1371: vbs = bs;
1372: for (i=0;i<n_R/vbs;i++) {
1373: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1374: }
1375: }
1376: }
1377: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
1378: if (sub_schurs->reuse_mumps) {
1379: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1381: ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1382: ISDestroy(&reuse_mumps->is_R);
1383: PetscObjectReference((PetscObject)pcbddc->is_R_local);
1384: reuse_mumps->is_R = pcbddc->is_R_local;
1385: } else {
1386: PetscFree(idx_R_local);
1387: }
1389: /* print some info if requested */
1390: if (pcbddc->dbg_flag) {
1391: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1392: PetscViewerFlush(pcbddc->dbg_viewer);
1393: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1394: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
1395: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
1396: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
1397: PetscViewerFlush(pcbddc->dbg_viewer);
1398: }
1400: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1401: if (!sub_schurs->reuse_mumps) {
1402: IS is_aux1,is_aux2;
1403: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1405: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1406: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
1407: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
1408: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1409: for (i=0; i<n_D; i++) {
1410: PetscBTSet(bitmask,is_indices[i]);
1411: }
1412: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1413: for (i=0, j=0; i<n_R; i++) {
1414: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1415: aux_array1[j++] = i;
1416: }
1417: }
1418: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1419: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1420: for (i=0, j=0; i<n_B; i++) {
1421: if (!PetscBTLookup(bitmask,is_indices[i])) {
1422: aux_array2[j++] = i;
1423: }
1424: }
1425: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1426: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
1427: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
1428: ISDestroy(&is_aux1);
1429: ISDestroy(&is_aux2);
1431: if (pcbddc->switch_static || pcbddc->dbg_flag) {
1432: PetscMalloc1(n_D,&aux_array1);
1433: for (i=0, j=0; i<n_R; i++) {
1434: if (PetscBTLookup(bitmask,idx_R_local[i])) {
1435: aux_array1[j++] = i;
1436: }
1437: }
1438: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1439: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1440: ISDestroy(&is_aux1);
1441: }
1442: PetscBTDestroy(&bitmask);
1443: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1444: } else {
1445: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1446: IS tis;
1447: PetscInt schur_size;
1449: ISGetLocalSize(reuse_mumps->is_B,&schur_size);
1450: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
1451: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);
1452: ISDestroy(&tis);
1453: if (pcbddc->switch_static || pcbddc->dbg_flag) {
1454: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
1455: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1456: ISDestroy(&tis);
1457: }
1458: }
1459: return(0);
1460: }
1465: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1466: {
1467: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1468: PC_IS *pcis = (PC_IS*)pc->data;
1469: PC pc_temp;
1470: Mat A_RR;
1471: MatReuse reuse;
1472: PetscScalar m_one = -1.0;
1473: PetscReal value;
1474: PetscInt n_D,n_R;
1475: PetscBool use_exact,use_exact_reduced,issbaij;
1477: /* prefixes stuff */
1478: char dir_prefix[256],neu_prefix[256],str_level[16];
1479: size_t len;
1483: /* compute prefixes */
1484: PetscStrcpy(dir_prefix,"");
1485: PetscStrcpy(neu_prefix,"");
1486: if (!pcbddc->current_level) {
1487: PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1488: PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1489: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1490: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1491: } else {
1492: PetscStrcpy(str_level,"");
1493: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1494: PetscStrlen(((PetscObject)pc)->prefix,&len);
1495: len -= 15; /* remove "pc_bddc_coarse_" */
1496: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1497: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1498: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
1499: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
1500: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1501: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1502: PetscStrcat(dir_prefix,str_level);
1503: PetscStrcat(neu_prefix,str_level);
1504: }
1506: /* DIRICHLET PROBLEM */
1507: if (dirichlet) {
1508: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1509: if (pcbddc->local_mat->symmetric_set) {
1510: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1511: }
1512: /* Matrix for Dirichlet problem is pcis->A_II */
1513: n_D = pcis->n - pcis->n_B;
1514: if (!pcbddc->ksp_D) { /* create object if not yet build */
1515: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1516: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1517: /* default */
1518: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1519: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1520: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1521: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1522: if (issbaij) {
1523: PCSetType(pc_temp,PCCHOLESKY);
1524: } else {
1525: PCSetType(pc_temp,PCLU);
1526: }
1527: /* Allow user's customization */
1528: KSPSetFromOptions(pcbddc->ksp_D);
1529: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1530: }
1531: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
1532: if (sub_schurs->reuse_mumps) {
1533: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1535: KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);
1536: }
1537: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1538: if (!n_D) {
1539: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1540: PCSetType(pc_temp,PCNONE);
1541: }
1542: /* Set Up KSP for Dirichlet problem of BDDC */
1543: KSPSetUp(pcbddc->ksp_D);
1544: /* set ksp_D into pcis data */
1545: KSPDestroy(&pcis->ksp_D);
1546: PetscObjectReference((PetscObject)pcbddc->ksp_D);
1547: pcis->ksp_D = pcbddc->ksp_D;
1548: }
1550: /* NEUMANN PROBLEM */
1551: A_RR = 0;
1552: if (neumann) {
1553: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1554: PetscInt ibs,mbs;
1555: PetscBool issbaij;
1556: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1557: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1558: ISGetSize(pcbddc->is_R_local,&n_R);
1559: if (pcbddc->ksp_R) { /* already created ksp */
1560: PetscInt nn_R;
1561: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
1562: PetscObjectReference((PetscObject)A_RR);
1563: MatGetSize(A_RR,&nn_R,NULL);
1564: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1565: KSPReset(pcbddc->ksp_R);
1566: MatDestroy(&A_RR);
1567: reuse = MAT_INITIAL_MATRIX;
1568: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1569: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1570: MatDestroy(&A_RR);
1571: reuse = MAT_INITIAL_MATRIX;
1572: } else { /* safe to reuse the matrix */
1573: reuse = MAT_REUSE_MATRIX;
1574: }
1575: }
1576: /* last check */
1577: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1578: MatDestroy(&A_RR);
1579: reuse = MAT_INITIAL_MATRIX;
1580: }
1581: } else { /* first time, so we need to create the matrix */
1582: reuse = MAT_INITIAL_MATRIX;
1583: }
1584: /* extract A_RR */
1585: MatGetBlockSize(pcbddc->local_mat,&mbs);
1586: ISGetBlockSize(pcbddc->is_R_local,&ibs);
1587: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
1588: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1589: if (matis->A == pcbddc->local_mat) {
1590: MatDestroy(&pcbddc->local_mat);
1591: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1592: } else {
1593: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
1594: }
1595: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1596: if (matis->A == pcbddc->local_mat) {
1597: MatDestroy(&pcbddc->local_mat);
1598: MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1599: } else {
1600: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
1601: }
1602: }
1603: if (!sub_schurs->reuse_mumps) {
1604: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1605: if (pcbddc->local_mat->symmetric_set) {
1606: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1607: }
1608: } else {
1609: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1611: MatDestroy(&A_RR);
1612: PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);
1613: PetscObjectReference((PetscObject)A_RR);
1614: }
1615: if (!pcbddc->ksp_R) { /* create object if not present */
1616: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1617: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1618: /* default */
1619: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1620: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1621: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1622: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1623: if (issbaij) {
1624: PCSetType(pc_temp,PCCHOLESKY);
1625: } else {
1626: PCSetType(pc_temp,PCLU);
1627: }
1628: /* Allow user's customization */
1629: KSPSetFromOptions(pcbddc->ksp_R);
1630: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1631: }
1632: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
1633: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1634: if (!n_R) {
1635: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1636: PCSetType(pc_temp,PCNONE);
1637: }
1638: /* Reuse MUMPS solver if it is present */
1639: if (sub_schurs->reuse_mumps) {
1640: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1642: KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);
1643: }
1644: /* Set Up KSP for Neumann problem of BDDC */
1645: KSPSetUp(pcbddc->ksp_R);
1646: }
1647: /* free Neumann problem's matrix */
1648: MatDestroy(&A_RR);
1650: /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1651: if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1652: if (pcbddc->dbg_flag) {
1653: PetscViewerFlush(pcbddc->dbg_viewer);
1654: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1655: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1656: }
1657: if (dirichlet) { /* Dirichlet */
1658: VecSetRandom(pcis->vec1_D,NULL);
1659: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1660: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1661: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1662: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1663: /* need to be adapted? */
1664: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1665: MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1666: PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1667: /* print info */
1668: if (pcbddc->dbg_flag) {
1669: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1670: PetscViewerFlush(pcbddc->dbg_viewer);
1671: }
1672: if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1673: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);
1674: }
1675: }
1676: if (neumann) { /* Neumann */
1677: KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);
1678: VecSetRandom(pcbddc->vec1_R,NULL);
1679: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1680: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1681: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1682: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1683: /* need to be adapted? */
1684: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1685: MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1686: /* print info */
1687: if (pcbddc->dbg_flag) {
1688: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1689: PetscViewerFlush(pcbddc->dbg_viewer);
1690: }
1691: if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1692: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);
1693: }
1694: }
1695: }
1696: return(0);
1697: }
1701: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1702: {
1703: PetscErrorCode ierr;
1704: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1705: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1708: if (!sub_schurs->reuse_mumps) {
1709: VecSet(pcbddc->vec1_R,0.);
1710: }
1711: if (!pcbddc->switch_static) {
1712: if (applytranspose && pcbddc->local_auxmat1) {
1713: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
1714: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1715: }
1716: if (!sub_schurs->reuse_mumps) {
1717: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1718: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1719: } else {
1720: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1722: VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1723: VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1724: }
1725: } else {
1726: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1727: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1728: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1729: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1730: if (applytranspose && pcbddc->local_auxmat1) {
1731: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
1732: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1733: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1734: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1735: }
1736: }
1737: if (!sub_schurs->reuse_mumps) {
1738: if (applytranspose) {
1739: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1740: } else {
1741: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1742: }
1743: } else {
1744: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1746: if (applytranspose) {
1747: MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1748: } else {
1749: MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1750: }
1751: }
1752: VecSet(inout_B,0.);
1753: if (!pcbddc->switch_static) {
1754: if (!sub_schurs->reuse_mumps) {
1755: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1756: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1757: } else {
1758: PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1760: VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1761: VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1762: }
1763: if (!applytranspose && pcbddc->local_auxmat1) {
1764: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1765: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
1766: }
1767: } else {
1768: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1769: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1770: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1771: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1772: if (!applytranspose && pcbddc->local_auxmat1) {
1773: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1774: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
1775: }
1776: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1777: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1778: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1779: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1780: }
1781: return(0);
1782: }
1784: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1787: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1788: {
1790: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1791: PC_IS* pcis = (PC_IS*) (pc->data);
1792: const PetscScalar zero = 0.0;
1795: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1796: if (applytranspose) {
1797: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1798: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1799: } else {
1800: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1801: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1802: }
1803: /* start communications from local primal nodes to rhs of coarse solver */
1804: VecSet(pcbddc->coarse_vec,zero);
1805: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
1806: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
1808: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1809: /* TODO remove null space when doing multilevel */
1810: if (pcbddc->coarse_ksp) {
1811: Vec rhs,sol;
1813: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
1814: KSPGetSolution(pcbddc->coarse_ksp,&sol);
1815: if (applytranspose) {
1816: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
1817: } else {
1818: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
1819: }
1820: }
1822: /* Local solution on R nodes */
1823: if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1824: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
1825: }
1827: /* communications from coarse sol to local primal nodes */
1828: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
1829: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
1831: /* Sum contributions from two levels */
1832: if (applytranspose) {
1833: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1834: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1835: } else {
1836: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1837: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1838: }
1839: return(0);
1840: }
1844: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1845: {
1847: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1848: PetscScalar *array;
1849: Vec from,to;
1852: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1853: from = pcbddc->coarse_vec;
1854: to = pcbddc->vec1_P;
1855: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1856: Vec tvec;
1858: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1859: VecResetArray(tvec);
1860: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
1861: VecGetArray(tvec,&array);
1862: VecPlaceArray(from,array);
1863: VecRestoreArray(tvec,&array);
1864: }
1865: } else { /* from local to global -> put data in coarse right hand side */
1866: from = pcbddc->vec1_P;
1867: to = pcbddc->coarse_vec;
1868: }
1869: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1870: return(0);
1871: }
1875: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1876: {
1878: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1879: PetscScalar *array;
1880: Vec from,to;
1883: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1884: from = pcbddc->coarse_vec;
1885: to = pcbddc->vec1_P;
1886: } else { /* from local to global -> put data in coarse right hand side */
1887: from = pcbddc->vec1_P;
1888: to = pcbddc->coarse_vec;
1889: }
1890: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1891: if (smode == SCATTER_FORWARD) {
1892: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1893: Vec tvec;
1895: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1896: VecGetArray(to,&array);
1897: VecPlaceArray(tvec,array);
1898: VecRestoreArray(to,&array);
1899: }
1900: } else {
1901: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1902: VecResetArray(from);
1903: }
1904: }
1905: return(0);
1906: }
1908: /* uncomment for testing purposes */
1909: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1912: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1913: {
1914: PetscErrorCode ierr;
1915: PC_IS* pcis = (PC_IS*)(pc->data);
1916: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1917: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1918: /* one and zero */
1919: PetscScalar one=1.0,zero=0.0;
1920: /* space to store constraints and their local indices */
1921: PetscScalar *constraints_data;
1922: PetscInt *constraints_idxs,*constraints_idxs_B;
1923: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
1924: PetscInt *constraints_n;
1925: /* iterators */
1926: PetscInt i,j,k,total_counts,total_counts_cc,cum;
1927: /* BLAS integers */
1928: PetscBLASInt lwork,lierr;
1929: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
1930: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
1931: /* reuse */
1932: PetscInt olocal_primal_size,olocal_primal_size_cc;
1933: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
1934: /* change of basis */
1935: PetscBool qr_needed;
1936: PetscBT change_basis,qr_needed_idx;
1937: /* auxiliary stuff */
1938: PetscInt *nnz,*is_indices;
1939: PetscInt ncc;
1940: /* some quantities */
1941: PetscInt n_vertices,total_primal_vertices,valid_constraints;
1942: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1945: /* Destroy Mat objects computed previously */
1946: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1947: MatDestroy(&pcbddc->ConstraintMatrix);
1948: /* save info on constraints from previous setup (if any) */
1949: olocal_primal_size = pcbddc->local_primal_size;
1950: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1951: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
1952: PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
1953: PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
1954: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
1955: PetscFree(pcbddc->primal_indices_local_idxs);
1957: /* print some info */
1958: if (pcbddc->dbg_flag) {
1959: IS vertices;
1960: PetscInt nv,nedges,nfaces;
1961: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);
1962: ISGetSize(vertices,&nv);
1963: ISDestroy(&vertices);
1964: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1965: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1966: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
1967: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
1968: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
1969: PetscViewerFlush(pcbddc->dbg_viewer);
1970: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
1971: }
1973: if (!pcbddc->adaptive_selection) {
1974: IS ISForVertices,*ISForFaces,*ISForEdges;
1975: MatNullSpace nearnullsp;
1976: const Vec *nearnullvecs;
1977: Vec *localnearnullsp;
1978: PetscScalar *array;
1979: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
1980: PetscBool nnsp_has_cnst;
1981: /* LAPACK working arrays for SVD or POD */
1982: PetscBool skip_lapack,boolforchange;
1983: PetscScalar *work;
1984: PetscReal *singular_vals;
1985: #if defined(PETSC_USE_COMPLEX)
1986: PetscReal *rwork;
1987: #endif
1988: #if defined(PETSC_MISSING_LAPACK_GESVD)
1989: PetscScalar *temp_basis,*correlation_mat;
1990: #else
1991: PetscBLASInt dummy_int=1;
1992: PetscScalar dummy_scalar=1.;
1993: #endif
1995: /* Get index sets for faces, edges and vertices from graph */
1996: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1997: /* free unneeded index sets */
1998: if (!pcbddc->use_vertices) {
1999: ISDestroy(&ISForVertices);
2000: }
2001: if (!pcbddc->use_edges) {
2002: for (i=0;i<n_ISForEdges;i++) {
2003: ISDestroy(&ISForEdges[i]);
2004: }
2005: PetscFree(ISForEdges);
2006: n_ISForEdges = 0;
2007: }
2008: if (!pcbddc->use_faces) {
2009: for (i=0;i<n_ISForFaces;i++) {
2010: ISDestroy(&ISForFaces[i]);
2011: }
2012: PetscFree(ISForFaces);
2013: n_ISForFaces = 0;
2014: }
2016: #if defined(PETSC_USE_DEBUG)
2017: /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2018: Also use_change_of_basis should be consistent among processors */
2019: if (pcbddc->NullSpace) {
2020: PetscBool tbool[2],gbool[2];
2022: if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2023: pcbddc->use_change_of_basis = PETSC_TRUE;
2024: if (!ISForEdges) {
2025: pcbddc->use_change_on_faces = PETSC_TRUE;
2026: }
2027: }
2028: tbool[0] = pcbddc->use_change_of_basis;
2029: tbool[1] = pcbddc->use_change_on_faces;
2030: MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2031: pcbddc->use_change_of_basis = gbool[0];
2032: pcbddc->use_change_on_faces = gbool[1];
2033: }
2034: #endif
2036: /* check if near null space is attached to global mat */
2037: MatGetNearNullSpace(pc->pmat,&nearnullsp);
2038: if (nearnullsp) {
2039: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
2040: /* remove any stored info */
2041: MatNullSpaceDestroy(&pcbddc->onearnullspace);
2042: PetscFree(pcbddc->onearnullvecs_state);
2043: /* store information for BDDC solver reuse */
2044: PetscObjectReference((PetscObject)nearnullsp);
2045: pcbddc->onearnullspace = nearnullsp;
2046: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
2047: for (i=0;i<nnsp_size;i++) {
2048: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
2049: }
2050: } else { /* if near null space is not provided BDDC uses constants by default */
2051: nnsp_size = 0;
2052: nnsp_has_cnst = PETSC_TRUE;
2053: }
2054: /* get max number of constraints on a single cc */
2055: max_constraints = nnsp_size;
2056: if (nnsp_has_cnst) max_constraints++;
2058: /*
2059: Evaluate maximum storage size needed by the procedure
2060: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2061: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2062: There can be multiple constraints per connected component
2063: */
2064: n_vertices = 0;
2065: if (ISForVertices) {
2066: ISGetSize(ISForVertices,&n_vertices);
2067: }
2068: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2069: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
2071: total_counts = n_ISForFaces+n_ISForEdges;
2072: total_counts *= max_constraints;
2073: total_counts += n_vertices;
2074: PetscBTCreate(total_counts,&change_basis);
2076: total_counts = 0;
2077: max_size_of_constraint = 0;
2078: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2079: IS used_is;
2080: if (i<n_ISForEdges) {
2081: used_is = ISForEdges[i];
2082: } else {
2083: used_is = ISForFaces[i-n_ISForEdges];
2084: }
2085: ISGetSize(used_is,&j);
2086: total_counts += j;
2087: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2088: }
2089: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
2091: /* get local part of global near null space vectors */
2092: PetscMalloc1(nnsp_size,&localnearnullsp);
2093: for (k=0;k<nnsp_size;k++) {
2094: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
2095: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2096: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2097: }
2099: /* whether or not to skip lapack calls */
2100: skip_lapack = PETSC_TRUE;
2101: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2103: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2104: if (!skip_lapack) {
2105: PetscScalar temp_work;
2107: #if defined(PETSC_MISSING_LAPACK_GESVD)
2108: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2109: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
2110: PetscMalloc1(max_constraints,&singular_vals);
2111: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
2112: #if defined(PETSC_USE_COMPLEX)
2113: PetscMalloc1(3*max_constraints,&rwork);
2114: #endif
2115: /* now we evaluate the optimal workspace using query with lwork=-1 */
2116: PetscBLASIntCast(max_constraints,&Blas_N);
2117: PetscBLASIntCast(max_constraints,&Blas_LDA);
2118: lwork = -1;
2119: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2120: #if !defined(PETSC_USE_COMPLEX)
2121: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2122: #else
2123: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2124: #endif
2125: PetscFPTrapPop();
2126: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2127: #else /* on missing GESVD */
2128: /* SVD */
2129: PetscInt max_n,min_n;
2130: max_n = max_size_of_constraint;
2131: min_n = max_constraints;
2132: if (max_size_of_constraint < max_constraints) {
2133: min_n = max_size_of_constraint;
2134: max_n = max_constraints;
2135: }
2136: PetscMalloc1(min_n,&singular_vals);
2137: #if defined(PETSC_USE_COMPLEX)
2138: PetscMalloc1(5*min_n,&rwork);
2139: #endif
2140: /* now we evaluate the optimal workspace using query with lwork=-1 */
2141: lwork = -1;
2142: PetscBLASIntCast(max_n,&Blas_M);
2143: PetscBLASIntCast(min_n,&Blas_N);
2144: PetscBLASIntCast(max_n,&Blas_LDA);
2145: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2146: #if !defined(PETSC_USE_COMPLEX)
2147: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2148: #else
2149: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2150: #endif
2151: PetscFPTrapPop();
2152: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2153: #endif /* on missing GESVD */
2154: /* Allocate optimal workspace */
2155: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
2156: PetscMalloc1(lwork,&work);
2157: }
2158: /* Now we can loop on constraining sets */
2159: total_counts = 0;
2160: constraints_idxs_ptr[0] = 0;
2161: constraints_data_ptr[0] = 0;
2162: /* vertices */
2163: if (n_vertices) {
2164: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
2165: if (nnsp_has_cnst) { /* it considers all possible vertices */
2166: PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
2167: for (i=0;i<n_vertices;i++) {
2168: constraints_n[total_counts] = 1;
2169: constraints_data[total_counts] = 1.0;
2170: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2171: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2172: total_counts++;
2173: }
2174: } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2175: PetscBool used_vertex;
2176: for (i=0;i<n_vertices;i++) {
2177: used_vertex = PETSC_FALSE;
2178: k = 0;
2179: while (!used_vertex && k<nnsp_size) {
2180: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2181: if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2182: constraints_n[total_counts] = 1;
2183: constraints_idxs[total_counts] = is_indices[i];
2184: constraints_data[total_counts] = 1.0;
2185: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2186: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2187: total_counts++;
2188: used_vertex = PETSC_TRUE;
2189: }
2190: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2191: k++;
2192: }
2193: }
2194: }
2195: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
2196: n_vertices = total_counts;
2197: }
2199: /* edges and faces */
2200: total_counts_cc = total_counts;
2201: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2202: IS used_is;
2203: PetscBool idxs_copied = PETSC_FALSE;
2205: if (ncc<n_ISForEdges) {
2206: used_is = ISForEdges[ncc];
2207: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2208: } else {
2209: used_is = ISForFaces[ncc-n_ISForEdges];
2210: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2211: }
2212: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
2214: ISGetSize(used_is,&size_of_constraint);
2215: ISGetIndices(used_is,(const PetscInt**)&is_indices);
2216: /* change of basis should not be performed on local periodic nodes */
2217: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2218: if (nnsp_has_cnst) {
2219: PetscScalar quad_value;
2221: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2222: idxs_copied = PETSC_TRUE;
2224: if (!pcbddc->use_nnsp_true) {
2225: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2226: } else {
2227: quad_value = 1.0;
2228: }
2229: for (j=0;j<size_of_constraint;j++) {
2230: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2231: }
2232: temp_constraints++;
2233: total_counts++;
2234: }
2235: for (k=0;k<nnsp_size;k++) {
2236: PetscReal real_value;
2237: PetscScalar *ptr_to_data;
2239: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2240: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2241: for (j=0;j<size_of_constraint;j++) {
2242: ptr_to_data[j] = array[is_indices[j]];
2243: }
2244: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2245: /* check if array is null on the connected component */
2246: PetscBLASIntCast(size_of_constraint,&Blas_N);
2247: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2248: if (real_value > 0.0) { /* keep indices and values */
2249: temp_constraints++;
2250: total_counts++;
2251: if (!idxs_copied) {
2252: PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2253: idxs_copied = PETSC_TRUE;
2254: }
2255: }
2256: }
2257: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
2258: valid_constraints = temp_constraints;
2259: if (!pcbddc->use_nnsp_true && temp_constraints) {
2260: if (temp_constraints == 1) { /* just normalize the constraint */
2261: PetscScalar norm,*ptr_to_data;
2263: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2264: PetscBLASIntCast(size_of_constraint,&Blas_N);
2265: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2266: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2267: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2268: } else { /* perform SVD */
2269: PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2270: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2272: #if defined(PETSC_MISSING_LAPACK_GESVD)
2273: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2274: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2275: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2276: the constraints basis will differ (by a complex factor with absolute value equal to 1)
2277: from that computed using LAPACKgesvd
2278: -> This is due to a different computation of eigenvectors in LAPACKheev
2279: -> The quality of the POD-computed basis will be the same */
2280: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
2281: /* Store upper triangular part of correlation matrix */
2282: PetscBLASIntCast(size_of_constraint,&Blas_N);
2283: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2284: for (j=0;j<temp_constraints;j++) {
2285: for (k=0;k<j+1;k++) {
2286: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2287: }
2288: }
2289: /* compute eigenvalues and eigenvectors of correlation matrix */
2290: PetscBLASIntCast(temp_constraints,&Blas_N);
2291: PetscBLASIntCast(temp_constraints,&Blas_LDA);
2292: #if !defined(PETSC_USE_COMPLEX)
2293: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2294: #else
2295: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2296: #endif
2297: PetscFPTrapPop();
2298: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2299: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2300: j = 0;
2301: while (j < temp_constraints && singular_vals[j] < tol) j++;
2302: total_counts = total_counts-j;
2303: valid_constraints = temp_constraints-j;
2304: /* scale and copy POD basis into used quadrature memory */
2305: PetscBLASIntCast(size_of_constraint,&Blas_M);
2306: PetscBLASIntCast(temp_constraints,&Blas_N);
2307: PetscBLASIntCast(temp_constraints,&Blas_K);
2308: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2309: PetscBLASIntCast(temp_constraints,&Blas_LDB);
2310: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2311: if (j<temp_constraints) {
2312: PetscInt ii;
2313: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2314: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2315: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2316: PetscFPTrapPop();
2317: for (k=0;k<temp_constraints-j;k++) {
2318: for (ii=0;ii<size_of_constraint;ii++) {
2319: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2320: }
2321: }
2322: }
2323: #else /* on missing GESVD */
2324: PetscBLASIntCast(size_of_constraint,&Blas_M);
2325: PetscBLASIntCast(temp_constraints,&Blas_N);
2326: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2327: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2328: #if !defined(PETSC_USE_COMPLEX)
2329: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2330: #else
2331: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2332: #endif
2333: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2334: PetscFPTrapPop();
2335: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2336: k = temp_constraints;
2337: if (k > size_of_constraint) k = size_of_constraint;
2338: j = 0;
2339: while (j < k && singular_vals[k-j-1] < tol) j++;
2340: valid_constraints = k-j;
2341: total_counts = total_counts-temp_constraints+valid_constraints;
2342: #endif /* on missing GESVD */
2343: }
2344: }
2345: /* update pointers information */
2346: if (valid_constraints) {
2347: constraints_n[total_counts_cc] = valid_constraints;
2348: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2349: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2350: /* set change_of_basis flag */
2351: if (boolforchange) {
2352: PetscBTSet(change_basis,total_counts_cc);
2353: }
2354: total_counts_cc++;
2355: }
2356: }
2357: /* free workspace */
2358: if (!skip_lapack) {
2359: PetscFree(work);
2360: #if defined(PETSC_USE_COMPLEX)
2361: PetscFree(rwork);
2362: #endif
2363: PetscFree(singular_vals);
2364: #if defined(PETSC_MISSING_LAPACK_GESVD)
2365: PetscFree(correlation_mat);
2366: PetscFree(temp_basis);
2367: #endif
2368: }
2369: for (k=0;k<nnsp_size;k++) {
2370: VecDestroy(&localnearnullsp[k]);
2371: }
2372: PetscFree(localnearnullsp);
2373: /* free index sets of faces, edges and vertices */
2374: for (i=0;i<n_ISForFaces;i++) {
2375: ISDestroy(&ISForFaces[i]);
2376: }
2377: if (n_ISForFaces) {
2378: PetscFree(ISForFaces);
2379: }
2380: for (i=0;i<n_ISForEdges;i++) {
2381: ISDestroy(&ISForEdges[i]);
2382: }
2383: if (n_ISForEdges) {
2384: PetscFree(ISForEdges);
2385: }
2386: ISDestroy(&ISForVertices);
2387: } else {
2388: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2390: total_counts = 0;
2391: n_vertices = 0;
2392: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2393: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
2394: }
2395: max_constraints = 0;
2396: total_counts_cc = 0;
2397: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2398: total_counts += pcbddc->adaptive_constraints_n[i];
2399: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2400: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2401: }
2402: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2403: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2404: constraints_idxs = pcbddc->adaptive_constraints_idxs;
2405: constraints_data = pcbddc->adaptive_constraints_data;
2406: /* constraints_n differs from pcbddc->adaptive_constraints_n */
2407: PetscMalloc1(total_counts_cc,&constraints_n);
2408: total_counts_cc = 0;
2409: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2410: if (pcbddc->adaptive_constraints_n[i]) {
2411: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2412: }
2413: }
2414: #if 0
2415: printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2416: for (i=0;i<total_counts_cc;i++) {
2417: printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2418: printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2419: for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2420: printf(" %d",constraints_idxs[j]);
2421: }
2422: printf("\n");
2423: printf("number of cc: %d\n",constraints_n[i]);
2424: }
2425: for (i=0;i<n_vertices;i++) {
2426: PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2427: }
2428: for (i=0;i<sub_schurs->n_subs;i++) {
2429: PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2430: }
2431: #endif
2433: max_size_of_constraint = 0;
2434: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2435: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
2436: /* Change of basis */
2437: PetscBTCreate(total_counts_cc,&change_basis);
2438: if (pcbddc->use_change_of_basis) {
2439: for (i=0;i<sub_schurs->n_subs;i++) {
2440: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2441: PetscBTSet(change_basis,i+n_vertices);
2442: }
2443: }
2444: }
2445: }
2446: pcbddc->local_primal_size = total_counts;
2447: PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);
2449: /* map constraints_idxs in boundary numbering */
2450: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
2451: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);
2453: /* Create constraint matrix */
2454: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
2455: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
2456: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
2458: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2459: /* determine if a QR strategy is needed for change of basis */
2460: qr_needed = PETSC_FALSE;
2461: PetscBTCreate(total_counts_cc,&qr_needed_idx);
2462: total_primal_vertices=0;
2463: pcbddc->local_primal_size_cc = 0;
2464: for (i=0;i<total_counts_cc;i++) {
2465: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2466: if (size_of_constraint == 1) {
2467: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2468: pcbddc->local_primal_size_cc += 1;
2469: } else if (PetscBTLookup(change_basis,i)) {
2470: for (k=0;k<constraints_n[i];k++) {
2471: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2472: }
2473: pcbddc->local_primal_size_cc += constraints_n[i];
2474: if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2475: PetscBTSet(qr_needed_idx,i);
2476: qr_needed = PETSC_TRUE;
2477: }
2478: } else {
2479: pcbddc->local_primal_size_cc += 1;
2480: }
2481: }
2482: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2483: pcbddc->n_vertices = total_primal_vertices;
2484: /* permute indices in order to have a sorted set of vertices */
2485: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
2487: PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);
2488: PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
2489: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2491: /* nonzero structure of constraint matrix */
2492: /* and get reference dof for local constraints */
2493: PetscMalloc1(pcbddc->local_primal_size,&nnz);
2494: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2496: j = total_primal_vertices;
2497: total_counts = total_primal_vertices;
2498: cum = total_primal_vertices;
2499: for (i=n_vertices;i<total_counts_cc;i++) {
2500: if (!PetscBTLookup(change_basis,i)) {
2501: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2502: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2503: cum++;
2504: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2505: for (k=0;k<constraints_n[i];k++) {
2506: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2507: nnz[j+k] = size_of_constraint;
2508: }
2509: j += constraints_n[i];
2510: }
2511: }
2512: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
2513: PetscFree(nnz);
2515: /* set values in constraint matrix */
2516: for (i=0;i<total_primal_vertices;i++) {
2517: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
2518: }
2519: total_counts = total_primal_vertices;
2520: for (i=n_vertices;i<total_counts_cc;i++) {
2521: if (!PetscBTLookup(change_basis,i)) {
2522: PetscInt *cols;
2524: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2525: cols = constraints_idxs+constraints_idxs_ptr[i];
2526: for (k=0;k<constraints_n[i];k++) {
2527: PetscInt row = total_counts+k;
2528: PetscScalar *vals;
2530: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2531: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
2532: }
2533: total_counts += constraints_n[i];
2534: }
2535: }
2536: /* assembling */
2537: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2538: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2540: /*
2541: PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2542: MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
2543: PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
2544: */
2545: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2546: if (pcbddc->use_change_of_basis) {
2547: /* dual and primal dofs on a single cc */
2548: PetscInt dual_dofs,primal_dofs;
2549: /* working stuff for GEQRF */
2550: PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2551: PetscBLASInt lqr_work;
2552: /* working stuff for UNGQR */
2553: PetscScalar *gqr_work,lgqr_work_t;
2554: PetscBLASInt lgqr_work;
2555: /* working stuff for TRTRS */
2556: PetscScalar *trs_rhs;
2557: PetscBLASInt Blas_NRHS;
2558: /* pointers for values insertion into change of basis matrix */
2559: PetscInt *start_rows,*start_cols;
2560: PetscScalar *start_vals;
2561: /* working stuff for values insertion */
2562: PetscBT is_primal;
2563: PetscInt *aux_primal_numbering_B;
2564: /* matrix sizes */
2565: PetscInt global_size,local_size;
2566: /* temporary change of basis */
2567: Mat localChangeOfBasisMatrix;
2568: /* extra space for debugging */
2569: PetscScalar *dbg_work;
2571: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2572: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
2573: MatSetType(localChangeOfBasisMatrix,MATAIJ);
2574: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
2575: /* nonzeros for local mat */
2576: PetscMalloc1(pcis->n,&nnz);
2577: for (i=0;i<pcis->n;i++) nnz[i]=1;
2578: for (i=n_vertices;i<total_counts_cc;i++) {
2579: if (PetscBTLookup(change_basis,i)) {
2580: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2581: if (PetscBTLookup(qr_needed_idx,i)) {
2582: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2583: } else {
2584: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2585: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2586: }
2587: }
2588: }
2589: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
2590: PetscFree(nnz);
2591: /* Set initial identity in the matrix */
2592: for (i=0;i<pcis->n;i++) {
2593: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
2594: }
2596: if (pcbddc->dbg_flag) {
2597: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
2598: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
2599: }
2602: /* Now we loop on the constraints which need a change of basis */
2603: /*
2604: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2605: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2607: Basic blocks of change of basis matrix T computed by
2609: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2611: | 1 0 ... 0 s_1/S |
2612: | 0 1 ... 0 s_2/S |
2613: | ... |
2614: | 0 ... 1 s_{n-1}/S |
2615: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
2617: with S = \sum_{i=1}^n s_i^2
2618: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2619: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2621: - QR decomposition of constraints otherwise
2622: */
2623: if (qr_needed) {
2624: /* space to store Q */
2625: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
2626: /* first we issue queries for optimal work */
2627: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2628: PetscBLASIntCast(max_constraints,&Blas_N);
2629: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2630: lqr_work = -1;
2631: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2632: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2633: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
2634: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
2635: lgqr_work = -1;
2636: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2637: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
2638: PetscBLASIntCast(max_constraints,&Blas_K);
2639: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2640: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2641: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2642: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2643: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
2644: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
2645: /* array to store scaling factors for reflectors */
2646: PetscMalloc1(max_constraints,&qr_tau);
2647: /* array to store rhs and solution of triangular solver */
2648: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
2649: /* allocating workspace for check */
2650: if (pcbddc->dbg_flag) {
2651: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
2652: }
2653: }
2654: /* array to store whether a node is primal or not */
2655: PetscBTCreate(pcis->n_B,&is_primal);
2656: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
2657: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
2658: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
2659: for (i=0;i<total_primal_vertices;i++) {
2660: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
2661: }
2662: PetscFree(aux_primal_numbering_B);
2664: /* loop on constraints and see whether or not they need a change of basis and compute it */
2665: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2666: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2667: if (PetscBTLookup(change_basis,total_counts)) {
2668: /* get constraint info */
2669: primal_dofs = constraints_n[total_counts];
2670: dual_dofs = size_of_constraint-primal_dofs;
2672: if (pcbddc->dbg_flag) {
2673: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
2674: }
2676: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2678: /* copy quadrature constraints for change of basis check */
2679: if (pcbddc->dbg_flag) {
2680: PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2681: }
2682: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2683: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2685: /* compute QR decomposition of constraints */
2686: PetscBLASIntCast(size_of_constraint,&Blas_M);
2687: PetscBLASIntCast(primal_dofs,&Blas_N);
2688: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2689: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2690: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2691: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2692: PetscFPTrapPop();
2694: /* explictly compute R^-T */
2695: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
2696: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2697: PetscBLASIntCast(primal_dofs,&Blas_N);
2698: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
2699: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2700: PetscBLASIntCast(primal_dofs,&Blas_LDB);
2701: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2702: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2703: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2704: PetscFPTrapPop();
2706: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2707: PetscBLASIntCast(size_of_constraint,&Blas_M);
2708: PetscBLASIntCast(size_of_constraint,&Blas_N);
2709: PetscBLASIntCast(primal_dofs,&Blas_K);
2710: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2711: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2712: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2713: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2714: PetscFPTrapPop();
2716: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2717: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2718: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2719: PetscBLASIntCast(size_of_constraint,&Blas_M);
2720: PetscBLASIntCast(primal_dofs,&Blas_N);
2721: PetscBLASIntCast(primal_dofs,&Blas_K);
2722: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2723: PetscBLASIntCast(primal_dofs,&Blas_LDB);
2724: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2725: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2726: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2727: PetscFPTrapPop();
2728: PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2730: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2731: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2732: /* insert cols for primal dofs */
2733: for (j=0;j<primal_dofs;j++) {
2734: start_vals = &qr_basis[j*size_of_constraint];
2735: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2736: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2737: }
2738: /* insert cols for dual dofs */
2739: for (j=0,k=0;j<dual_dofs;k++) {
2740: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2741: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2742: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2743: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2744: j++;
2745: }
2746: }
2748: /* check change of basis */
2749: if (pcbddc->dbg_flag) {
2750: PetscInt ii,jj;
2751: PetscBool valid_qr=PETSC_TRUE;
2752: PetscBLASIntCast(primal_dofs,&Blas_M);
2753: PetscBLASIntCast(size_of_constraint,&Blas_N);
2754: PetscBLASIntCast(size_of_constraint,&Blas_K);
2755: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2756: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2757: PetscBLASIntCast(primal_dofs,&Blas_LDC);
2758: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2759: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2760: PetscFPTrapPop();
2761: for (jj=0;jj<size_of_constraint;jj++) {
2762: for (ii=0;ii<primal_dofs;ii++) {
2763: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2764: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2765: }
2766: }
2767: if (!valid_qr) {
2768: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
2769: for (jj=0;jj<size_of_constraint;jj++) {
2770: for (ii=0;ii<primal_dofs;ii++) {
2771: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2772: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2773: }
2774: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2775: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2776: }
2777: }
2778: }
2779: } else {
2780: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
2781: }
2782: }
2783: } else { /* simple transformation block */
2784: PetscInt row,col;
2785: PetscScalar val,norm;
2787: PetscBLASIntCast(size_of_constraint,&Blas_N);
2788: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2789: for (j=0;j<size_of_constraint;j++) {
2790: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2791: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2792: if (!PetscBTLookup(is_primal,row_B)) {
2793: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2794: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2795: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
2796: } else {
2797: for (k=0;k<size_of_constraint;k++) {
2798: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2799: if (row != col) {
2800: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2801: } else {
2802: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2803: }
2804: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2805: }
2806: }
2807: }
2808: if (pcbddc->dbg_flag) {
2809: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
2810: }
2811: }
2812: } else {
2813: if (pcbddc->dbg_flag) {
2814: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
2815: }
2816: }
2817: }
2819: /* free workspace */
2820: if (qr_needed) {
2821: if (pcbddc->dbg_flag) {
2822: PetscFree(dbg_work);
2823: }
2824: PetscFree(trs_rhs);
2825: PetscFree(qr_tau);
2826: PetscFree(qr_work);
2827: PetscFree(gqr_work);
2828: PetscFree(qr_basis);
2829: }
2830: PetscBTDestroy(&is_primal);
2831: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2832: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2834: /* assembling of global change of variable */
2835: {
2836: Mat tmat;
2837: PetscInt bs;
2839: VecGetSize(pcis->vec1_global,&global_size);
2840: VecGetLocalSize(pcis->vec1_global,&local_size);
2841: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
2842: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
2843: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
2844: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
2845: MatGetBlockSize(pc->pmat,&bs);
2846: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
2847: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
2848: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
2849: MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
2850: MatDestroy(&tmat);
2851: VecSet(pcis->vec1_global,0.0);
2852: VecSet(pcis->vec1_N,1.0);
2853: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2854: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2855: VecReciprocal(pcis->vec1_global);
2856: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
2857: }
2858: /* check */
2859: if (pcbddc->dbg_flag) {
2860: PetscReal error;
2861: Vec x,x_change;
2863: VecDuplicate(pcis->vec1_global,&x);
2864: VecDuplicate(pcis->vec1_global,&x_change);
2865: VecSetRandom(x,NULL);
2866: VecCopy(x,pcis->vec1_global);
2867: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2868: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2869: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
2870: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2871: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2872: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
2873: VecAXPY(x,-1.0,x_change);
2874: VecNorm(x,NORM_INFINITY,&error);
2875: PetscViewerFlush(pcbddc->dbg_viewer);
2876: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);
2877: VecDestroy(&x);
2878: VecDestroy(&x_change);
2879: }
2881: /* adapt sub_schurs computed (if any) */
2882: if (pcbddc->use_deluxe_scaling) {
2883: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2884: if (sub_schurs->S_Ej_all) {
2885: Mat S_new,tmat;
2886: IS is_all_N;
2888: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
2889: MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
2890: ISDestroy(&is_all_N);
2891: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2892: MatDestroy(&sub_schurs->S_Ej_all);
2893: PetscObjectReference((PetscObject)S_new);
2894: sub_schurs->S_Ej_all = S_new;
2895: MatDestroy(&S_new);
2896: if (sub_schurs->sum_S_Ej_all) {
2897: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2898: MatDestroy(&sub_schurs->sum_S_Ej_all);
2899: PetscObjectReference((PetscObject)S_new);
2900: sub_schurs->sum_S_Ej_all = S_new;
2901: MatDestroy(&S_new);
2902: }
2903: MatDestroy(&tmat);
2904: }
2905: }
2906: MatDestroy(&localChangeOfBasisMatrix);
2907: } else if (pcbddc->user_ChangeOfBasisMatrix) {
2908: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2909: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2910: }
2912: /* set up change of basis context */
2913: if (pcbddc->ChangeOfBasisMatrix) {
2914: PCBDDCChange_ctx change_ctx;
2916: if (!pcbddc->new_global_mat) {
2917: PetscInt global_size,local_size;
2919: VecGetSize(pcis->vec1_global,&global_size);
2920: VecGetLocalSize(pcis->vec1_global,&local_size);
2921: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);
2922: MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);
2923: MatSetType(pcbddc->new_global_mat,MATSHELL);
2924: MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);
2925: MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);
2926: PetscNew(&change_ctx);
2927: MatShellSetContext(pcbddc->new_global_mat,change_ctx);
2928: } else {
2929: MatShellGetContext(pcbddc->new_global_mat,&change_ctx);
2930: MatDestroy(&change_ctx->global_change);
2931: VecDestroyVecs(2,&change_ctx->work);
2932: }
2933: if (!pcbddc->user_ChangeOfBasisMatrix) {
2934: PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);
2935: change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2936: } else {
2937: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2938: change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2939: }
2940: VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);
2941: MatSetUp(pcbddc->new_global_mat);
2942: MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2943: MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2944: }
2946: /* check if a new primal space has been introduced */
2947: pcbddc->new_primal_space_local = PETSC_TRUE;
2948: if (olocal_primal_size == pcbddc->local_primal_size) {
2949: PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2950: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2951: if (!pcbddc->new_primal_space_local) {
2952: PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2953: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2954: }
2955: }
2956: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
2957: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2958: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2960: /* flush dbg viewer */
2961: if (pcbddc->dbg_flag) {
2962: PetscViewerFlush(pcbddc->dbg_viewer);
2963: }
2965: /* free workspace */
2966: PetscBTDestroy(&qr_needed_idx);
2967: PetscBTDestroy(&change_basis);
2968: if (!pcbddc->adaptive_selection) {
2969: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
2970: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
2971: } else {
2972: PetscFree5(pcbddc->adaptive_constraints_n,
2973: pcbddc->adaptive_constraints_idxs_ptr,
2974: pcbddc->adaptive_constraints_data_ptr,
2975: pcbddc->adaptive_constraints_idxs,
2976: pcbddc->adaptive_constraints_data);
2977: PetscFree(constraints_n);
2978: PetscFree(constraints_idxs_B);
2979: }
2980: return(0);
2981: }
2985: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2986: {
2987: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2988: PC_IS *pcis = (PC_IS*)pc->data;
2989: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2990: PetscInt ierr,i,vertex_size,N;
2991: PetscViewer viewer=pcbddc->dbg_viewer;
2994: /* Reset previously computed graph */
2995: PCBDDCGraphReset(pcbddc->mat_graph);
2996: /* Init local Graph struct */
2997: MatGetSize(pc->pmat,&N,NULL);
2998: PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);
3000: /* Check validity of the csr graph passed in by the user */
3001: if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3002: PCBDDCGraphResetCSR(pcbddc->mat_graph);
3003: }
3005: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3006: if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3007: PetscInt *xadj,*adjncy;
3008: PetscInt nvtxs;
3009: PetscBool flg_row=PETSC_FALSE;
3011: if (pcbddc->use_local_adj) {
3013: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3014: if (flg_row) {
3015: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
3016: pcbddc->computed_rowadj = PETSC_TRUE;
3017: }
3018: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3019: } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3020: IS is_dummy;
3021: ISLocalToGlobalMapping l2gmap_dummy;
3022: PetscInt j,sum;
3023: PetscInt *cxadj,*cadjncy;
3024: const PetscInt *idxs;
3025: PCBDDCGraph graph;
3026: PetscBT is_on_boundary;
3028: ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);
3029: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
3030: ISDestroy(&is_dummy);
3031: PCBDDCGraphCreate(&graph);
3032: PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);
3033: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
3034: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3035: if (flg_row) {
3036: graph->xadj = xadj;
3037: graph->adjncy = adjncy;
3038: }
3039: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
3040: PCBDDCGraphComputeConnectedComponents(graph);
3041: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3043: if (pcbddc->dbg_flag) {
3044: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);
3045: for (i=0;i<graph->ncc;i++) {
3046: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);
3047: }
3048: }
3050: PetscBTCreate(pcis->n,&is_on_boundary);
3051: ISGetIndices(pcis->is_B_local,&idxs);
3052: for (i=0;i<pcis->n_B;i++) {
3053: PetscBTSet(is_on_boundary,idxs[i]);
3054: }
3055: ISRestoreIndices(pcis->is_B_local,&idxs);
3057: PetscCalloc1(pcis->n+1,&cxadj);
3058: sum = 0;
3059: for (i=0;i<graph->ncc;i++) {
3060: PetscInt sizecc = 0;
3061: for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3062: if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3063: sizecc++;
3064: }
3065: }
3066: for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3067: if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3068: cxadj[graph->queue[j]] = sizecc;
3069: }
3070: }
3071: sum += sizecc*sizecc;
3072: }
3073: PetscMalloc1(sum,&cadjncy);
3074: sum = 0;
3075: for (i=0;i<pcis->n;i++) {
3076: PetscInt temp = cxadj[i];
3077: cxadj[i] = sum;
3078: sum += temp;
3079: }
3080: cxadj[pcis->n] = sum;
3081: for (i=0;i<graph->ncc;i++) {
3082: for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3083: if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3084: PetscInt k,sizecc = 0;
3085: for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3086: if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3087: cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3088: sizecc++;
3089: }
3090: }
3091: }
3092: }
3093: }
3094: if (sum) {
3095: PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);
3096: } else {
3097: PetscFree(cxadj);
3098: PetscFree(cadjncy);
3099: }
3100: graph->xadj = 0;
3101: graph->adjncy = 0;
3102: PCBDDCGraphDestroy(&graph);
3103: PetscBTDestroy(&is_on_boundary);
3104: }
3105: }
3106: if (pcbddc->dbg_flag) {
3107: PetscViewerFlush(pcbddc->dbg_viewer);
3108: }
3110: /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3111: vertex_size = 1;
3112: if (pcbddc->user_provided_isfordofs) {
3113: if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3114: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
3115: for (i=0;i<pcbddc->n_ISForDofs;i++) {
3116: PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
3117: ISDestroy(&pcbddc->ISForDofs[i]);
3118: }
3119: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3120: pcbddc->n_ISForDofs = 0;
3121: PetscFree(pcbddc->ISForDofs);
3122: }
3123: /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3124: MatGetBlockSize(matis->A,&vertex_size);
3125: } else {
3126: if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3127: MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);
3128: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
3129: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3130: ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
3131: }
3132: }
3133: }
3135: /* Setup of Graph */
3136: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3137: PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
3138: }
3139: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3140: PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
3141: }
3142: PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3144: /* Graph's connected components analysis */
3145: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
3147: /* print some info to stdout */
3148: if (pcbddc->dbg_flag) {
3149: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3150: }
3152: /* mark topography has done */
3153: pcbddc->recompute_topography = PETSC_FALSE;
3154: return(0);
3155: }
3157: /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3160: PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3161: {
3162: PetscSF sf;
3163: PetscLayout map;
3164: const PetscInt *idxs;
3165: PetscInt *leaf_data,*root_data,*gidxs;
3166: PetscInt N,n,i,lbounds[2],gbounds[2],Nl;
3167: PetscInt n_n,nlocals,start,first_index;
3168: PetscMPIInt commsize;
3169: PetscBool first_found;
3173: ISGetLocalSize(subset,&n);
3174: if (subset_mult) {
3176: ISGetLocalSize(subset,&i);
3177: if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3178: }
3179: /* create workspace layout for computing global indices of subset */
3180: ISGetIndices(subset,&idxs);
3181: lbounds[0] = lbounds[1] = 0;
3182: for (i=0;i<n;i++) {
3183: if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3184: else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3185: }
3186: lbounds[0] = -lbounds[0];
3187: MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));
3188: gbounds[0] = -gbounds[0];
3189: N = gbounds[1] - gbounds[0] + 1;
3190: PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);
3191: PetscLayoutSetBlockSize(map,1);
3192: PetscLayoutSetSize(map,N);
3193: PetscLayoutSetUp(map);
3194: PetscLayoutGetLocalSize(map,&Nl);
3196: /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3197: PetscMalloc2(n,&leaf_data,Nl,&root_data);
3198: if (subset_mult) {
3199: const PetscInt* idxs_mult;
3201: ISGetIndices(subset_mult,&idxs_mult);
3202: PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));
3203: ISRestoreIndices(subset_mult,&idxs_mult);
3204: } else {
3205: for (i=0;i<n;i++) leaf_data[i] = 1;
3206: }
3207: /* local size of new subset */
3208: n_n = 0;
3209: for (i=0;i<n;i++) n_n += leaf_data[i];
3211: /* global indexes in layout */
3212: PetscMalloc1(n_n,&gidxs); /* allocating possibly extra space in gidxs which will be used later */
3213: for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3214: ISRestoreIndices(subset,&idxs);
3215: PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);
3216: PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);
3217: PetscLayoutDestroy(&map);
3219: /* reduce from leaves to roots */
3220: PetscMemzero(root_data,Nl*sizeof(PetscInt));
3221: PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);
3222: PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);
3224: /* count indexes in local part of layout */
3225: nlocals = 0;
3226: first_index = -1;
3227: first_found = PETSC_FALSE;
3228: for (i=0;i<Nl;i++) {
3229: if (!first_found && root_data[i]) {
3230: first_found = PETSC_TRUE;
3231: first_index = i;
3232: }
3233: nlocals += root_data[i];
3234: }
3236: /* cumulative of number of indexes and size of subset without holes */
3237: #if defined(PETSC_HAVE_MPI_EXSCAN)
3238: start = 0;
3239: MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3240: #else
3241: MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3242: start = start-nlocals;
3243: #endif
3245: if (N_n) { /* compute total size of new subset if requested */
3246: *N_n = start + nlocals;
3247: MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);
3248: MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));
3249: }
3251: /* adapt root data with cumulative */
3252: if (first_found) {
3253: PetscInt old_index;
3255: root_data[first_index] += start;
3256: old_index = first_index;
3257: for (i=first_index+1;i<Nl;i++) {
3258: if (root_data[i]) {
3259: root_data[i] += root_data[old_index];
3260: old_index = i;
3261: }
3262: }
3263: }
3265: /* from roots to leaves */
3266: PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);
3267: PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);
3268: PetscSFDestroy(&sf);
3270: /* create new IS with global indexes without holes */
3271: if (subset_mult) {
3272: const PetscInt* idxs_mult;
3273: PetscInt cum;
3275: cum = 0;
3276: ISGetIndices(subset_mult,&idxs_mult);
3277: for (i=0;i<n;i++) {
3278: PetscInt j;
3279: for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3280: }
3281: ISRestoreIndices(subset_mult,&idxs_mult);
3282: } else {
3283: for (i=0;i<n;i++) {
3284: gidxs[i] = leaf_data[i]-1;
3285: }
3286: }
3287: ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);
3288: PetscFree2(leaf_data,root_data);
3289: return(0);
3290: }
3294: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3295: {
3296: PetscInt i,j;
3297: PetscScalar *alphas;
3301: /* this implements stabilized Gram-Schmidt */
3302: PetscMalloc1(n,&alphas);
3303: for (i=0;i<n;i++) {
3304: VecNormalize(vecs[i],NULL);
3305: if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
3306: for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
3307: }
3308: PetscFree(alphas);
3309: return(0);
3310: }
3314: PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3315: {
3316: IS ranks_send_to;
3317: PetscInt n_neighs,*neighs,*n_shared,**shared;
3318: PetscMPIInt size,rank,color;
3319: PetscInt *xadj,*adjncy;
3320: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3321: PetscInt i,local_size,threshold=0;
3322: PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3323: PetscSubcomm subcomm;
3327: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);
3328: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
3329: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
3331: /* Get info on mapping */
3332: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);
3333: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
3335: /* build local CSR graph of subdomains' connectivity */
3336: PetscMalloc1(2,&xadj);
3337: xadj[0] = 0;
3338: xadj[1] = PetscMax(n_neighs-1,0);
3339: PetscMalloc1(xadj[1],&adjncy);
3340: PetscMalloc1(xadj[1],&adjncy_wgt);
3342: if (threshold) {
3343: PetscInt xadj_count = 0;
3344: for (i=1;i<n_neighs;i++) {
3345: if (n_shared[i] > threshold) {
3346: adjncy[xadj_count] = neighs[i];
3347: adjncy_wgt[xadj_count] = n_shared[i];
3348: xadj_count++;
3349: }
3350: }
3351: xadj[1] = xadj_count;
3352: } else {
3353: if (xadj[1]) {
3354: PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
3355: PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
3356: }
3357: }
3358: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
3359: if (use_square) {
3360: for (i=0;i<xadj[1];i++) {
3361: adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3362: }
3363: }
3364: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
3366: PetscMalloc1(1,&ranks_send_to_idx);
3368: /*
3369: Restrict work on active processes only.
3370: */
3371: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
3372: PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
3373: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
3374: PetscMPIIntCast(!local_size,&color);
3375: PetscSubcommSetTypeGeneral(subcomm,color,rank);
3376: if (color) {
3377: PetscFree(xadj);
3378: PetscFree(adjncy);
3379: PetscFree(adjncy_wgt);
3380: } else {
3381: Mat subdomain_adj;
3382: IS new_ranks,new_ranks_contig;
3383: MatPartitioning partitioner;
3384: PetscInt prank,rstart=0,rend=0;
3385: PetscInt *is_indices,*oldranks;
3386: PetscBool aggregate;
3388: MPI_Comm_size(PetscSubcommChild(subcomm),&size);
3389: PetscMalloc1(size,&oldranks);
3390: prank = rank;
3391: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));
3392: /*
3393: for (i=0;i<size;i++) {
3394: PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3395: }
3396: */
3397: for (i=0;i<xadj[1];i++) {
3398: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
3399: }
3400: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
3401: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3402: if (aggregate) {
3403: PetscInt lrows,row,ncols,*cols;
3404: PetscMPIInt nrank;
3405: PetscScalar *vals;
3407: MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);
3408: lrows = 0;
3409: if (nrank<redprocs) {
3410: lrows = size/redprocs;
3411: if (nrank<size%redprocs) lrows++;
3412: }
3413: MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
3414: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
3415: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3416: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
3417: row = nrank;
3418: ncols = xadj[1]-xadj[0];
3419: cols = adjncy;
3420: PetscMalloc1(ncols,&vals);
3421: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3422: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
3423: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
3424: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
3425: PetscFree(xadj);
3426: PetscFree(adjncy);
3427: PetscFree(adjncy_wgt);
3428: PetscFree(vals);
3429: } else {
3430: MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
3431: }
3432: /* MatView(subdomain_adj,0); */
3434: /* Partition */
3435: MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);
3436: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
3437: if (use_vwgt) {
3438: PetscMalloc1(1,&v_wgt);
3439: v_wgt[0] = local_size;
3440: MatPartitioningSetVertexWeights(partitioner,v_wgt);
3441: }
3442: n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3443: MatPartitioningSetNParts(partitioner,n_subdomains);
3444: MatPartitioningSetFromOptions(partitioner);
3445: MatPartitioningApply(partitioner,&new_ranks);
3446: /* MatPartitioningView(partitioner,0); */
3448: /* renumber new_ranks to avoid "holes" in new set of processors */
3449: PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);
3450: ISDestroy(&new_ranks);
3451: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3452: if (!redprocs) {
3453: ranks_send_to_idx[0] = oldranks[is_indices[0]];
3454: } else {
3455: PetscInt idxs[1];
3456: PetscMPIInt tag;
3457: MPI_Request *reqs;
3459: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
3460: PetscMalloc1(rend-rstart,&reqs);
3461: for (i=rstart;i<rend;i++) {
3462: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);
3463: }
3464: MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);
3465: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
3466: PetscFree(reqs);
3467: ranks_send_to_idx[0] = oldranks[idxs[0]];
3468: }
3469: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3470: /* clean up */
3471: PetscFree(oldranks);
3472: ISDestroy(&new_ranks_contig);
3473: MatDestroy(&subdomain_adj);
3474: MatPartitioningDestroy(&partitioner);
3475: }
3476: PetscSubcommDestroy(&subcomm);
3478: /* assemble parallel IS for sends */
3479: i = 1;
3480: if (color) i=0;
3481: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
3482: /* get back IS */
3483: *is_sends = ranks_send_to;
3484: return(0);
3485: }
3487: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3491: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3492: {
3493: Mat local_mat;
3494: IS is_sends_internal;
3495: PetscInt rows,cols,new_local_rows;
3496: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3497: PetscBool ismatis,isdense,newisdense,destroy_mat;
3498: ISLocalToGlobalMapping l2gmap;
3499: PetscInt* l2gmap_indices;
3500: const PetscInt* is_indices;
3501: MatType new_local_type;
3502: /* buffers */
3503: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3504: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3505: PetscInt *recv_buffer_idxs_local;
3506: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3507: /* MPI */
3508: MPI_Comm comm,comm_n;
3509: PetscSubcomm subcomm;
3510: PetscMPIInt n_sends,n_recvs,commsize;
3511: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3512: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3513: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3514: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3515: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3516: PetscErrorCode ierr;
3519: /* TODO: add missing checks */
3524: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
3525: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3526: MatISGetLocalMat(mat,&local_mat);
3527: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
3528: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3529: MatGetSize(local_mat,&rows,&cols);
3530: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3531: if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3532: PetscInt mrows,mcols,mnrows,mncols;
3533: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
3534: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3535: MatGetSize(mat,&mrows,&mcols);
3536: MatGetSize(*mat_n,&mnrows,&mncols);
3537: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3538: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3539: }
3540: MatGetBlockSize(local_mat,&bs);
3542: /* prepare IS for sending if not provided */
3543: if (!is_sends) {
3544: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3545: MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);
3546: } else {
3547: PetscObjectReference((PetscObject)is_sends);
3548: is_sends_internal = is_sends;
3549: }
3551: /* get comm */
3552: PetscObjectGetComm((PetscObject)mat,&comm);
3554: /* compute number of sends */
3555: ISGetLocalSize(is_sends_internal,&i);
3556: PetscMPIIntCast(i,&n_sends);
3558: /* compute number of receives */
3559: MPI_Comm_size(comm,&commsize);
3560: PetscMalloc1(commsize,&iflags);
3561: PetscMemzero(iflags,commsize*sizeof(*iflags));
3562: ISGetIndices(is_sends_internal,&is_indices);
3563: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3564: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
3565: PetscFree(iflags);
3567: /* restrict comm if requested */
3568: subcomm = 0;
3569: destroy_mat = PETSC_FALSE;
3570: if (restrict_comm) {
3571: PetscMPIInt color,subcommsize;
3573: color = 0;
3574: if (restrict_full) {
3575: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3576: } else {
3577: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3578: }
3579: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
3580: subcommsize = commsize - subcommsize;
3581: /* check if reuse has been requested */
3582: if (reuse == MAT_REUSE_MATRIX) {
3583: if (*mat_n) {
3584: PetscMPIInt subcommsize2;
3585: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
3586: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3587: comm_n = PetscObjectComm((PetscObject)*mat_n);
3588: } else {
3589: comm_n = PETSC_COMM_SELF;
3590: }
3591: } else { /* MAT_INITIAL_MATRIX */
3592: PetscMPIInt rank;
3594: MPI_Comm_rank(comm,&rank);
3595: PetscSubcommCreate(comm,&subcomm);
3596: PetscSubcommSetNumber(subcomm,2);
3597: PetscSubcommSetTypeGeneral(subcomm,color,rank);
3598: comm_n = PetscSubcommChild(subcomm);
3599: }
3600: /* flag to destroy *mat_n if not significative */
3601: if (color) destroy_mat = PETSC_TRUE;
3602: } else {
3603: comm_n = comm;
3604: }
3606: /* prepare send/receive buffers */
3607: PetscMalloc1(commsize,&ilengths_idxs);
3608: PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
3609: PetscMalloc1(commsize,&ilengths_vals);
3610: PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
3611: if (nis) {
3612: PetscCalloc1(commsize,&ilengths_idxs_is);
3613: }
3615: /* Get data from local matrices */
3616: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3617: /* TODO: See below some guidelines on how to prepare the local buffers */
3618: /*
3619: send_buffer_vals should contain the raw values of the local matrix
3620: send_buffer_idxs should contain:
3621: - MatType_PRIVATE type
3622: - PetscInt size_of_l2gmap
3623: - PetscInt global_row_indices[size_of_l2gmap]
3624: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3625: */
3626: else {
3627: MatDenseGetArray(local_mat,&send_buffer_vals);
3628: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
3629: PetscMalloc1(i+2,&send_buffer_idxs);
3630: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3631: send_buffer_idxs[1] = i;
3632: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
3633: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
3634: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
3635: PetscMPIIntCast(i,&len);
3636: for (i=0;i<n_sends;i++) {
3637: ilengths_vals[is_indices[i]] = len*len;
3638: ilengths_idxs[is_indices[i]] = len+2;
3639: }
3640: }
3641: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
3642: /* additional is (if any) */
3643: if (nis) {
3644: PetscMPIInt psum;
3645: PetscInt j;
3646: for (j=0,psum=0;j<nis;j++) {
3647: PetscInt plen;
3648: ISGetLocalSize(isarray[j],&plen);
3649: PetscMPIIntCast(plen,&len);
3650: psum += len+1; /* indices + lenght */
3651: }
3652: PetscMalloc1(psum,&send_buffer_idxs_is);
3653: for (j=0,psum=0;j<nis;j++) {
3654: PetscInt plen;
3655: const PetscInt *is_array_idxs;
3656: ISGetLocalSize(isarray[j],&plen);
3657: send_buffer_idxs_is[psum] = plen;
3658: ISGetIndices(isarray[j],&is_array_idxs);
3659: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
3660: ISRestoreIndices(isarray[j],&is_array_idxs);
3661: psum += plen+1; /* indices + lenght */
3662: }
3663: for (i=0;i<n_sends;i++) {
3664: ilengths_idxs_is[is_indices[i]] = psum;
3665: }
3666: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
3667: }
3669: buf_size_idxs = 0;
3670: buf_size_vals = 0;
3671: buf_size_idxs_is = 0;
3672: for (i=0;i<n_recvs;i++) {
3673: buf_size_idxs += (PetscInt)olengths_idxs[i];
3674: buf_size_vals += (PetscInt)olengths_vals[i];
3675: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3676: }
3677: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
3678: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
3679: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
3681: /* get new tags for clean communications */
3682: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
3683: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
3684: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
3686: /* allocate for requests */
3687: PetscMalloc1(n_sends,&send_req_idxs);
3688: PetscMalloc1(n_sends,&send_req_vals);
3689: PetscMalloc1(n_sends,&send_req_idxs_is);
3690: PetscMalloc1(n_recvs,&recv_req_idxs);
3691: PetscMalloc1(n_recvs,&recv_req_vals);
3692: PetscMalloc1(n_recvs,&recv_req_idxs_is);
3694: /* communications */
3695: ptr_idxs = recv_buffer_idxs;
3696: ptr_vals = recv_buffer_vals;
3697: ptr_idxs_is = recv_buffer_idxs_is;
3698: for (i=0;i<n_recvs;i++) {
3699: source_dest = onodes[i];
3700: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
3701: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
3702: ptr_idxs += olengths_idxs[i];
3703: ptr_vals += olengths_vals[i];
3704: if (nis) {
3705: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
3706: ptr_idxs_is += olengths_idxs_is[i];
3707: }
3708: }
3709: for (i=0;i<n_sends;i++) {
3710: PetscMPIIntCast(is_indices[i],&source_dest);
3711: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
3712: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
3713: if (nis) {
3714: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
3715: }
3716: }
3717: ISRestoreIndices(is_sends_internal,&is_indices);
3718: ISDestroy(&is_sends_internal);
3720: /* assemble new l2g map */
3721: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
3722: ptr_idxs = recv_buffer_idxs;
3723: new_local_rows = 0;
3724: for (i=0;i<n_recvs;i++) {
3725: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3726: ptr_idxs += olengths_idxs[i];
3727: }
3728: PetscMalloc1(new_local_rows,&l2gmap_indices);
3729: ptr_idxs = recv_buffer_idxs;
3730: new_local_rows = 0;
3731: for (i=0;i<n_recvs;i++) {
3732: PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
3733: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3734: ptr_idxs += olengths_idxs[i];
3735: }
3736: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
3737: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
3738: PetscFree(l2gmap_indices);
3740: /* infer new local matrix type from received local matrices type */
3741: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3742: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3743: if (n_recvs) {
3744: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3745: ptr_idxs = recv_buffer_idxs;
3746: for (i=0;i<n_recvs;i++) {
3747: if ((PetscInt)new_local_type_private != *ptr_idxs) {
3748: new_local_type_private = MATAIJ_PRIVATE;
3749: break;
3750: }
3751: ptr_idxs += olengths_idxs[i];
3752: }
3753: switch (new_local_type_private) {
3754: case MATDENSE_PRIVATE:
3755: if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3756: new_local_type = MATSEQAIJ;
3757: bs = 1;
3758: } else { /* if I receive only 1 dense matrix */
3759: new_local_type = MATSEQDENSE;
3760: bs = 1;
3761: }
3762: break;
3763: case MATAIJ_PRIVATE:
3764: new_local_type = MATSEQAIJ;
3765: bs = 1;
3766: break;
3767: case MATBAIJ_PRIVATE:
3768: new_local_type = MATSEQBAIJ;
3769: break;
3770: case MATSBAIJ_PRIVATE:
3771: new_local_type = MATSEQSBAIJ;
3772: break;
3773: default:
3774: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3775: break;
3776: }
3777: } else { /* by default, new_local_type is seqdense */
3778: new_local_type = MATSEQDENSE;
3779: bs = 1;
3780: }
3782: /* create MATIS object if needed */
3783: if (reuse == MAT_INITIAL_MATRIX) {
3784: MatGetSize(mat,&rows,&cols);
3785: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
3786: } else {
3787: /* it also destroys the local matrices */
3788: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
3789: }
3790: MatISGetLocalMat(*mat_n,&local_mat);
3791: MatSetType(local_mat,new_local_type);
3793: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
3795: /* Global to local map of received indices */
3796: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
3797: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
3798: ISLocalToGlobalMappingDestroy(&l2gmap);
3800: /* restore attributes -> type of incoming data and its size */
3801: buf_size_idxs = 0;
3802: for (i=0;i<n_recvs;i++) {
3803: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3804: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3805: buf_size_idxs += (PetscInt)olengths_idxs[i];
3806: }
3807: PetscFree(recv_buffer_idxs);
3809: /* set preallocation */
3810: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
3811: if (!newisdense) {
3812: PetscInt *new_local_nnz=0;
3814: ptr_idxs = recv_buffer_idxs_local;
3815: if (n_recvs) {
3816: PetscCalloc1(new_local_rows,&new_local_nnz);
3817: }
3818: for (i=0;i<n_recvs;i++) {
3819: PetscInt j;
3820: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3821: for (j=0;j<*(ptr_idxs+1);j++) {
3822: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3823: }
3824: } else {
3825: /* TODO */
3826: }
3827: ptr_idxs += olengths_idxs[i];
3828: }
3829: if (new_local_nnz) {
3830: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3831: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
3832: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3833: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3834: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3835: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3836: } else {
3837: MatSetUp(local_mat);
3838: }
3839: PetscFree(new_local_nnz);
3840: } else {
3841: MatSetUp(local_mat);
3842: }
3844: /* set values */
3845: ptr_vals = recv_buffer_vals;
3846: ptr_idxs = recv_buffer_idxs_local;
3847: for (i=0;i<n_recvs;i++) {
3848: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3849: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3850: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3851: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3852: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3853: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3854: } else {
3855: /* TODO */
3856: }
3857: ptr_idxs += olengths_idxs[i];
3858: ptr_vals += olengths_vals[i];
3859: }
3860: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3861: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3862: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
3863: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
3864: PetscFree(recv_buffer_vals);
3865: PetscFree(recv_buffer_idxs_local);
3867: #if 0
3868: if (!restrict_comm) { /* check */
3869: Vec lvec,rvec;
3870: PetscReal infty_error;
3872: MatCreateVecs(mat,&rvec,&lvec);
3873: VecSetRandom(rvec,NULL);
3874: MatMult(mat,rvec,lvec);
3875: VecScale(lvec,-1.0);
3876: MatMultAdd(*mat_n,rvec,lvec,lvec);
3877: VecNorm(lvec,NORM_INFINITY,&infty_error);
3878: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3879: VecDestroy(&rvec);
3880: VecDestroy(&lvec);
3881: }
3882: #endif
3884: /* assemble new additional is (if any) */
3885: if (nis) {
3886: PetscInt **temp_idxs,*count_is,j,psum;
3888: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
3889: PetscCalloc1(nis,&count_is);
3890: ptr_idxs = recv_buffer_idxs_is;
3891: psum = 0;
3892: for (i=0;i<n_recvs;i++) {
3893: for (j=0;j<nis;j++) {
3894: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3895: count_is[j] += plen; /* increment counting of buffer for j-th IS */
3896: psum += plen;
3897: ptr_idxs += plen+1; /* shift pointer to received data */
3898: }
3899: }
3900: PetscMalloc1(nis,&temp_idxs);
3901: PetscMalloc1(psum,&temp_idxs[0]);
3902: for (i=1;i<nis;i++) {
3903: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3904: }
3905: PetscMemzero(count_is,nis*sizeof(PetscInt));
3906: ptr_idxs = recv_buffer_idxs_is;
3907: for (i=0;i<n_recvs;i++) {
3908: for (j=0;j<nis;j++) {
3909: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3910: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
3911: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3912: ptr_idxs += plen+1; /* shift pointer to received data */
3913: }
3914: }
3915: for (i=0;i<nis;i++) {
3916: ISDestroy(&isarray[i]);
3917: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
3918: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
3919: }
3920: PetscFree(count_is);
3921: PetscFree(temp_idxs[0]);
3922: PetscFree(temp_idxs);
3923: }
3924: /* free workspace */
3925: PetscFree(recv_buffer_idxs_is);
3926: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3927: PetscFree(send_buffer_idxs);
3928: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3929: if (isdense) {
3930: MatISGetLocalMat(mat,&local_mat);
3931: MatDenseRestoreArray(local_mat,&send_buffer_vals);
3932: } else {
3933: /* PetscFree(send_buffer_vals); */
3934: }
3935: if (nis) {
3936: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
3937: PetscFree(send_buffer_idxs_is);
3938: }
3939: PetscFree(recv_req_idxs);
3940: PetscFree(recv_req_vals);
3941: PetscFree(recv_req_idxs_is);
3942: PetscFree(send_req_idxs);
3943: PetscFree(send_req_vals);
3944: PetscFree(send_req_idxs_is);
3945: PetscFree(ilengths_vals);
3946: PetscFree(ilengths_idxs);
3947: PetscFree(olengths_vals);
3948: PetscFree(olengths_idxs);
3949: PetscFree(onodes);
3950: if (nis) {
3951: PetscFree(ilengths_idxs_is);
3952: PetscFree(olengths_idxs_is);
3953: PetscFree(onodes_is);
3954: }
3955: PetscSubcommDestroy(&subcomm);
3956: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3957: MatDestroy(mat_n);
3958: for (i=0;i<nis;i++) {
3959: ISDestroy(&isarray[i]);
3960: }
3961: *mat_n = NULL;
3962: }
3963: return(0);
3964: }
3966: /* temporary hack into ksp private data structure */
3967: #include <petsc/private/kspimpl.h>
3971: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3972: {
3973: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3974: PC_IS *pcis = (PC_IS*)pc->data;
3975: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
3976: MatNullSpace CoarseNullSpace=NULL;
3977: ISLocalToGlobalMapping coarse_islg;
3978: IS coarse_is,*isarray;
3979: PetscInt i,im_active=-1,active_procs=-1;
3980: PetscInt nis,nisdofs,nisneu;
3981: PC pc_temp;
3982: PCType coarse_pc_type;
3983: KSPType coarse_ksp_type;
3984: PetscBool multilevel_requested,multilevel_allowed;
3985: PetscBool isredundant,isbddc,isnn,coarse_reuse;
3986: Mat t_coarse_mat_is;
3987: PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3988: PetscMPIInt all_procs;
3989: PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist;
3990: PetscBool compute_vecs = PETSC_FALSE;
3991: PetscScalar *array;
3992: PetscErrorCode ierr;
3995: /* Assign global numbering to coarse dofs */
3996: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3997: PetscInt ocoarse_size;
3998: compute_vecs = PETSC_TRUE;
3999: ocoarse_size = pcbddc->coarse_size;
4000: PetscFree(pcbddc->global_primal_indices);
4001: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
4002: /* see if we can avoid some work */
4003: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4004: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4005: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4006: PC pc;
4007: PetscBool isbddc;
4009: /* temporary workaround since PCBDDC does not have a reset method so far */
4010: KSPGetPC(pcbddc->coarse_ksp,&pc);
4011: PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);
4012: if (isbddc) {
4013: PCDestroy(&pc);
4014: }
4015: KSPReset(pcbddc->coarse_ksp);
4016: coarse_reuse = PETSC_FALSE;
4017: } else { /* we can safely reuse already computed coarse matrix */
4018: coarse_reuse = PETSC_TRUE;
4019: }
4020: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4021: coarse_reuse = PETSC_FALSE;
4022: }
4023: /* reset any subassembling information */
4024: ISDestroy(&pcbddc->coarse_subassembling);
4025: ISDestroy(&pcbddc->coarse_subassembling_init);
4026: } else { /* primal space is unchanged, so we can reuse coarse matrix */
4027: coarse_reuse = PETSC_TRUE;
4028: }
4030: /* count "active" (i.e. with positive local size) and "void" processes */
4031: im_active = !!(pcis->n);
4032: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
4033: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);
4034: void_procs = all_procs-active_procs;
4035: csin_type_simple = PETSC_TRUE;
4036: redist = PETSC_FALSE;
4037: if (pcbddc->current_level && void_procs) {
4038: csin_ml = PETSC_TRUE;
4039: ncoarse_ml = void_procs;
4040: /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4041: if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4042: csin_ds = PETSC_TRUE;
4043: ncoarse_ds = pcbddc->redistribute_coarse;
4044: redist = PETSC_TRUE;
4045: } else {
4046: csin_ds = PETSC_TRUE;
4047: ncoarse_ds = active_procs;
4048: redist = PETSC_TRUE;
4049: }
4050: } else {
4051: csin_ml = PETSC_FALSE;
4052: ncoarse_ml = all_procs;
4053: if (void_procs) {
4054: csin_ds = PETSC_TRUE;
4055: ncoarse_ds = void_procs;
4056: csin_type_simple = PETSC_FALSE;
4057: } else {
4058: if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4059: csin_ds = PETSC_TRUE;
4060: ncoarse_ds = pcbddc->redistribute_coarse;
4061: redist = PETSC_TRUE;
4062: } else {
4063: csin_ds = PETSC_FALSE;
4064: ncoarse_ds = all_procs;
4065: }
4066: }
4067: }
4069: /*
4070: test if we can go multilevel: three conditions must be satisfied:
4071: - we have not exceeded the number of levels requested
4072: - we can actually subassemble the active processes
4073: - we can find a suitable number of MPI processes where we can place the subassembled problem
4074: */
4075: multilevel_allowed = PETSC_FALSE;
4076: multilevel_requested = PETSC_FALSE;
4077: if (pcbddc->current_level < pcbddc->max_levels) {
4078: multilevel_requested = PETSC_TRUE;
4079: if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4080: multilevel_allowed = PETSC_FALSE;
4081: } else {
4082: multilevel_allowed = PETSC_TRUE;
4083: }
4084: }
4085: /* determine number of process partecipating to coarse solver */
4086: if (multilevel_allowed) {
4087: ncoarse = ncoarse_ml;
4088: csin = csin_ml;
4089: redist = PETSC_FALSE;
4090: } else {
4091: ncoarse = ncoarse_ds;
4092: csin = csin_ds;
4093: }
4095: /* creates temporary l2gmap and IS for coarse indexes */
4096: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
4097: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
4099: /* creates temporary MATIS object for coarse matrix */
4100: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
4101: MatDenseGetArray(coarse_submat_dense,&array);
4102: PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
4103: MatDenseRestoreArray(coarse_submat_dense,&array);
4104: #if 0
4105: {
4106: PetscViewer viewer;
4107: char filename[256];
4108: sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4109: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4110: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4111: MatView(coarse_submat_dense,viewer);
4112: PetscViewerPopFormat(viewer);
4113: PetscViewerDestroy(&viewer);
4114: }
4115: #endif
4116: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
4117: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
4118: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4119: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4120: MatDestroy(&coarse_submat_dense);
4122: /* compute dofs splitting and neumann boundaries for coarse dofs */
4123: if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4124: PetscInt *tidxs,*tidxs2,nout,tsize,i;
4125: const PetscInt *idxs;
4126: ISLocalToGlobalMapping tmap;
4128: /* create map between primal indices (in local representative ordering) and local primal numbering */
4129: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
4130: /* allocate space for temporary storage */
4131: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
4132: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
4133: /* allocate for IS array */
4134: nisdofs = pcbddc->n_ISForDofsLocal;
4135: nisneu = !!pcbddc->NeumannBoundariesLocal;
4136: nis = nisdofs + nisneu;
4137: PetscMalloc1(nis,&isarray);
4138: /* dofs splitting */
4139: for (i=0;i<nisdofs;i++) {
4140: /* ISView(pcbddc->ISForDofsLocal[i],0); */
4141: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
4142: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
4143: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4144: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
4145: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4146: ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
4147: /* ISView(isarray[i],0); */
4148: }
4149: /* neumann boundaries */
4150: if (pcbddc->NeumannBoundariesLocal) {
4151: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
4152: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
4153: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4154: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4155: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4156: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4157: ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
4158: /* ISView(isarray[nisdofs],0); */
4159: }
4160: /* free memory */
4161: PetscFree(tidxs);
4162: PetscFree(tidxs2);
4163: ISLocalToGlobalMappingDestroy(&tmap);
4164: } else {
4165: nis = 0;
4166: nisdofs = 0;
4167: nisneu = 0;
4168: isarray = NULL;
4169: }
4170: /* destroy no longer needed map */
4171: ISLocalToGlobalMappingDestroy(&coarse_islg);
4173: /* restrict on coarse candidates (if needed) */
4174: coarse_mat_is = NULL;
4175: if (csin) {
4176: if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4177: if (redist) {
4178: PetscMPIInt rank;
4179: PetscInt spc,n_spc_p1,dest[1],destsize;
4181: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4182: spc = active_procs/ncoarse;
4183: n_spc_p1 = active_procs%ncoarse;
4184: if (im_active) {
4185: destsize = 1;
4186: if (rank > n_spc_p1*(spc+1)-1) {
4187: dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4188: } else {
4189: dest[0] = rank/(spc+1);
4190: }
4191: } else {
4192: destsize = 0;
4193: }
4194: ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4195: } else if (csin_type_simple) {
4196: PetscMPIInt rank;
4197: PetscInt issize,isidx;
4199: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4200: if (im_active) {
4201: issize = 1;
4202: isidx = (PetscInt)rank;
4203: } else {
4204: issize = 0;
4205: isidx = -1;
4206: }
4207: ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4208: } else { /* get a suitable subassembling pattern from MATIS code */
4209: MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);
4210: }
4212: /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4213: if (!redist || ncoarse <= void_procs) {
4214: PetscInt ncoarse_cand,tissize,*nisindices;
4215: PetscInt *coarse_candidates;
4216: const PetscInt* tisindices;
4218: /* get coarse candidates' ranks in pc communicator */
4219: PetscMalloc1(all_procs,&coarse_candidates);
4220: MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));
4221: for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4222: if (!coarse_candidates[i]) {
4223: coarse_candidates[ncoarse_cand++]=i;
4224: }
4225: }
4226: if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4229: if (pcbddc->dbg_flag) {
4230: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4231: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");
4232: ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4233: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");
4234: for (i=0;i<ncoarse_cand;i++) {
4235: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);
4236: }
4237: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");
4238: PetscViewerFlush(pcbddc->dbg_viewer);
4239: }
4240: /* shift the pattern on coarse candidates */
4241: ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);
4242: ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);
4243: PetscMalloc1(tissize,&nisindices);
4244: for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4245: ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);
4246: ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);
4247: PetscFree(coarse_candidates);
4248: }
4249: if (pcbddc->dbg_flag) {
4250: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4251: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");
4252: ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4253: PetscViewerFlush(pcbddc->dbg_viewer);
4254: }
4255: }
4256: /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4257: if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4258: MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4259: } else { /* this is the last level, so use just receiving processes in subcomm */
4260: MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4261: }
4262: } else {
4263: if (pcbddc->dbg_flag) {
4264: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4265: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");
4266: PetscViewerFlush(pcbddc->dbg_viewer);
4267: }
4268: PetscObjectReference((PetscObject)t_coarse_mat_is);
4269: coarse_mat_is = t_coarse_mat_is;
4270: }
4272: /* create local to global scatters for coarse problem */
4273: if (compute_vecs) {
4274: PetscInt lrows;
4275: VecDestroy(&pcbddc->coarse_vec);
4276: if (coarse_mat_is) {
4277: MatGetLocalSize(coarse_mat_is,&lrows,NULL);
4278: } else {
4279: lrows = 0;
4280: }
4281: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
4282: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
4283: VecSetType(pcbddc->coarse_vec,VECSTANDARD);
4284: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
4285: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
4286: }
4287: ISDestroy(&coarse_is);
4288: MatDestroy(&t_coarse_mat_is);
4290: /* set defaults for coarse KSP and PC */
4291: if (multilevel_allowed) {
4292: coarse_ksp_type = KSPRICHARDSON;
4293: coarse_pc_type = PCBDDC;
4294: } else {
4295: coarse_ksp_type = KSPPREONLY;
4296: coarse_pc_type = PCREDUNDANT;
4297: }
4299: /* print some info if requested */
4300: if (pcbddc->dbg_flag) {
4301: if (!multilevel_allowed) {
4302: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4303: if (multilevel_requested) {
4304: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
4305: } else if (pcbddc->max_levels) {
4306: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
4307: }
4308: PetscViewerFlush(pcbddc->dbg_viewer);
4309: }
4310: }
4312: /* create the coarse KSP object only once with defaults */
4313: if (coarse_mat_is) {
4314: MatReuse coarse_mat_reuse;
4315: PetscViewer dbg_viewer = NULL;
4316: if (pcbddc->dbg_flag) {
4317: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4318: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
4319: }
4320: if (!pcbddc->coarse_ksp) {
4321: char prefix[256],str_level[16];
4322: size_t len;
4323: KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);
4324: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4325: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
4326: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
4327: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);
4328: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
4329: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
4330: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4331: PCSetType(pc_temp,coarse_pc_type);
4332: /* prefix */
4333: PetscStrcpy(prefix,"");
4334: PetscStrcpy(str_level,"");
4335: if (!pcbddc->current_level) {
4336: PetscStrcpy(prefix,((PetscObject)pc)->prefix);
4337: PetscStrcat(prefix,"pc_bddc_coarse_");
4338: } else {
4339: PetscStrlen(((PetscObject)pc)->prefix,&len);
4340: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4341: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4342: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
4343: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4344: PetscStrcat(prefix,str_level);
4345: }
4346: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
4347: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4348: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
4349: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
4350: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
4351: /* allow user customization */
4352: KSPSetFromOptions(pcbddc->coarse_ksp);
4353: }
4354: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4355: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4356: if (nisdofs) {
4357: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
4358: for (i=0;i<nisdofs;i++) {
4359: ISDestroy(&isarray[i]);
4360: }
4361: }
4362: if (nisneu) {
4363: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
4364: ISDestroy(&isarray[nisdofs]);
4365: }
4367: /* get some info after set from options */
4368: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
4369: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
4370: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
4371: if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4372: PCSetType(pc_temp,coarse_pc_type);
4373: isbddc = PETSC_FALSE;
4374: }
4375: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4376: if (isredundant) {
4377: KSP inner_ksp;
4378: PC inner_pc;
4379: PCRedundantGetKSP(pc_temp,&inner_ksp);
4380: KSPGetPC(inner_ksp,&inner_pc);
4381: PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
4382: }
4384: /* assemble coarse matrix */
4385: if (coarse_reuse) {
4386: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
4387: PetscObjectReference((PetscObject)coarse_mat);
4388: coarse_mat_reuse = MAT_REUSE_MATRIX;
4389: } else {
4390: coarse_mat_reuse = MAT_INITIAL_MATRIX;
4391: }
4392: if (isbddc || isnn) {
4393: if (pcbddc->coarsening_ratio > 1) {
4394: if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4395: MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);
4396: if (pcbddc->dbg_flag) {
4397: PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");
4398: PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");
4399: ISView(pcbddc->coarse_subassembling,dbg_viewer);
4400: PetscViewerFlush(dbg_viewer);
4401: }
4402: }
4403: MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);
4404: } else {
4405: PetscObjectReference((PetscObject)coarse_mat_is);
4406: coarse_mat = coarse_mat_is;
4407: }
4408: } else {
4409: MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
4410: }
4411: MatDestroy(&coarse_mat_is);
4413: /* propagate symmetry info of coarse matrix */
4414: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
4415: if (pc->pmat->symmetric_set) {
4416: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
4417: }
4418: if (pc->pmat->hermitian_set) {
4419: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
4420: }
4421: if (pc->pmat->spd_set) {
4422: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
4423: }
4424: /* set operators */
4425: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
4426: if (pcbddc->dbg_flag) {
4427: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
4428: }
4429: } else { /* processes non partecipating to coarse solver (if any) */
4430: coarse_mat = 0;
4431: }
4432: PetscFree(isarray);
4433: #if 0
4434: {
4435: PetscViewer viewer;
4436: char filename[256];
4437: sprintf(filename,"coarse_mat.m");
4438: PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);
4439: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4440: MatView(coarse_mat,viewer);
4441: PetscViewerPopFormat(viewer);
4442: PetscViewerDestroy(&viewer);
4443: }
4444: #endif
4446: /* Compute coarse null space (special handling by BDDC only) */
4447: #if 0
4448: if (pcbddc->NullSpace) {
4449: PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
4450: }
4451: #endif
4453: if (pcbddc->coarse_ksp) {
4454: Vec crhs,csol;
4455: PetscBool ispreonly;
4457: if (CoarseNullSpace) {
4458: if (isbddc) {
4459: PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
4460: } else {
4461: MatSetNullSpace(coarse_mat,CoarseNullSpace);
4462: }
4463: }
4464: /* setup coarse ksp */
4465: KSPSetUp(pcbddc->coarse_ksp);
4466: KSPGetSolution(pcbddc->coarse_ksp,&csol);
4467: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
4468: /* hack */
4469: if (!csol) {
4470: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
4471: }
4472: if (!crhs) {
4473: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
4474: }
4475: /* Check coarse problem if in debug mode or if solving with an iterative method */
4476: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
4477: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4478: KSP check_ksp;
4479: KSPType check_ksp_type;
4480: PC check_pc;
4481: Vec check_vec,coarse_vec;
4482: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4483: PetscInt its;
4484: PetscBool compute_eigs;
4485: PetscReal *eigs_r,*eigs_c;
4486: PetscInt neigs;
4487: const char *prefix;
4489: /* Create ksp object suitable for estimation of extreme eigenvalues */
4490: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
4491: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4492: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
4493: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
4494: if (ispreonly) {
4495: check_ksp_type = KSPPREONLY;
4496: compute_eigs = PETSC_FALSE;
4497: } else {
4498: check_ksp_type = KSPGMRES;
4499: compute_eigs = PETSC_TRUE;
4500: }
4501: KSPSetType(check_ksp,check_ksp_type);
4502: KSPSetComputeSingularValues(check_ksp,compute_eigs);
4503: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
4504: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
4505: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
4506: KSPSetOptionsPrefix(check_ksp,prefix);
4507: KSPAppendOptionsPrefix(check_ksp,"check_");
4508: KSPSetFromOptions(check_ksp);
4509: KSPSetUp(check_ksp);
4510: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
4511: KSPSetPC(check_ksp,check_pc);
4512: /* create random vec */
4513: KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);
4514: VecDuplicate(coarse_vec,&check_vec);
4515: VecSetRandom(check_vec,NULL);
4516: if (CoarseNullSpace) {
4517: MatNullSpaceRemove(CoarseNullSpace,check_vec);
4518: }
4519: MatMult(coarse_mat,check_vec,coarse_vec);
4520: /* solve coarse problem */
4521: KSPSolve(check_ksp,coarse_vec,coarse_vec);
4522: if (CoarseNullSpace) {
4523: MatNullSpaceRemove(CoarseNullSpace,coarse_vec);
4524: }
4525: /* set eigenvalue estimation if preonly has not been requested */
4526: if (compute_eigs) {
4527: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
4528: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
4529: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
4530: lambda_max = eigs_r[neigs-1];
4531: lambda_min = eigs_r[0];
4532: if (pcbddc->use_coarse_estimates) {
4533: if (lambda_max>lambda_min) {
4534: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
4535: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
4536: }
4537: }
4538: }
4540: /* check coarse problem residual error */
4541: if (pcbddc->dbg_flag) {
4542: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4543: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
4544: VecAXPY(check_vec,-1.0,coarse_vec);
4545: VecNorm(check_vec,NORM_INFINITY,&infty_error);
4546: MatMult(coarse_mat,check_vec,coarse_vec);
4547: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
4548: VecDestroy(&check_vec);
4549: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
4550: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
4551: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
4552: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
4553: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
4554: if (compute_eigs) {
4555: PetscReal lambda_max_s,lambda_min_s;
4556: KSPGetType(check_ksp,&check_ksp_type);
4557: KSPGetIterationNumber(check_ksp,&its);
4558: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
4559: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
4560: for (i=0;i<neigs;i++) {
4561: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
4562: }
4563: }
4564: PetscViewerFlush(dbg_viewer);
4565: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
4566: }
4567: KSPDestroy(&check_ksp);
4568: if (compute_eigs) {
4569: PetscFree(eigs_r);
4570: PetscFree(eigs_c);
4571: }
4572: }
4573: }
4574: /* print additional info */
4575: if (pcbddc->dbg_flag) {
4576: /* waits until all processes reaches this point */
4577: PetscBarrier((PetscObject)pc);
4578: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
4579: PetscViewerFlush(pcbddc->dbg_viewer);
4580: }
4582: /* free memory */
4583: MatNullSpaceDestroy(&CoarseNullSpace);
4584: MatDestroy(&coarse_mat);
4585: return(0);
4586: }
4590: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4591: {
4592: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
4593: PC_IS* pcis = (PC_IS*)pc->data;
4594: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4595: IS subset,subset_mult,subset_n;
4596: PetscInt local_size,coarse_size=0;
4597: PetscInt *local_primal_indices=NULL;
4598: const PetscInt *t_local_primal_indices;
4602: /* Compute global number of coarse dofs */
4603: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4604: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
4605: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
4606: ISDestroy(&subset_n);
4607: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
4608: PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);
4609: ISDestroy(&subset);
4610: ISDestroy(&subset_mult);
4611: ISGetLocalSize(subset_n,&local_size);
4612: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
4613: PetscMalloc1(local_size,&local_primal_indices);
4614: ISGetIndices(subset_n,&t_local_primal_indices);
4615: PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
4616: ISRestoreIndices(subset_n,&t_local_primal_indices);
4617: ISDestroy(&subset_n);
4619: /* check numbering */
4620: if (pcbddc->dbg_flag) {
4621: PetscScalar coarsesum,*array;
4622: PetscInt i;
4623: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4625: PetscViewerFlush(pcbddc->dbg_viewer);
4626: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4627: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
4628: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4629: VecSet(pcis->vec1_N,0.0);
4630: for (i=0;i<pcbddc->local_primal_size;i++) {
4631: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
4632: }
4633: VecAssemblyBegin(pcis->vec1_N);
4634: VecAssemblyEnd(pcis->vec1_N);
4635: VecSet(pcis->vec1_global,0.0);
4636: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4637: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4638: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4639: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4640: VecGetArray(pcis->vec1_N,&array);
4641: for (i=0;i<pcis->n;i++) {
4642: if (array[i] == 1.0) {
4643: set_error = PETSC_TRUE;
4644: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
4645: }
4646: }
4647: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
4648: PetscViewerFlush(pcbddc->dbg_viewer);
4649: for (i=0;i<pcis->n;i++) {
4650: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4651: }
4652: VecRestoreArray(pcis->vec1_N,&array);
4653: VecSet(pcis->vec1_global,0.0);
4654: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4655: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4656: VecSum(pcis->vec1_global,&coarsesum);
4657: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
4658: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4659: PetscInt *gidxs;
4661: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
4662: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
4663: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
4664: PetscViewerFlush(pcbddc->dbg_viewer);
4665: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
4666: for (i=0;i<pcbddc->local_primal_size;i++) {
4667: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4668: }
4669: PetscViewerFlush(pcbddc->dbg_viewer);
4670: PetscFree(gidxs);
4671: }
4672: PetscViewerFlush(pcbddc->dbg_viewer);
4673: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4674: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4675: }
4676: /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
4677: /* get back data */
4678: *coarse_size_n = coarse_size;
4679: *local_primal_indices_n = local_primal_indices;
4680: return(0);
4681: }
4685: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4686: {
4687: IS localis_t;
4688: PetscInt i,lsize,*idxs,n;
4689: PetscScalar *vals;
4693: /* get indices in local ordering exploiting local to global map */
4694: ISGetLocalSize(globalis,&lsize);
4695: PetscMalloc1(lsize,&vals);
4696: for (i=0;i<lsize;i++) vals[i] = 1.0;
4697: ISGetIndices(globalis,(const PetscInt**)&idxs);
4698: VecSet(gwork,0.0);
4699: VecSet(lwork,0.0);
4700: if (idxs) { /* multilevel guard */
4701: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
4702: }
4703: VecAssemblyBegin(gwork);
4704: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
4705: PetscFree(vals);
4706: VecAssemblyEnd(gwork);
4707: /* now compute set in local ordering */
4708: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4709: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4710: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
4711: VecGetSize(lwork,&n);
4712: for (i=0,lsize=0;i<n;i++) {
4713: if (PetscRealPart(vals[i]) > 0.5) {
4714: lsize++;
4715: }
4716: }
4717: PetscMalloc1(lsize,&idxs);
4718: for (i=0,lsize=0;i<n;i++) {
4719: if (PetscRealPart(vals[i]) > 0.5) {
4720: idxs[lsize++] = i;
4721: }
4722: }
4723: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
4724: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
4725: *localis = localis_t;
4726: return(0);
4727: }
4729: /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4732: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4733: {
4734: PCBDDCChange_ctx change_ctx;
4735: PetscErrorCode ierr;
4738: MatShellGetContext(A,&change_ctx);
4739: MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4740: MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4741: MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4742: return(0);
4743: }
4747: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4748: {
4749: PCBDDCChange_ctx change_ctx;
4750: PetscErrorCode ierr;
4753: MatShellGetContext(A,&change_ctx);
4754: MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4755: MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4756: MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4757: return(0);
4758: }
4762: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4763: {
4764: PC_IS *pcis=(PC_IS*)pc->data;
4765: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
4766: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
4767: Mat S_j;
4768: PetscInt *used_xadj,*used_adjncy;
4769: PetscBool free_used_adj;
4770: PetscErrorCode ierr;
4773: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4774: free_used_adj = PETSC_FALSE;
4775: if (pcbddc->sub_schurs_layers == -1) {
4776: used_xadj = NULL;
4777: used_adjncy = NULL;
4778: } else {
4779: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4780: used_xadj = pcbddc->mat_graph->xadj;
4781: used_adjncy = pcbddc->mat_graph->adjncy;
4782: } else if (pcbddc->computed_rowadj) {
4783: used_xadj = pcbddc->mat_graph->xadj;
4784: used_adjncy = pcbddc->mat_graph->adjncy;
4785: } else {
4786: PetscBool flg_row=PETSC_FALSE;
4787: const PetscInt *xadj,*adjncy;
4788: PetscInt nvtxs;
4790: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4791: if (flg_row) {
4792: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
4793: PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
4794: PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
4795: free_used_adj = PETSC_TRUE;
4796: } else {
4797: pcbddc->sub_schurs_layers = -1;
4798: used_xadj = NULL;
4799: used_adjncy = NULL;
4800: }
4801: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4802: }
4803: }
4805: /* setup sub_schurs data */
4806: MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
4807: if (!sub_schurs->use_mumps) {
4808: /* pcbddc->ksp_D up to date only if not using MUMPS */
4809: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
4810: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);
4811: } else {
4812: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4813: PetscBool isseqaij;
4814: if (!pcbddc->use_vertices && reuse_solvers) {
4815: PetscInt n_vertices;
4817: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
4818: reuse_solvers = (PetscBool)!n_vertices;
4819: }
4820: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
4821: if (!isseqaij) {
4822: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4823: if (matis->A == pcbddc->local_mat) {
4824: MatDestroy(&pcbddc->local_mat);
4825: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
4826: } else {
4827: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
4828: }
4829: }
4830: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);
4831: }
4832: MatDestroy(&S_j);
4834: /* free adjacency */
4835: if (free_used_adj) {
4836: PetscFree2(used_xadj,used_adjncy);
4837: }
4838: return(0);
4839: }
4843: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4844: {
4845: PC_IS *pcis=(PC_IS*)pc->data;
4846: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
4847: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
4848: PCBDDCGraph graph;
4849: PetscErrorCode ierr;
4852: /* attach interface graph for determining subsets */
4853: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4854: IS verticesIS,verticescomm;
4855: PetscInt vsize,*idxs;
4857: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
4858: ISGetSize(verticesIS,&vsize);
4859: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
4860: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
4861: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
4862: ISDestroy(&verticesIS);
4863: PCBDDCGraphCreate(&graph);
4864: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);
4865: PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
4866: ISDestroy(&verticescomm);
4867: PCBDDCGraphComputeConnectedComponents(graph);
4868: /*
4869: if (pcbddc->dbg_flag) {
4870: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
4871: }
4872: */
4873: } else {
4874: graph = pcbddc->mat_graph;
4875: }
4877: /* sub_schurs init */
4878: PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);
4880: /* free graph struct */
4881: if (pcbddc->sub_schurs_rebuild) {
4882: PCBDDCGraphDestroy(&graph);
4883: }
4884: return(0);
4885: }