Actual source code: bddcprivate.c
petsc-3.5.4 2015-05-23
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
7: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8: {
9: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
10: PetscScalar *coarse_submat_vals;
14: /* Compute matrix after change of basis and extract local submatrices */
15: PCBDDCSetUpLocalMatrices(pc);
17: /* Setup local scatters R_to_B and (optionally) R_to_D */
18: /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19: PCBDDCSetUpLocalScatters(pc);
21: /* Setup local solvers ksp_D and ksp_R */
22: /* PCBDDCSetUpLocalScatters should be called first! */
23: PCBDDCSetUpLocalSolvers(pc);
25: /* Change global null space passed in by the user if change of basis has been requested */
26: if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
27: PCBDDCNullSpaceAdaptGlobal(pc);
28: }
30: /*
31: Setup local correction and local part of coarse basis.
32: Gives back the dense local part of the coarse matrix in column major ordering
33: */
34: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
36: /* Compute total number of coarse nodes and setup coarse solver */
37: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
39: /* free */
40: PetscFree(coarse_submat_vals);
41: return(0);
42: }
46: PetscErrorCode PCBDDCResetCustomization(PC pc)
47: {
48: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
52: PCBDDCGraphResetCSR(pcbddc->mat_graph);
53: ISDestroy(&pcbddc->user_primal_vertices);
54: MatNullSpaceDestroy(&pcbddc->NullSpace);
55: ISDestroy(&pcbddc->NeumannBoundaries);
56: ISDestroy(&pcbddc->NeumannBoundariesLocal);
57: ISDestroy(&pcbddc->DirichletBoundaries);
58: MatNullSpaceDestroy(&pcbddc->onearnullspace);
59: PetscFree(pcbddc->onearnullvecs_state);
60: ISDestroy(&pcbddc->DirichletBoundariesLocal);
61: PCBDDCSetDofsSplitting(pc,0,NULL);
62: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
63: return(0);
64: }
68: PetscErrorCode PCBDDCResetTopography(PC pc)
69: {
70: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
74: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
75: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
76: MatDestroy(&pcbddc->ConstraintMatrix);
77: PCBDDCGraphReset(pcbddc->mat_graph);
78: return(0);
79: }
83: PetscErrorCode PCBDDCResetSolvers(PC pc)
84: {
85: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
89: VecDestroy(&pcbddc->coarse_vec);
90: VecDestroy(&pcbddc->coarse_rhs);
91: MatDestroy(&pcbddc->coarse_phi_B);
92: MatDestroy(&pcbddc->coarse_phi_D);
93: MatDestroy(&pcbddc->coarse_psi_B);
94: MatDestroy(&pcbddc->coarse_psi_D);
95: VecDestroy(&pcbddc->vec1_P);
96: VecDestroy(&pcbddc->vec1_C);
97: MatDestroy(&pcbddc->local_auxmat1);
98: MatDestroy(&pcbddc->local_auxmat2);
99: VecDestroy(&pcbddc->vec1_R);
100: VecDestroy(&pcbddc->vec2_R);
101: ISDestroy(&pcbddc->is_R_local);
102: VecScatterDestroy(&pcbddc->R_to_B);
103: VecScatterDestroy(&pcbddc->R_to_D);
104: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
105: KSPDestroy(&pcbddc->ksp_D);
106: KSPDestroy(&pcbddc->ksp_R);
107: KSPDestroy(&pcbddc->coarse_ksp);
108: MatDestroy(&pcbddc->local_mat);
109: PetscFree(pcbddc->primal_indices_local_idxs);
110: PetscFree(pcbddc->global_primal_indices);
111: ISDestroy(&pcbddc->coarse_subassembling);
112: ISDestroy(&pcbddc->coarse_subassembling_init);
113: return(0);
114: }
118: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
119: {
120: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
121: PC_IS *pcis = (PC_IS*)pc->data;
122: VecType impVecType;
123: PetscInt n_constraints,n_R,old_size;
127: if (!pcbddc->ConstraintMatrix) {
128: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
129: }
130: /* get sizes */
131: n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
132: n_R = pcis->n-pcbddc->n_actual_vertices;
133: VecGetType(pcis->vec1_N,&impVecType);
134: /* local work vectors (try to avoid unneeded work)*/
135: /* R nodes */
136: old_size = -1;
137: if (pcbddc->vec1_R) {
138: VecGetSize(pcbddc->vec1_R,&old_size);
139: }
140: if (n_R != old_size) {
141: VecDestroy(&pcbddc->vec1_R);
142: VecDestroy(&pcbddc->vec2_R);
143: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
144: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
145: VecSetType(pcbddc->vec1_R,impVecType);
146: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
147: }
148: /* local primal dofs */
149: old_size = -1;
150: if (pcbddc->vec1_P) {
151: VecGetSize(pcbddc->vec1_P,&old_size);
152: }
153: if (pcbddc->local_primal_size != old_size) {
154: VecDestroy(&pcbddc->vec1_P);
155: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
156: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
157: VecSetType(pcbddc->vec1_P,impVecType);
158: }
159: /* local explicit constraints */
160: old_size = -1;
161: if (pcbddc->vec1_C) {
162: VecGetSize(pcbddc->vec1_C,&old_size);
163: }
164: if (n_constraints && n_constraints != old_size) {
165: VecDestroy(&pcbddc->vec1_C);
166: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
167: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
168: VecSetType(pcbddc->vec1_C,impVecType);
169: }
170: return(0);
171: }
175: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176: {
177: PetscErrorCode ierr;
178: /* pointers to pcis and pcbddc */
179: PC_IS* pcis = (PC_IS*)pc->data;
180: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
181: /* submatrices of local problem */
182: Mat A_RV,A_VR,A_VV;
183: /* working matrices */
184: Mat M1,M2,M3,C_CR;
185: /* working vectors */
186: Vec vec1_C,vec2_C,vec1_V,vec2_V;
187: /* additional working stuff */
188: IS is_aux;
189: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
190: const PetscScalar *array,*row_cmat_values;
191: const PetscInt *row_cmat_indices,*idx_R_local;
192: PetscInt *idx_V_B,*auxindices;
193: PetscInt n_vertices,n_constraints,size_of_constraint;
194: PetscInt i,j,n_R,n_D,n_B;
195: PetscBool unsymmetric_check;
196: /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197: MatType impMatType;
198: /* some shortcuts to scalars */
199: PetscScalar zero=0.0,one=1.0,m_one=-1.0;
200: /* for debugging purposes */
201: PetscReal *coarsefunctions_errors,*constraints_errors;
204: /* get number of vertices (corners plus constraints with change of basis)
205: pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
206: n_vertices = pcbddc->n_actual_vertices;
207: n_constraints = pcbddc->local_primal_size-n_vertices;
208: /* Set Non-overlapping dimensions */
209: n_B = pcis->n_B; n_D = pcis->n - n_B;
210: n_R = pcis->n-n_vertices;
212: /* Set types for local objects needed by BDDC precondtioner */
213: impMatType = MATSEQDENSE;
215: /* Allocating some extra storage just to be safe */
216: PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
217: for (i=0;i<pcis->n;i++) auxindices[i]=i;
219: /* vertices in boundary numbering */
220: PetscMalloc1(n_vertices,&idx_V_B);
221: ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);
222: if (i != n_vertices) {
223: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224: }
226: /* Precompute stuffs needed for preprocessing and application of BDDC*/
227: if (n_constraints) {
228: /* see if we can save some allocations */
229: if (pcbddc->local_auxmat2) {
230: PetscInt on_R,on_constraints;
231: MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);
232: if (on_R != n_R || on_constraints != n_constraints) {
233: MatDestroy(&pcbddc->local_auxmat2);
234: MatDestroy(&pcbddc->local_auxmat1);
235: }
236: }
237: /* work vectors */
238: VecDuplicate(pcbddc->vec1_C,&vec1_C);
239: VecDuplicate(pcbddc->vec1_C,&vec2_C);
240: /* auxiliary matrices */
241: if (!pcbddc->local_auxmat2) {
242: MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
243: MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);
244: MatSetType(pcbddc->local_auxmat2,impMatType);
245: MatSetUp(pcbddc->local_auxmat2);
246: }
248: /* Extract constraints on R nodes: C_{CR} */
249: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
250: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
251: ISDestroy(&is_aux);
253: /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254: for (i=0;i<n_constraints;i++) {
255: VecSet(pcbddc->vec1_R,zero);
256: /* Get row of constraint matrix in R numbering */
257: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
258: VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);
259: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
260: VecAssemblyBegin(pcbddc->vec1_R);
261: VecAssemblyEnd(pcbddc->vec1_R);
262: /* Solve for row of constraint matrix in R numbering */
263: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
264: /* Set values in local_auxmat2 */
265: VecGetArrayRead(pcbddc->vec2_R,&array);
266: MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
267: VecRestoreArrayRead(pcbddc->vec2_R,&array);
268: }
269: MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
270: MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
271: MatScale(pcbddc->local_auxmat2,m_one);
273: /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */
274: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
275: MatLUFactor(M3,NULL,NULL,NULL);
276: MatCreate(PETSC_COMM_SELF,&M1);
277: MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
278: MatSetType(M1,impMatType);
279: MatSetUp(M1);
280: MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);
281: MatZeroEntries(M2);
282: VecSet(vec1_C,m_one);
283: MatDiagonalSet(M2,vec1_C,INSERT_VALUES);
284: MatMatSolve(M3,M2,M1);
285: MatDestroy(&M2);
286: MatDestroy(&M3);
287: /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288: if (!pcbddc->local_auxmat1) {
289: MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
290: } else {
291: MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
292: }
293: }
295: /* Get submatrices from subdomain matrix */
296: if (n_vertices) {
297: PetscInt ibs,mbs;
298: PetscBool issbaij;
299: Mat newmat;
301: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
302: MatGetBlockSize(pcbddc->local_mat,&mbs);
303: ISGetBlockSize(pcbddc->is_R_local,&ibs);
304: if (ibs != mbs) { /* need to convert to SEQAIJ */
305: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
306: MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
307: MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
308: MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
309: MatDestroy(&newmat);
310: } else {
311: /* this is safe */
312: MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
313: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
314: if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
316: /* which of the two approaches is faster? */
317: /* MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
318: MatCreateTranspose(A_RV,&A_VR);*/
319: MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
320: MatCreateTranspose(A_VR,&A_RV);
321: MatDestroy(&newmat);
322: } else {
323: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
324: MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
325: }
326: }
327: MatGetVecs(A_RV,&vec1_V,NULL);
328: VecDuplicate(vec1_V,&vec2_V);
329: ISDestroy(&is_aux);
330: }
332: /* Matrix of coarse basis functions (local) */
333: if (pcbddc->coarse_phi_B) {
334: PetscInt on_B,on_primal;
335: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
336: if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337: MatDestroy(&pcbddc->coarse_phi_B);
338: MatDestroy(&pcbddc->coarse_psi_B);
339: }
340: }
341: if (pcbddc->coarse_phi_D) {
342: PetscInt on_D,on_primal;
343: MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);
344: if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345: MatDestroy(&pcbddc->coarse_phi_D);
346: MatDestroy(&pcbddc->coarse_psi_D);
347: }
348: }
349: if (!pcbddc->coarse_phi_B) {
350: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
351: MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
352: MatSetType(pcbddc->coarse_phi_B,impMatType);
353: MatSetUp(pcbddc->coarse_phi_B);
354: }
355: if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
357: MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
358: MatSetType(pcbddc->coarse_phi_D,impMatType);
359: MatSetUp(pcbddc->coarse_phi_D);
360: }
362: if (pcbddc->dbg_flag) {
363: ISGetIndices(pcbddc->is_R_local,&idx_R_local);
364: PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);
365: PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);
366: }
367: /* Subdomain contribution (Non-overlapping) to coarse matrix */
368: PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);
370: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
372: /* vertices */
373: for (i=0;i<n_vertices;i++) {
374: /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375: VecSet(pcbddc->vec1_R,zero); /* TODO: REMOVE IT */
376: VecSet(vec1_V,zero);
377: VecSetValue(vec1_V,i,one,INSERT_VALUES);
378: VecAssemblyBegin(vec1_V);
379: VecAssemblyEnd(vec1_V);
380: /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381: MatMult(A_RV,vec1_V,pcbddc->vec1_R);
382: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
383: VecScale(pcbddc->vec1_R,m_one);
384: if (n_constraints) {
385: MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
386: MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
387: VecScale(vec1_C,m_one);
388: }
389: MatMult(A_VR,pcbddc->vec1_R,vec2_V);
390: MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);
392: /* Set values in coarse basis function and subdomain part of coarse_mat */
393: /* coarse basis functions */
394: VecSet(pcis->vec1_B,zero);
395: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
396: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
397: VecGetArrayRead(pcis->vec1_B,&array);
398: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
399: VecRestoreArrayRead(pcis->vec1_B,&array);
400: MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
401: if (pcbddc->switch_static || pcbddc->dbg_flag) {
402: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
403: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
404: VecGetArrayRead(pcis->vec1_D,&array);
405: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
406: VecRestoreArrayRead(pcis->vec1_D,&array);
407: }
408: /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409: VecGetArrayRead(vec2_V,&array);
410: PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
411: VecRestoreArrayRead(vec2_V,&array);
412: if (n_constraints) {
413: VecGetArrayRead(vec1_C,&array);
414: PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
415: VecRestoreArrayRead(vec1_C,&array);
416: }
418: /* check */
419: if (pcbddc->dbg_flag) {
420: /* assemble subdomain vector on local nodes */
421: VecSet(pcis->vec1_N,zero);
422: VecGetArrayRead(pcbddc->vec1_R,&array);
423: VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
424: VecRestoreArrayRead(pcbddc->vec1_R,&array);
425: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
426: VecAssemblyBegin(pcis->vec1_N);
427: VecAssemblyEnd(pcis->vec1_N);
428: /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429: VecSet(pcbddc->vec1_P,zero);
430: VecGetArrayRead(vec2_V,&array);
431: VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
432: VecRestoreArrayRead(vec2_V,&array);
433: if (n_constraints) {
434: VecGetArrayRead(vec1_C,&array);
435: VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
436: VecRestoreArrayRead(vec1_C,&array);
437: }
438: VecAssemblyBegin(pcbddc->vec1_P);
439: VecAssemblyEnd(pcbddc->vec1_P);
440: VecScale(pcbddc->vec1_P,m_one);
441: /* check saddle point solution */
442: MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
443: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
444: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
445: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
446: /* shift by the identity matrix */
447: VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
448: VecAssemblyBegin(pcbddc->vec1_P);
449: VecAssemblyEnd(pcbddc->vec1_P);
450: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
451: }
452: }
454: /* constraints */
455: for (i=0;i<n_constraints;i++) {
456: VecSet(vec2_C,zero);
457: VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
458: VecAssemblyBegin(vec2_C);
459: VecAssemblyEnd(vec2_C);
460: /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461: MatMult(M1,vec2_C,vec1_C);
462: MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
463: VecScale(vec1_C,m_one);
464: if (n_vertices) {
465: MatMult(A_VR,pcbddc->vec1_R,vec2_V);
466: }
467: /* Set values in coarse basis function and subdomain part of coarse_mat */
468: /* coarse basis functions */
469: j = i+n_vertices; /* don't touch this! */
470: VecSet(pcis->vec1_B,zero);
471: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
472: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
473: VecGetArrayRead(pcis->vec1_B,&array);
474: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);
475: VecRestoreArrayRead(pcis->vec1_B,&array);
476: if (pcbddc->switch_static || pcbddc->dbg_flag) {
477: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
478: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
479: VecGetArrayRead(pcis->vec1_D,&array);
480: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);
481: VecRestoreArrayRead(pcis->vec1_D,&array);
482: }
483: /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484: if (n_vertices) {
485: VecGetArrayRead(vec2_V,&array);
486: PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
487: VecRestoreArrayRead(vec2_V,&array);
488: }
489: VecGetArrayRead(vec1_C,&array);
490: PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
491: VecRestoreArrayRead(vec1_C,&array);
493: if (pcbddc->dbg_flag) {
494: /* assemble subdomain vector on nodes */
495: VecSet(pcis->vec1_N,zero);
496: VecGetArrayRead(pcbddc->vec1_R,&array);
497: VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
498: VecRestoreArrayRead(pcbddc->vec1_R,&array);
499: VecAssemblyBegin(pcis->vec1_N);
500: VecAssemblyEnd(pcis->vec1_N);
501: /* assemble subdomain vector of lagrange multipliers */
502: VecSet(pcbddc->vec1_P,zero);
503: if (n_vertices) {
504: VecGetArrayRead(vec2_V,&array);
505: VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
506: VecRestoreArrayRead(vec2_V,&array);
507: }
508: VecGetArrayRead(vec1_C,&array);
509: VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
510: VecRestoreArrayRead(vec1_C,&array);
511: VecAssemblyBegin(pcbddc->vec1_P);
512: VecAssemblyEnd(pcbddc->vec1_P);
513: VecScale(pcbddc->vec1_P,m_one);
514: /* check saddle point solution */
515: MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
516: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
517: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);
518: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
519: /* shift by the identity matrix */
520: VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);
521: VecAssemblyBegin(pcbddc->vec1_P);
522: VecAssemblyEnd(pcbddc->vec1_P);
523: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);
524: }
525: }
526: /* call assembling routines for local coarse basis */
527: MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
528: MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
529: if (pcbddc->switch_static || pcbddc->dbg_flag) {
530: MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
531: MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
532: }
534: /* compute other basis functions for non-symmetric problems */
535: MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);
536: if (!pcbddc->issym) {
537: if (!pcbddc->coarse_psi_B) {
538: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);
539: MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
540: MatSetType(pcbddc->coarse_psi_B,impMatType);
541: MatSetUp(pcbddc->coarse_psi_B);
542: }
543: if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
544: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);
545: MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
546: MatSetType(pcbddc->coarse_psi_D,impMatType);
547: MatSetUp(pcbddc->coarse_psi_D);
548: }
549: for (i=0;i<pcbddc->local_primal_size;i++) {
550: if (n_constraints) {
551: VecSet(vec1_C,zero);
552: for (j=0;j<n_constraints;j++) {
553: VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);
554: }
555: VecAssemblyBegin(vec1_C);
556: VecAssemblyEnd(vec1_C);
557: }
558: if (i<n_vertices) {
559: VecSet(vec1_V,zero);
560: VecSetValue(vec1_V,i,m_one,INSERT_VALUES);
561: VecAssemblyBegin(vec1_V);
562: VecAssemblyEnd(vec1_V);
563: MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);
564: if (n_constraints) {
565: MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
566: }
567: } else {
568: MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);
569: }
570: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
571: VecSet(pcis->vec1_B,zero);
572: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
573: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
574: VecGetArrayRead(pcis->vec1_B,&array);
575: MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
576: VecRestoreArrayRead(pcis->vec1_B,&array);
577: if (i<n_vertices) {
578: MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);
579: }
580: if (pcbddc->switch_static || pcbddc->dbg_flag) {
581: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
582: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
583: VecGetArrayRead(pcis->vec1_D,&array);
584: MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
585: VecRestoreArrayRead(pcis->vec1_D,&array);
586: }
588: if (pcbddc->dbg_flag) {
589: /* assemble subdomain vector on nodes */
590: VecSet(pcis->vec1_N,zero);
591: VecGetArrayRead(pcbddc->vec1_R,&array);
592: VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
593: VecRestoreArrayRead(pcbddc->vec1_R,&array);
594: if (i<n_vertices) {
595: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
596: }
597: VecAssemblyBegin(pcis->vec1_N);
598: VecAssemblyEnd(pcis->vec1_N);
599: /* assemble subdomain vector of lagrange multipliers */
600: for (j=0;j<pcbddc->local_primal_size;j++) {
601: VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);
602: }
603: VecAssemblyBegin(pcbddc->vec1_P);
604: VecAssemblyEnd(pcbddc->vec1_P);
605: /* check saddle point solution */
606: MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
607: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
608: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);
609: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
610: /* shift by the identity matrix */
611: VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
612: VecAssemblyBegin(pcbddc->vec1_P);
613: VecAssemblyEnd(pcbddc->vec1_P);
614: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);
615: }
616: }
617: MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
618: MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
619: if (pcbddc->switch_static || pcbddc->dbg_flag) {
620: MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
621: MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
622: }
623: unsymmetric_check = PETSC_TRUE;
624: } else { /* take references to already computed coarse basis */
625: unsymmetric_check = PETSC_FALSE;
626: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
627: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
628: if (pcbddc->coarse_phi_D) {
629: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
630: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
631: }
632: }
633: PetscFree(idx_V_B);
634: /* Checking coarse_sub_mat and coarse basis functios */
635: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
637: if (pcbddc->dbg_flag) {
638: Mat coarse_sub_mat;
639: Mat AUXMAT,TM1,TM2,TM3,TM4;
640: Mat coarse_phi_D,coarse_phi_B;
641: Mat coarse_psi_D,coarse_psi_B;
642: Mat A_II,A_BB,A_IB,A_BI;
643: MatType checkmattype=MATSEQAIJ;
644: PetscReal real_value;
646: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
647: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
648: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
649: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
650: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
651: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
652: if (unsymmetric_check) {
653: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
654: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
655: }
656: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
658: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
659: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");
660: PetscViewerFlush(pcbddc->dbg_viewer);
661: if (unsymmetric_check) {
662: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
663: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
664: MatDestroy(&AUXMAT);
665: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
666: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
667: MatDestroy(&AUXMAT);
668: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
669: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
670: MatDestroy(&AUXMAT);
671: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
672: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
673: MatDestroy(&AUXMAT);
674: } else {
675: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
676: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
677: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
678: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
679: MatDestroy(&AUXMAT);
680: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
681: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
682: MatDestroy(&AUXMAT);
683: }
684: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
685: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
686: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
687: MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);
688: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
689: MatNorm(TM1,NORM_INFINITY,&real_value);
690: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
691: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");
692: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);
693: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);
694: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");
695: for (i=0;i<pcbddc->local_primal_size;i++) {
696: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);
697: }
698: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");
699: for (i=0;i<pcbddc->local_primal_size;i++) {
700: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);
701: }
702: if (unsymmetric_check) {
703: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");
704: for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
705: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);
706: }
707: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");
708: for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
709: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);
710: }
711: }
712: PetscViewerFlush(pcbddc->dbg_viewer);
713: MatDestroy(&A_II);
714: MatDestroy(&A_BB);
715: MatDestroy(&A_IB);
716: MatDestroy(&A_BI);
717: MatDestroy(&TM1);
718: MatDestroy(&TM2);
719: MatDestroy(&TM3);
720: MatDestroy(&TM4);
721: MatDestroy(&coarse_phi_D);
722: MatDestroy(&coarse_phi_B);
723: if (unsymmetric_check) {
724: MatDestroy(&coarse_psi_D);
725: MatDestroy(&coarse_psi_B);
726: }
727: MatDestroy(&coarse_sub_mat);
728: ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);
729: PetscFree(coarsefunctions_errors);
730: PetscFree(constraints_errors);
731: }
732: /* free memory */
733: if (n_vertices) {
734: VecDestroy(&vec1_V);
735: VecDestroy(&vec2_V);
736: MatDestroy(&A_RV);
737: MatDestroy(&A_VR);
738: MatDestroy(&A_VV);
739: }
740: if (n_constraints) {
741: VecDestroy(&vec1_C);
742: VecDestroy(&vec2_C);
743: MatDestroy(&M1);
744: MatDestroy(&C_CR);
745: }
746: PetscFree(auxindices);
747: /* get back data */
748: *coarse_submat_vals_n = coarse_submat_vals;
749: return(0);
750: }
754: PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
755: {
756: PC_IS* pcis = (PC_IS*)(pc->data);
757: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
758: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
759: PetscBool issbaij,isseqaij;
760: /* manage repeated solves */
761: MatReuse reuse;
762: PetscErrorCode ierr;
765: if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
766: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767: }
768: /* get mat flags */
769: reuse = MAT_INITIAL_MATRIX;
770: if (pc->setupcalled) {
771: if (pc->flag == SAME_NONZERO_PATTERN) {
772: reuse = MAT_REUSE_MATRIX;
773: } else {
774: reuse = MAT_INITIAL_MATRIX;
775: }
776: }
777: if (reuse == MAT_INITIAL_MATRIX) {
778: MatDestroy(&pcis->A_II);
779: MatDestroy(&pcis->A_IB);
780: MatDestroy(&pcis->A_BI);
781: MatDestroy(&pcis->A_BB);
782: MatDestroy(&pcbddc->local_mat);
783: }
785: /* transform local matrices if needed */
786: if (pcbddc->ChangeOfBasisMatrix) {
787: Mat change_mat_all;
788: PetscScalar *row_cmat_values;
789: PetscInt *row_cmat_indices;
790: PetscInt *nnz,*is_indices,*temp_indices;
791: PetscInt i,j,k,n_D,n_B;
793: /* Get Non-overlapping dimensions */
794: n_B = pcis->n_B;
795: n_D = pcis->n-n_B;
797: /* compute nonzero structure of change of basis on all local nodes */
798: PetscMalloc1(pcis->n,&nnz);
799: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
800: for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
801: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
802: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
803: k=1;
804: for (i=0;i<n_B;i++) {
805: MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
806: nnz[is_indices[i]]=j;
807: if (k < j) k = j;
808: MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
809: }
810: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
811: /* assemble change of basis matrix on the whole set of local dofs */
812: PetscMalloc1(k,&temp_indices);
813: MatCreate(PETSC_COMM_SELF,&change_mat_all);
814: MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);
815: MatSetType(change_mat_all,MATSEQAIJ);
816: MatSeqAIJSetPreallocation(change_mat_all,0,nnz);
817: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
818: for (i=0;i<n_D;i++) {
819: MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);
820: }
821: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
822: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
823: for (i=0;i<n_B;i++) {
824: MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
825: for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
826: MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);
827: MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
828: }
829: MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);
830: MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);
831: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
832: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);
833: if (isseqaij) {
834: MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);
835: } else {
836: Mat work_mat;
837: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
838: MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);
839: MatDestroy(&work_mat);
840: }
841: /*
842: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
843: MatView(change_mat_all,(PetscViewer)0);
844: */
845: MatDestroy(&change_mat_all);
846: PetscFree(nnz);
847: PetscFree(temp_indices);
848: } else {
849: /* without change of basis, the local matrix is unchanged */
850: if (!pcbddc->local_mat) {
851: PetscObjectReference((PetscObject)matis->A);
852: pcbddc->local_mat = matis->A;
853: }
854: }
856: /* get submatrices */
857: MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);
858: MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);
859: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
860: if (!issbaij) {
861: MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
862: MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
863: } else {
864: Mat newmat;
865: MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
866: MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
867: MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
868: MatDestroy(&newmat);
869: }
870: return(0);
871: }
875: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
876: {
877: PC_IS* pcis = (PC_IS*)(pc->data);
878: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
879: IS is_aux1,is_aux2;
880: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
881: PetscInt n_vertices,i,j,n_R,n_D,n_B;
882: PetscInt vbs,bs;
883: PetscBT bitmask;
887: /*
888: No need to setup local scatters if
889: - primal space is unchanged
890: AND
891: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
892: AND
893: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
894: */
895: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
896: return(0);
897: }
898: /* destroy old objects */
899: ISDestroy(&pcbddc->is_R_local);
900: VecScatterDestroy(&pcbddc->R_to_B);
901: VecScatterDestroy(&pcbddc->R_to_D);
902: /* Set Non-overlapping dimensions */
903: n_B = pcis->n_B; n_D = pcis->n - n_B;
904: n_vertices = pcbddc->n_actual_vertices;
905: /* create auxiliary bitmask */
906: PetscBTCreate(pcis->n,&bitmask);
907: for (i=0;i<n_vertices;i++) {
908: PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);
909: }
911: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
912: PetscMalloc1((pcis->n-n_vertices),&idx_R_local);
913: for (i=0, n_R=0; i<pcis->n; i++) {
914: if (!PetscBTLookup(bitmask,i)) {
915: idx_R_local[n_R] = i;
916: n_R++;
917: }
918: }
920: /* Block code */
921: vbs = 1;
922: MatGetBlockSize(pcbddc->local_mat,&bs);
923: if (bs>1 && !(n_vertices%bs)) {
924: PetscBool is_blocked = PETSC_TRUE;
925: PetscInt *vary;
926: /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
927: PetscMalloc1(pcis->n/bs,&vary);
928: PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
929: for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
930: for (i=0; i<n_vertices; i++) {
931: if (vary[i]!=0 && vary[i]!=bs) {
932: is_blocked = PETSC_FALSE;
933: break;
934: }
935: }
936: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
937: vbs = bs;
938: for (i=0;i<n_R/vbs;i++) {
939: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
940: }
941: }
942: PetscFree(vary);
943: }
944: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
945: PetscFree(idx_R_local);
947: /* print some info if requested */
948: if (pcbddc->dbg_flag) {
949: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
950: PetscViewerFlush(pcbddc->dbg_viewer);
951: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
952: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
953: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
954: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
955: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);
956: PetscViewerFlush(pcbddc->dbg_viewer);
957: }
959: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
960: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
961: PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);
962: PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);
963: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
964: for (i=0; i<n_D; i++) {
965: PetscBTSet(bitmask,is_indices[i]);
966: }
967: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
968: for (i=0, j=0; i<n_R; i++) {
969: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
970: aux_array1[j++] = i;
971: }
972: }
973: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
974: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
975: for (i=0, j=0; i<n_B; i++) {
976: if (!PetscBTLookup(bitmask,is_indices[i])) {
977: aux_array2[j++] = i;
978: }
979: }
980: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
981: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
982: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
983: ISDestroy(&is_aux1);
984: ISDestroy(&is_aux2);
986: if (pcbddc->switch_static || pcbddc->dbg_flag) {
987: PetscMalloc1(n_D,&aux_array1);
988: for (i=0, j=0; i<n_R; i++) {
989: if (PetscBTLookup(bitmask,idx_R_local[i])) {
990: aux_array1[j++] = i;
991: }
992: }
993: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
994: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
995: ISDestroy(&is_aux1);
996: }
997: PetscBTDestroy(&bitmask);
998: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
999: return(0);
1000: }
1005: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1006: {
1007: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1008: PC_IS *pcis = (PC_IS*)pc->data;
1009: PC pc_temp;
1010: Mat A_RR;
1011: MatReuse reuse;
1012: PetscScalar m_one = -1.0;
1013: PetscReal value;
1014: PetscInt n_D,n_R,ibs,mbs;
1015: PetscBool use_exact,use_exact_reduced,issbaij;
1017: /* prefixes stuff */
1018: char dir_prefix[256],neu_prefix[256],str_level[16];
1019: size_t len;
1023: /* compute prefixes */
1024: PetscStrcpy(dir_prefix,"");
1025: PetscStrcpy(neu_prefix,"");
1026: if (!pcbddc->current_level) {
1027: PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1028: PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1029: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1030: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1031: } else {
1032: PetscStrcpy(str_level,"");
1033: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1034: PetscStrlen(((PetscObject)pc)->prefix,&len);
1035: len -= 15; /* remove "pc_bddc_coarse_" */
1036: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1037: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1038: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
1039: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
1040: PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1041: PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1042: PetscStrcat(dir_prefix,str_level);
1043: PetscStrcat(neu_prefix,str_level);
1044: }
1046: /* DIRICHLET PROBLEM */
1047: /* Matrix for Dirichlet problem is pcis->A_II */
1048: ISGetSize(pcis->is_I_local,&n_D);
1049: if (!pcbddc->ksp_D) { /* create object if not yet build */
1050: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1051: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1052: /* default */
1053: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1054: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1055: PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1056: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1057: if (issbaij) {
1058: PCSetType(pc_temp,PCCHOLESKY);
1059: } else {
1060: PCSetType(pc_temp,PCLU);
1061: }
1062: /* Allow user's customization */
1063: KSPSetFromOptions(pcbddc->ksp_D);
1064: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1065: }
1066: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
1067: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1068: if (!n_D) {
1069: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1070: PCSetType(pc_temp,PCNONE);
1071: }
1072: /* Set Up KSP for Dirichlet problem of BDDC */
1073: KSPSetUp(pcbddc->ksp_D);
1074: /* set ksp_D into pcis data */
1075: KSPDestroy(&pcis->ksp_D);
1076: PetscObjectReference((PetscObject)pcbddc->ksp_D);
1077: pcis->ksp_D = pcbddc->ksp_D;
1079: /* NEUMANN PROBLEM */
1080: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1081: ISGetSize(pcbddc->is_R_local,&n_R);
1082: if (pcbddc->ksp_R) { /* already created ksp */
1083: PetscInt nn_R;
1084: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
1085: PetscObjectReference((PetscObject)A_RR);
1086: MatGetSize(A_RR,&nn_R,NULL);
1087: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1088: KSPReset(pcbddc->ksp_R);
1089: MatDestroy(&A_RR);
1090: reuse = MAT_INITIAL_MATRIX;
1091: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1092: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1093: MatDestroy(&A_RR);
1094: reuse = MAT_INITIAL_MATRIX;
1095: } else { /* safe to reuse the matrix */
1096: reuse = MAT_REUSE_MATRIX;
1097: }
1098: }
1099: /* last check */
1100: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1101: MatDestroy(&A_RR);
1102: reuse = MAT_INITIAL_MATRIX;
1103: }
1104: } else { /* first time, so we need to create the matrix */
1105: reuse = MAT_INITIAL_MATRIX;
1106: }
1107: /* extract A_RR */
1108: MatGetBlockSize(pcbddc->local_mat,&mbs);
1109: ISGetBlockSize(pcbddc->is_R_local,&ibs);
1110: if (ibs != mbs) {
1111: Mat newmat;
1112: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
1113: MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1114: MatDestroy(&newmat);
1115: } else {
1116: MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1117: }
1118: if (!pcbddc->ksp_R) { /* create object if not present */
1119: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1120: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1121: /* default */
1122: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1123: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1124: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1125: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1126: if (issbaij) {
1127: PCSetType(pc_temp,PCCHOLESKY);
1128: } else {
1129: PCSetType(pc_temp,PCLU);
1130: }
1131: /* Allow user's customization */
1132: KSPSetFromOptions(pcbddc->ksp_R);
1133: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1134: }
1135: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
1136: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1137: if (!n_R) {
1138: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1139: PCSetType(pc_temp,PCNONE);
1140: }
1141: /* Set Up KSP for Neumann problem of BDDC */
1142: KSPSetUp(pcbddc->ksp_R);
1144: /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1145: if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1146: /* Dirichlet */
1147: VecSetRandom(pcis->vec1_D,NULL);
1148: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1149: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1150: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1151: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1152: /* need to be adapted? */
1153: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1154: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1155: PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1156: /* print info */
1157: if (pcbddc->dbg_flag) {
1158: PetscViewerFlush(pcbddc->dbg_viewer);
1159: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1160: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1161: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");
1162: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1163: PetscViewerFlush(pcbddc->dbg_viewer);
1164: }
1165: if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1166: PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);
1167: }
1169: /* Neumann */
1170: VecSetRandom(pcbddc->vec1_R,NULL);
1171: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1172: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1173: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1174: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1175: /* need to be adapted? */
1176: use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1177: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1178: /* print info */
1179: if (pcbddc->dbg_flag) {
1180: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1181: PetscViewerFlush(pcbddc->dbg_viewer);
1182: }
1183: if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1184: PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);
1185: }
1186: }
1187: /* free Neumann problem's matrix */
1188: MatDestroy(&A_RR);
1189: return(0);
1190: }
1194: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1195: {
1197: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1200: if (applytranspose) {
1201: if (pcbddc->local_auxmat1) {
1202: MatMultTranspose(pcbddc->local_auxmat2,rhs,work);
1203: MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);
1204: }
1205: KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);
1206: } else {
1207: KSPSolve(pcbddc->ksp_R,rhs,sol);
1208: if (pcbddc->local_auxmat1) {
1209: MatMult(pcbddc->local_auxmat1,sol,work);
1210: MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);
1211: }
1212: }
1213: return(0);
1214: }
1216: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1219: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1220: {
1222: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1223: PC_IS* pcis = (PC_IS*) (pc->data);
1224: const PetscScalar zero = 0.0;
1227: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1228: if (applytranspose) {
1229: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1230: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1231: } else {
1232: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1233: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1234: }
1235: /* start communications from local primal nodes to rhs of coarse solver */
1236: VecSet(pcbddc->coarse_vec,zero);
1237: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
1238: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
1240: /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1241: /* TODO remove null space when doing multilevel */
1242: if (pcbddc->coarse_ksp) {
1243: if (applytranspose) {
1244: KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);
1245: } else {
1246: KSPSolve(pcbddc->coarse_ksp,NULL,NULL);
1247: }
1248: }
1249: /* start communications from coarse solver solution to local primal nodes */
1250: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
1252: /* Local solution on R nodes */
1253: VecSet(pcbddc->vec1_R,zero);
1254: VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1255: VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1256: if (pcbddc->switch_static) {
1257: VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1258: VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1259: }
1260: PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);
1261: VecSet(pcis->vec1_B,zero);
1262: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1263: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1264: if (pcbddc->switch_static) {
1265: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1266: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1267: }
1269: /* complete communications from coarse sol to local primal nodes */
1270: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
1272: /* Sum contributions from two levels */
1273: if (applytranspose) {
1274: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1275: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1276: } else {
1277: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1278: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1279: }
1280: return(0);
1281: }
1283: /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1286: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1287: {
1289: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1290: PetscScalar *array,*array2;
1291: Vec from,to;
1294: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1295: from = pcbddc->coarse_vec;
1296: to = pcbddc->vec1_P;
1297: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1298: Vec tvec;
1299: PetscInt lsize;
1300: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
1301: VecGetLocalSize(tvec,&lsize);
1302: VecGetArrayRead(tvec,(const PetscScalar**)&array);
1303: VecGetArray(from,&array2);
1304: PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));
1305: VecRestoreArrayRead(tvec,(const PetscScalar**)&array);
1306: VecRestoreArray(from,&array2);
1307: }
1308: } else { /* from local to global -> put data in coarse right hand side */
1309: from = pcbddc->vec1_P;
1310: to = pcbddc->coarse_vec;
1311: }
1312: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1313: return(0);
1314: }
1318: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1319: {
1321: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
1322: PetscScalar *array,*array2;
1323: Vec from,to;
1326: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1327: from = pcbddc->coarse_vec;
1328: to = pcbddc->vec1_P;
1329: } else { /* from local to global -> put data in coarse right hand side */
1330: from = pcbddc->vec1_P;
1331: to = pcbddc->coarse_vec;
1332: }
1333: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1334: if (smode == SCATTER_FORWARD) {
1335: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1336: Vec tvec;
1337: PetscInt lsize;
1338: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1339: VecGetLocalSize(tvec,&lsize);
1340: VecGetArrayRead(to,(const PetscScalar**)&array);
1341: VecGetArray(tvec,&array2);
1342: PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));
1343: VecRestoreArrayRead(to,(const PetscScalar**)&array);
1344: VecRestoreArray(tvec,&array2);
1345: }
1346: }
1347: return(0);
1348: }
1350: /* uncomment for testing purposes */
1351: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1354: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1355: {
1356: PetscErrorCode ierr;
1357: PC_IS* pcis = (PC_IS*)(pc->data);
1358: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1359: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
1360: /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1361: MatType impMatType=MATSEQAIJ;
1362: /* one and zero */
1363: PetscScalar one=1.0,zero=0.0;
1364: /* space to store constraints and their local indices */
1365: PetscScalar *temp_quadrature_constraint;
1366: PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1367: /* iterators */
1368: PetscInt i,j,k,total_counts,temp_start_ptr;
1369: /* stuff to store connected components stored in pcbddc->mat_graph */
1370: IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1371: PetscInt n_ISForFaces,n_ISForEdges;
1372: /* near null space stuff */
1373: MatNullSpace nearnullsp;
1374: const Vec *nearnullvecs;
1375: Vec *localnearnullsp;
1376: PetscBool nnsp_has_cnst;
1377: PetscInt nnsp_size;
1378: PetscScalar *array;
1379: /* BLAS integers */
1380: PetscBLASInt lwork,lierr;
1381: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
1382: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
1383: /* LAPACK working arrays for SVD or POD */
1384: PetscBool skip_lapack;
1385: PetscScalar *work;
1386: PetscReal *singular_vals;
1387: #if defined(PETSC_USE_COMPLEX)
1388: PetscReal *rwork;
1389: #endif
1390: #if defined(PETSC_MISSING_LAPACK_GESVD)
1391: PetscBLASInt Blas_one_2=1;
1392: PetscScalar *temp_basis,*correlation_mat;
1393: #else
1394: PetscBLASInt dummy_int_1=1,dummy_int_2=1;
1395: PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1396: #endif
1397: /* reuse */
1398: PetscInt olocal_primal_size;
1399: PetscInt *oprimal_indices_local_idxs;
1400: /* change of basis */
1401: PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1402: PetscBool boolforchange,qr_needed;
1403: PetscBT touched,change_basis,qr_needed_idx;
1404: /* auxiliary stuff */
1405: PetscInt *nnz,*is_indices,*aux_primal_numbering_B;
1406: /* some quantities */
1407: PetscInt n_vertices,total_primal_vertices,valid_constraints;
1408: PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1412: /* Destroy Mat objects computed previously */
1413: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1414: MatDestroy(&pcbddc->ConstraintMatrix);
1415: /* Get index sets for faces, edges and vertices from graph */
1416: if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1417: pcbddc->use_vertices = PETSC_TRUE;
1418: }
1419: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1420: /* HACKS (the two following code branches) */
1421: if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1422: pcbddc->use_change_of_basis = PETSC_TRUE;
1423: pcbddc->use_change_on_faces = PETSC_FALSE;
1424: }
1425: if (pcbddc->NullSpace) {
1426: /* use_change_of_basis should be consistent among processors */
1427: PetscBool tbool = pcbddc->use_change_of_basis;
1428: MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
1429: }
1430: /* print some info */
1431: if (pcbddc->dbg_flag) {
1432: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1433: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1434: i = 0;
1435: if (ISForVertices) {
1436: ISGetSize(ISForVertices,&i);
1437: }
1438: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);
1439: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);
1440: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);
1441: PetscViewerFlush(pcbddc->dbg_viewer);
1442: }
1443: /* check if near null space is attached to global mat */
1444: MatGetNearNullSpace(pc->pmat,&nearnullsp);
1445: if (nearnullsp) {
1446: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
1447: /* remove any stored info */
1448: MatNullSpaceDestroy(&pcbddc->onearnullspace);
1449: PetscFree(pcbddc->onearnullvecs_state);
1450: /* store information for BDDC solver reuse */
1451: PetscObjectReference((PetscObject)nearnullsp);
1452: pcbddc->onearnullspace = nearnullsp;
1453: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
1454: for (i=0;i<nnsp_size;i++) {
1455: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
1456: }
1457: } else { /* if near null space is not provided BDDC uses constants by default */
1458: nnsp_size = 0;
1459: nnsp_has_cnst = PETSC_TRUE;
1460: }
1461: /* get max number of constraints on a single cc */
1462: max_constraints = nnsp_size;
1463: if (nnsp_has_cnst) max_constraints++;
1465: /*
1466: Evaluate maximum storage size needed by the procedure
1467: - temp_indices will contain start index of each constraint stored as follows
1468: - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1469: - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1470: - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1471: */
1472: total_counts = n_ISForFaces+n_ISForEdges;
1473: total_counts *= max_constraints;
1474: n_vertices = 0;
1475: if (ISForVertices) {
1476: ISGetSize(ISForVertices,&n_vertices);
1477: }
1478: total_counts += n_vertices;
1479: PetscMalloc1((total_counts+1),&temp_indices);
1480: PetscBTCreate(total_counts,&change_basis);
1481: total_counts = 0;
1482: max_size_of_constraint = 0;
1483: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1484: if (i<n_ISForEdges) {
1485: used_IS = &ISForEdges[i];
1486: } else {
1487: used_IS = &ISForFaces[i-n_ISForEdges];
1488: }
1489: ISGetSize(*used_IS,&j);
1490: total_counts += j;
1491: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1492: }
1493: total_counts *= max_constraints;
1494: total_counts += n_vertices;
1495: PetscMalloc1(total_counts,&temp_quadrature_constraint);
1496: PetscMalloc1(total_counts,&temp_indices_to_constraint);
1497: PetscMalloc1(total_counts,&temp_indices_to_constraint_B);
1498: /* get local part of global near null space vectors */
1499: PetscMalloc1(nnsp_size,&localnearnullsp);
1500: for (k=0;k<nnsp_size;k++) {
1501: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
1502: VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1503: VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1504: }
1506: /* whether or not to skip lapack calls */
1507: skip_lapack = PETSC_TRUE;
1508: if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1510: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1511: if (!pcbddc->use_nnsp_true && !skip_lapack) {
1512: PetscScalar temp_work;
1513: #if defined(PETSC_MISSING_LAPACK_GESVD)
1514: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1515: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
1516: PetscMalloc1(max_constraints,&singular_vals);
1517: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
1518: #if defined(PETSC_USE_COMPLEX)
1519: PetscMalloc1(3*max_constraints,&rwork);
1520: #endif
1521: /* now we evaluate the optimal workspace using query with lwork=-1 */
1522: PetscBLASIntCast(max_constraints,&Blas_N);
1523: PetscBLASIntCast(max_constraints,&Blas_LDA);
1524: lwork = -1;
1525: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1526: #if !defined(PETSC_USE_COMPLEX)
1527: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1528: #else
1529: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1530: #endif
1531: PetscFPTrapPop();
1532: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1533: #else /* on missing GESVD */
1534: /* SVD */
1535: PetscInt max_n,min_n;
1536: max_n = max_size_of_constraint;
1537: min_n = max_constraints;
1538: if (max_size_of_constraint < max_constraints) {
1539: min_n = max_size_of_constraint;
1540: max_n = max_constraints;
1541: }
1542: PetscMalloc1(min_n,&singular_vals);
1543: #if defined(PETSC_USE_COMPLEX)
1544: PetscMalloc1(5*min_n,&rwork);
1545: #endif
1546: /* now we evaluate the optimal workspace using query with lwork=-1 */
1547: lwork = -1;
1548: PetscBLASIntCast(max_n,&Blas_M);
1549: PetscBLASIntCast(min_n,&Blas_N);
1550: PetscBLASIntCast(max_n,&Blas_LDA);
1551: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1552: #if !defined(PETSC_USE_COMPLEX)
1553: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1554: #else
1555: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1556: #endif
1557: PetscFPTrapPop();
1558: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1559: #endif /* on missing GESVD */
1560: /* Allocate optimal workspace */
1561: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
1562: PetscMalloc1((PetscInt)lwork,&work);
1563: }
1564: /* Now we can loop on constraining sets */
1565: total_counts = 0;
1566: temp_indices[0] = 0;
1567: /* vertices */
1568: if (ISForVertices) {
1569: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
1570: if (nnsp_has_cnst) { /* consider all vertices */
1571: PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));
1572: for (i=0;i<n_vertices;i++) {
1573: temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1574: temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1575: total_counts++;
1576: }
1577: } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1578: PetscBool used_vertex;
1579: for (i=0;i<n_vertices;i++) {
1580: used_vertex = PETSC_FALSE;
1581: k = 0;
1582: while (!used_vertex && k<nnsp_size) {
1583: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1584: if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1585: temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1586: temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1587: temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1588: total_counts++;
1589: used_vertex = PETSC_TRUE;
1590: }
1591: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1592: k++;
1593: }
1594: }
1595: }
1596: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
1597: n_vertices = total_counts;
1598: }
1600: /* edges and faces */
1601: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1602: if (i<n_ISForEdges) {
1603: used_IS = &ISForEdges[i];
1604: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1605: } else {
1606: used_IS = &ISForFaces[i-n_ISForEdges];
1607: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1608: }
1609: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
1610: temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1611: ISGetSize(*used_IS,&size_of_constraint);
1612: ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
1613: /* change of basis should not be performed on local periodic nodes */
1614: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1615: if (nnsp_has_cnst) {
1616: PetscScalar quad_value;
1617: temp_constraints++;
1618: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1619: PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1620: for (j=0;j<size_of_constraint;j++) {
1621: temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1622: }
1623: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
1624: total_counts++;
1625: }
1626: for (k=0;k<nnsp_size;k++) {
1627: PetscReal real_value;
1628: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1629: PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1630: for (j=0;j<size_of_constraint;j++) {
1631: temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1632: }
1633: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1634: /* check if array is null on the connected component */
1635: PetscBLASIntCast(size_of_constraint,&Blas_N);
1636: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1637: if (real_value > 0.0) { /* keep indices and values */
1638: temp_constraints++;
1639: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
1640: total_counts++;
1641: }
1642: }
1643: ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
1644: valid_constraints = temp_constraints;
1645: /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1646: if (!pcbddc->use_nnsp_true && temp_constraints) {
1647: PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1649: #if defined(PETSC_MISSING_LAPACK_GESVD)
1650: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1651: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1652: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1653: the constraints basis will differ (by a complex factor with absolute value equal to 1)
1654: from that computed using LAPACKgesvd
1655: -> This is due to a different computation of eigenvectors in LAPACKheev
1656: -> The quality of the POD-computed basis will be the same */
1657: PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
1658: /* Store upper triangular part of correlation matrix */
1659: PetscBLASIntCast(size_of_constraint,&Blas_N);
1660: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1661: for (j=0;j<temp_constraints;j++) {
1662: for (k=0;k<j+1;k++) {
1663: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1664: }
1665: }
1666: /* compute eigenvalues and eigenvectors of correlation matrix */
1667: PetscBLASIntCast(temp_constraints,&Blas_N);
1668: PetscBLASIntCast(temp_constraints,&Blas_LDA);
1669: #if !defined(PETSC_USE_COMPLEX)
1670: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1671: #else
1672: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1673: #endif
1674: PetscFPTrapPop();
1675: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1676: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1677: j=0;
1678: while (j < temp_constraints && singular_vals[j] < tol) j++;
1679: total_counts=total_counts-j;
1680: valid_constraints = temp_constraints-j;
1681: /* scale and copy POD basis into used quadrature memory */
1682: PetscBLASIntCast(size_of_constraint,&Blas_M);
1683: PetscBLASIntCast(temp_constraints,&Blas_N);
1684: PetscBLASIntCast(temp_constraints,&Blas_K);
1685: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1686: PetscBLASIntCast(temp_constraints,&Blas_LDB);
1687: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
1688: if (j<temp_constraints) {
1689: PetscInt ii;
1690: for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1691: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1692: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1693: PetscFPTrapPop();
1694: for (k=0;k<temp_constraints-j;k++) {
1695: for (ii=0;ii<size_of_constraint;ii++) {
1696: temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1697: }
1698: }
1699: }
1700: #else /* on missing GESVD */
1701: PetscBLASIntCast(size_of_constraint,&Blas_M);
1702: PetscBLASIntCast(temp_constraints,&Blas_N);
1703: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1704: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1705: #if !defined(PETSC_USE_COMPLEX)
1706: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1707: #else
1708: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1709: #endif
1710: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1711: PetscFPTrapPop();
1712: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1713: k = temp_constraints;
1714: if (k > size_of_constraint) k = size_of_constraint;
1715: j = 0;
1716: while (j < k && singular_vals[k-j-1] < tol) j++;
1717: total_counts = total_counts-temp_constraints+k-j;
1718: valid_constraints = k-j;
1719: #endif /* on missing GESVD */
1720: }
1721: /* setting change_of_basis flag is safe now */
1722: if (boolforchange) {
1723: for (j=0;j<valid_constraints;j++) {
1724: PetscBTSet(change_basis,total_counts-j-1);
1725: }
1726: }
1727: }
1728: /* free index sets of faces, edges and vertices */
1729: for (i=0;i<n_ISForFaces;i++) {
1730: ISDestroy(&ISForFaces[i]);
1731: }
1732: PetscFree(ISForFaces);
1733: for (i=0;i<n_ISForEdges;i++) {
1734: ISDestroy(&ISForEdges[i]);
1735: }
1736: PetscFree(ISForEdges);
1737: ISDestroy(&ISForVertices);
1738: /* map temp_indices_to_constraint in boundary numbering */
1739: ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);
1740: if (i != temp_indices[total_counts]) {
1741: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1742: }
1744: /* free workspace */
1745: if (!pcbddc->use_nnsp_true && !skip_lapack) {
1746: PetscFree(work);
1747: #if defined(PETSC_USE_COMPLEX)
1748: PetscFree(rwork);
1749: #endif
1750: PetscFree(singular_vals);
1751: #if defined(PETSC_MISSING_LAPACK_GESVD)
1752: PetscFree(correlation_mat);
1753: PetscFree(temp_basis);
1754: #endif
1755: }
1756: for (k=0;k<nnsp_size;k++) {
1757: VecDestroy(&localnearnullsp[k]);
1758: }
1759: PetscFree(localnearnullsp);
1761: /* set quantities in pcbddc data structure and store previous primal size */
1762: /* n_vertices defines the number of subdomain corners in the primal space */
1763: /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1764: olocal_primal_size = pcbddc->local_primal_size;
1765: pcbddc->local_primal_size = total_counts;
1766: pcbddc->n_vertices = n_vertices;
1767: pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1769: /* Create constraint matrix */
1770: /* The constraint matrix is used to compute the l2g map of primal dofs */
1771: /* so we need to set it up properly either with or without change of basis */
1772: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
1773: MatSetType(pcbddc->ConstraintMatrix,impMatType);
1774: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
1775: /* array to compute a local numbering of constraints : vertices first then constraints */
1776: PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);
1777: /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1778: /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1779: PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);
1780: /* auxiliary stuff for basis change */
1781: PetscMalloc1(max_size_of_constraint,&global_indices);
1782: PetscBTCreate(pcis->n_B,&touched);
1784: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1785: total_primal_vertices=0;
1786: for (i=0;i<pcbddc->local_primal_size;i++) {
1787: size_of_constraint=temp_indices[i+1]-temp_indices[i];
1788: if (size_of_constraint == 1) {
1789: PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);
1790: aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1791: aux_primal_minloc[total_primal_vertices]=0;
1792: total_primal_vertices++;
1793: } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1794: PetscInt min_loc,min_index;
1795: ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);
1796: /* find first untouched local node */
1797: k = 0;
1798: while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1799: min_index = global_indices[k];
1800: min_loc = k;
1801: /* search the minimum among global nodes already untouched on the cc */
1802: for (k=1;k<size_of_constraint;k++) {
1803: /* there can be more than one constraint on a single connected component */
1804: if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1805: min_index = global_indices[k];
1806: min_loc = k;
1807: }
1808: }
1809: PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);
1810: aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1811: aux_primal_minloc[total_primal_vertices]=min_loc;
1812: total_primal_vertices++;
1813: }
1814: }
1815: /* determine if a QR strategy is needed for change of basis */
1816: qr_needed = PETSC_FALSE;
1817: PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);
1818: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1819: if (PetscBTLookup(change_basis,i)) {
1820: size_of_constraint = temp_indices[i+1]-temp_indices[i];
1821: j = 0;
1822: for (k=0;k<size_of_constraint;k++) {
1823: if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1824: j++;
1825: }
1826: }
1827: /* found more than one primal dof on the cc */
1828: if (j > 1) {
1829: PetscBTSet(qr_needed_idx,i);
1830: qr_needed = PETSC_TRUE;
1831: }
1832: }
1833: }
1834: /* free workspace */
1835: PetscFree(global_indices);
1837: /* permute indices in order to have a sorted set of vertices */
1838: PetscSortInt(total_primal_vertices,aux_primal_numbering);
1840: /* nonzero structure of constraint matrix */
1841: PetscMalloc1(pcbddc->local_primal_size,&nnz);
1842: for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1843: j=total_primal_vertices;
1844: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1845: if (!PetscBTLookup(change_basis,i)) {
1846: nnz[j]=temp_indices[i+1]-temp_indices[i];
1847: j++;
1848: }
1849: }
1850: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
1851: PetscFree(nnz);
1852: /* set values in constraint matrix */
1853: for (i=0;i<total_primal_vertices;i++) {
1854: MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);
1855: }
1856: total_counts = total_primal_vertices;
1857: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1858: if (!PetscBTLookup(change_basis,i)) {
1859: size_of_constraint=temp_indices[i+1]-temp_indices[i];
1860: MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
1861: total_counts++;
1862: }
1863: }
1864: /* assembling */
1865: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1866: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1867: /*
1868: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
1869: MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
1870: */
1871: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1872: if (pcbddc->use_change_of_basis) {
1873: /* dual and primal dofs on a single cc */
1874: PetscInt dual_dofs,primal_dofs;
1875: /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1876: PetscInt primal_counter;
1877: /* working stuff for GEQRF */
1878: PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1879: PetscBLASInt lqr_work;
1880: /* working stuff for UNGQR */
1881: PetscScalar *gqr_work,lgqr_work_t;
1882: PetscBLASInt lgqr_work;
1883: /* working stuff for TRTRS */
1884: PetscScalar *trs_rhs;
1885: PetscBLASInt Blas_NRHS;
1886: /* pointers for values insertion into change of basis matrix */
1887: PetscInt *start_rows,*start_cols;
1888: PetscScalar *start_vals;
1889: /* working stuff for values insertion */
1890: PetscBT is_primal;
1892: /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1893: MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);
1894: MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);
1895: MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);
1896: /* work arrays */
1897: PetscMalloc1(pcis->n_B,&nnz);
1898: for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1899: /* nonzeros per row */
1900: for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1901: if (PetscBTLookup(change_basis,i)) {
1902: size_of_constraint = temp_indices[i+1]-temp_indices[i];
1903: if (PetscBTLookup(qr_needed_idx,i)) {
1904: for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1905: } else {
1906: for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1907: /* get local primal index on the cc */
1908: j = 0;
1909: while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1910: nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1911: }
1912: }
1913: }
1914: MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);
1915: PetscFree(nnz);
1916: /* Set initial identity in the matrix */
1917: for (i=0;i<pcis->n_B;i++) {
1918: MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
1919: }
1921: if (pcbddc->dbg_flag) {
1922: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1923: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
1924: }
1927: /* Now we loop on the constraints which need a change of basis */
1928: /*
1929: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1930: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1932: Basic blocks of change of basis matrix T computed
1934: - Using the following block transformation if there is only a primal dof on the cc
1935: (in the example, primal dof is the last one of the edge in LOCAL ordering
1936: in this code, primal dof is the first one of the edge in GLOBAL ordering)
1937: | 1 0 ... 0 1 |
1938: | 0 1 ... 0 1 |
1939: | ... |
1940: | 0 ... 1 1 |
1941: | -s_1/s_n ... -s_{n-1}/-s_n 1 |
1943: - via QR decomposition of constraints otherwise
1944: */
1945: if (qr_needed) {
1946: /* space to store Q */
1947: PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);
1948: /* first we issue queries for optimal work */
1949: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1950: PetscBLASIntCast(max_constraints,&Blas_N);
1951: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1952: lqr_work = -1;
1953: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1954: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1955: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
1956: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
1957: lgqr_work = -1;
1958: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1959: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
1960: PetscBLASIntCast(max_constraints,&Blas_K);
1961: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1962: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1963: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1964: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1965: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
1966: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
1967: /* array to store scaling factors for reflectors */
1968: PetscMalloc1(max_constraints,&qr_tau);
1969: /* array to store rhs and solution of triangular solver */
1970: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
1971: /* allocating workspace for check */
1972: if (pcbddc->dbg_flag) {
1973: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);
1974: }
1975: }
1976: /* array to store whether a node is primal or not */
1977: PetscBTCreate(pcis->n_B,&is_primal);
1978: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
1979: ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);
1980: if (i != total_primal_vertices) {
1981: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1982: }
1983: for (i=0;i<total_primal_vertices;i++) {
1984: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
1985: }
1986: PetscFree(aux_primal_numbering_B);
1988: /* loop on constraints and see whether or not they need a change of basis and compute it */
1989: /* -> using implicit ordering contained in temp_indices data */
1990: total_counts = pcbddc->n_vertices;
1991: primal_counter = total_counts;
1992: while (total_counts<pcbddc->local_primal_size) {
1993: primal_dofs = 1;
1994: if (PetscBTLookup(change_basis,total_counts)) {
1995: /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1996: while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1997: primal_dofs++;
1998: }
1999: /* get constraint info */
2000: size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2001: dual_dofs = size_of_constraint-primal_dofs;
2003: if (pcbddc->dbg_flag) {
2004: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);
2005: }
2007: if (primal_dofs > 1) { /* QR */
2009: /* copy quadrature constraints for change of basis check */
2010: if (pcbddc->dbg_flag) {
2011: PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2012: }
2013: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2014: PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2016: /* compute QR decomposition of constraints */
2017: PetscBLASIntCast(size_of_constraint,&Blas_M);
2018: PetscBLASIntCast(primal_dofs,&Blas_N);
2019: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2020: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2021: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2022: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2023: PetscFPTrapPop();
2025: /* explictly compute R^-T */
2026: PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
2027: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2028: PetscBLASIntCast(primal_dofs,&Blas_N);
2029: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
2030: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2031: PetscBLASIntCast(primal_dofs,&Blas_LDB);
2032: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2033: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2034: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2035: PetscFPTrapPop();
2037: /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2038: PetscBLASIntCast(size_of_constraint,&Blas_M);
2039: PetscBLASIntCast(size_of_constraint,&Blas_N);
2040: PetscBLASIntCast(primal_dofs,&Blas_K);
2041: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2042: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2043: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2044: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2045: PetscFPTrapPop();
2047: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2048: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2049: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2050: PetscBLASIntCast(size_of_constraint,&Blas_M);
2051: PetscBLASIntCast(primal_dofs,&Blas_N);
2052: PetscBLASIntCast(primal_dofs,&Blas_K);
2053: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2054: PetscBLASIntCast(primal_dofs,&Blas_LDB);
2055: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2056: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2057: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2058: PetscFPTrapPop();
2059: PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2061: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2062: start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2063: /* insert cols for primal dofs */
2064: for (j=0;j<primal_dofs;j++) {
2065: start_vals = &qr_basis[j*size_of_constraint];
2066: start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2067: MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2068: }
2069: /* insert cols for dual dofs */
2070: for (j=0,k=0;j<dual_dofs;k++) {
2071: if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2072: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2073: start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2074: MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2075: j++;
2076: }
2077: }
2079: /* check change of basis */
2080: if (pcbddc->dbg_flag) {
2081: PetscInt ii,jj;
2082: PetscBool valid_qr=PETSC_TRUE;
2083: PetscBLASIntCast(primal_dofs,&Blas_M);
2084: PetscBLASIntCast(size_of_constraint,&Blas_N);
2085: PetscBLASIntCast(size_of_constraint,&Blas_K);
2086: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2087: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2088: PetscBLASIntCast(primal_dofs,&Blas_LDC);
2089: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2090: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2091: PetscFPTrapPop();
2092: for (jj=0;jj<size_of_constraint;jj++) {
2093: for (ii=0;ii<primal_dofs;ii++) {
2094: if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2095: if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2096: }
2097: }
2098: if (!valid_qr) {
2099: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
2100: for (jj=0;jj<size_of_constraint;jj++) {
2101: for (ii=0;ii<primal_dofs;ii++) {
2102: if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2103: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2104: }
2105: if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2106: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2107: }
2108: }
2109: }
2110: } else {
2111: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
2112: }
2113: }
2114: } else { /* simple transformation block */
2115: PetscInt row,col;
2116: PetscScalar val;
2117: for (j=0;j<size_of_constraint;j++) {
2118: row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2119: if (!PetscBTLookup(is_primal,row)) {
2120: col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2121: MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2122: MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);
2123: } else {
2124: for (k=0;k<size_of_constraint;k++) {
2125: col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2126: if (row != col) {
2127: val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2128: } else {
2129: val = 1.0;
2130: }
2131: MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2132: }
2133: }
2134: }
2135: if (pcbddc->dbg_flag) {
2136: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
2137: }
2138: }
2139: /* increment primal counter */
2140: primal_counter += primal_dofs;
2141: } else {
2142: if (pcbddc->dbg_flag) {
2143: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);
2144: }
2145: }
2146: /* increment constraint counter total_counts */
2147: total_counts += primal_dofs;
2148: }
2150: /* free workspace */
2151: if (qr_needed) {
2152: if (pcbddc->dbg_flag) {
2153: PetscFree(work);
2154: }
2155: PetscFree(trs_rhs);
2156: PetscFree(qr_tau);
2157: PetscFree(qr_work);
2158: PetscFree(gqr_work);
2159: PetscFree(qr_basis);
2160: }
2161: PetscBTDestroy(&is_primal);
2162: /* assembling */
2163: MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2164: MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2165: /*
2166: PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2167: MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);
2168: */
2169: }
2170: /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */
2171: if (pcbddc->user_ChangeOfBasisMatrix) {
2172: PetscInt rows,cols;
2173: MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);
2174: if (rows == pcis->n && cols == pcis->n) {
2175: MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);
2176: } else {
2177: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2178: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2179: }
2180: }
2182: /* get indices in local ordering for vertices and constraints */
2183: if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2184: PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);
2185: PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));
2186: }
2187: PetscFree(aux_primal_numbering);
2188: PetscFree(pcbddc->primal_indices_local_idxs);
2189: PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);
2190: PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);
2191: PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));
2192: PetscFree(aux_primal_numbering);
2193: PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);
2194: PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));
2195: PetscFree(aux_primal_numbering);
2196: /* set quantities in PCBDDC data struct */
2197: pcbddc->n_actual_vertices = i;
2198: /* check if a new primal space has been introduced */
2199: pcbddc->new_primal_space_local = PETSC_TRUE;
2200: if (olocal_primal_size == pcbddc->local_primal_size) {
2201: PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);
2202: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2203: PetscFree(oprimal_indices_local_idxs);
2204: }
2205: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2206: MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2208: /* flush dbg viewer */
2209: if (pcbddc->dbg_flag) {
2210: PetscViewerFlush(pcbddc->dbg_viewer);
2211: }
2213: /* free workspace */
2214: PetscBTDestroy(&touched);
2215: PetscBTDestroy(&qr_needed_idx);
2216: PetscFree(aux_primal_minloc);
2217: PetscFree(temp_indices);
2218: PetscBTDestroy(&change_basis);
2219: PetscFree(temp_indices_to_constraint);
2220: PetscFree(temp_indices_to_constraint_B);
2221: PetscFree(temp_quadrature_constraint);
2222: return(0);
2223: }
2227: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2228: {
2229: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2230: PC_IS *pcis = (PC_IS*)pc->data;
2231: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2232: PetscInt ierr,i,vertex_size;
2233: PetscViewer viewer=pcbddc->dbg_viewer;
2236: /* Reset previously computed graph */
2237: PCBDDCGraphReset(pcbddc->mat_graph);
2238: /* Init local Graph struct */
2239: PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);
2241: /* Check validity of the csr graph passed in by the user */
2242: if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2243: PCBDDCGraphResetCSR(pcbddc->mat_graph);
2244: }
2246: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2247: if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2248: Mat mat_adj;
2249: const PetscInt *xadj,*adjncy;
2250: PetscBool flg_row=PETSC_TRUE;
2252: MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2253: MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2254: if (!flg_row) {
2255: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2256: }
2257: PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);
2258: MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2259: if (!flg_row) {
2260: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2261: }
2262: MatDestroy(&mat_adj);
2263: }
2265: /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2266: vertex_size = 1;
2267: if (pcbddc->user_provided_isfordofs) {
2268: if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2269: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
2270: for (i=0;i<pcbddc->n_ISForDofs;i++) {
2271: PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
2272: ISDestroy(&pcbddc->ISForDofs[i]);
2273: }
2274: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2275: pcbddc->n_ISForDofs = 0;
2276: PetscFree(pcbddc->ISForDofs);
2277: }
2278: /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2279: MatGetBlockSize(matis->A,&vertex_size);
2280: } else {
2281: if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2282: MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);
2283: PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);
2284: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2285: ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
2286: }
2287: }
2288: }
2290: /* Setup of Graph */
2291: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2292: PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
2293: }
2294: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2295: PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
2296: }
2297: PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2299: /* Graph's connected components analysis */
2300: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
2302: /* print some info to stdout */
2303: if (pcbddc->dbg_flag) {
2304: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2305: }
2307: /* mark topography has done */
2308: pcbddc->recompute_topography = PETSC_FALSE;
2309: return(0);
2310: }
2314: PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2315: {
2316: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2317: PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2321: n = 0;
2322: vertices = 0;
2323: if (pcbddc->ConstraintMatrix) {
2324: MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);
2325: for (i=0;i<local_primal_size;i++) {
2326: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2327: if (size_of_constraint == 1) n++;
2328: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2329: }
2330: if (vertices_idx) {
2331: PetscMalloc1(n,&vertices);
2332: n = 0;
2333: for (i=0;i<local_primal_size;i++) {
2334: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2335: if (size_of_constraint == 1) {
2336: vertices[n++]=row_cmat_indices[0];
2337: }
2338: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2339: }
2340: }
2341: }
2342: *n_vertices = n;
2343: if (vertices_idx) *vertices_idx = vertices;
2344: return(0);
2345: }
2349: PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2350: {
2351: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2352: PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2353: PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2354: PetscBT touched;
2357: /* This function assumes that the number of local constraints per connected component
2358: is not greater than the number of nodes defined for the connected component
2359: (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2361: n = 0;
2362: constraints_index = 0;
2363: if (pcbddc->ConstraintMatrix) {
2364: MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);
2365: max_size_of_constraint = 0;
2366: for (i=0;i<local_primal_size;i++) {
2367: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2368: if (size_of_constraint > 1) {
2369: n++;
2370: }
2371: max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2372: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2373: }
2374: if (constraints_idx) {
2375: PetscMalloc1(n,&constraints_index);
2376: PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);
2377: PetscBTCreate(local_size,&touched);
2378: n = 0;
2379: for (i=0;i<local_primal_size;i++) {
2380: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2381: if (size_of_constraint > 1) {
2382: ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);
2383: /* find first untouched local node */
2384: j = 0;
2385: while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2386: min_index = row_cmat_global_indices[j];
2387: min_loc = j;
2388: /* search the minimum among nodes not yet touched on the connected component
2389: since there can be more than one constraint on a single cc */
2390: for (j=1;j<size_of_constraint;j++) {
2391: if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2392: min_index = row_cmat_global_indices[j];
2393: min_loc = j;
2394: }
2395: }
2396: PetscBTSet(touched,row_cmat_indices[min_loc]);
2397: constraints_index[n++] = row_cmat_indices[min_loc];
2398: }
2399: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2400: }
2401: PetscBTDestroy(&touched);
2402: PetscFree(row_cmat_global_indices);
2403: }
2404: }
2405: *n_constraints = n;
2406: if (constraints_idx) *constraints_idx = constraints_index;
2407: return(0);
2408: }
2410: /* the next two functions has been adapted from pcis.c */
2413: PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2414: {
2416: PC_IS *pcis = (PC_IS*)(pc->data);
2419: if (!vec2_B) { vec2_B = v; }
2420: MatMult(pcis->A_BB,v,vec1_B);
2421: MatMult(pcis->A_IB,v,vec1_D);
2422: KSPSolve(pcis->ksp_D,vec1_D,vec2_D);
2423: MatMult(pcis->A_BI,vec2_D,vec2_B);
2424: VecAXPY(vec1_B,-1.0,vec2_B);
2425: return(0);
2426: }
2430: PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2431: {
2433: PC_IS *pcis = (PC_IS*)(pc->data);
2436: if (!vec2_B) { vec2_B = v; }
2437: MatMultTranspose(pcis->A_BB,v,vec1_B);
2438: MatMultTranspose(pcis->A_BI,v,vec1_D);
2439: KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);
2440: MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);
2441: VecAXPY(vec1_B,-1.0,vec2_B);
2442: return(0);
2443: }
2447: PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2448: {
2449: Vec local_vec,global_vec;
2450: IS seqis,paris;
2451: VecScatter scatter_ctx;
2452: PetscScalar *array;
2453: PetscInt *temp_global_dofs;
2454: PetscScalar globalsum;
2455: PetscInt i,j,s;
2456: PetscInt nlocals,first_index,old_index,max_local;
2457: PetscMPIInt rank_prec_comm,size_prec_comm,max_global;
2458: PetscMPIInt *dof_sizes,*dof_displs;
2459: PetscBool first_found;
2463: /* mpi buffers */
2464: MPI_Comm_size(comm,&size_prec_comm);
2465: MPI_Comm_rank(comm,&rank_prec_comm);
2466: j = ( !rank_prec_comm ? size_prec_comm : 0);
2467: PetscMalloc1(j,&dof_sizes);
2468: PetscMalloc1(j,&dof_displs);
2469: /* get maximum size of subset */
2470: PetscMalloc1(n_local_dofs,&temp_global_dofs);
2471: ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);
2472: max_local = 0;
2473: for (i=0;i<n_local_dofs;i++) {
2474: if (max_local < temp_global_dofs[i] ) {
2475: max_local = temp_global_dofs[i];
2476: }
2477: }
2478: MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2479: max_global++;
2480: max_local = 0;
2481: for (i=0;i<n_local_dofs;i++) {
2482: if (max_local < local_dofs[i] ) {
2483: max_local = local_dofs[i];
2484: }
2485: }
2486: max_local++;
2487: /* allocate workspace */
2488: VecCreate(PETSC_COMM_SELF,&local_vec);
2489: VecSetSizes(local_vec,PETSC_DECIDE,max_local);
2490: VecSetType(local_vec,VECSEQ);
2491: VecCreate(comm,&global_vec);
2492: VecSetSizes(global_vec,PETSC_DECIDE,max_global);
2493: VecSetType(global_vec,VECMPI);
2494: /* create scatter */
2495: ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);
2496: ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);
2497: VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);
2498: ISDestroy(&seqis);
2499: ISDestroy(&paris);
2500: /* init array */
2501: VecSet(global_vec,0.0);
2502: VecSet(local_vec,0.0);
2503: VecGetArray(local_vec,&array);
2504: if (local_dofs_mult) {
2505: for (i=0;i<n_local_dofs;i++) {
2506: array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2507: }
2508: } else {
2509: for (i=0;i<n_local_dofs;i++) {
2510: array[local_dofs[i]]=1.0;
2511: }
2512: }
2513: VecRestoreArray(local_vec,&array);
2514: /* scatter into global vec and get total number of global dofs */
2515: VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2516: VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2517: VecSum(global_vec,&globalsum);
2518: *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2519: /* Fill global_vec with cumulative function for global numbering */
2520: VecGetArray(global_vec,&array);
2521: VecGetLocalSize(global_vec,&s);
2522: nlocals = 0;
2523: first_index = -1;
2524: first_found = PETSC_FALSE;
2525: for (i=0;i<s;i++) {
2526: if (!first_found && PetscRealPart(array[i]) > 0.1) {
2527: first_found = PETSC_TRUE;
2528: first_index = i;
2529: }
2530: nlocals += (PetscInt)PetscRealPart(array[i]);
2531: }
2532: MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);
2533: if (!rank_prec_comm) {
2534: dof_displs[0]=0;
2535: for (i=1;i<size_prec_comm;i++) {
2536: dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2537: }
2538: }
2539: MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);
2540: if (first_found) {
2541: array[first_index] += (PetscScalar)nlocals;
2542: old_index = first_index;
2543: for (i=first_index+1;i<s;i++) {
2544: if (PetscRealPart(array[i]) > 0.1) {
2545: array[i] += array[old_index];
2546: old_index = i;
2547: }
2548: }
2549: }
2550: VecRestoreArray(global_vec,&array);
2551: VecSet(local_vec,0.0);
2552: VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2553: VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2554: /* get global ordering of local dofs */
2555: VecGetArray(local_vec,&array);
2556: if (local_dofs_mult) {
2557: for (i=0;i<n_local_dofs;i++) {
2558: temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2559: }
2560: } else {
2561: for (i=0;i<n_local_dofs;i++) {
2562: temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2563: }
2564: }
2565: VecRestoreArray(local_vec,&array);
2566: /* free workspace */
2567: VecScatterDestroy(&scatter_ctx);
2568: VecDestroy(&local_vec);
2569: VecDestroy(&global_vec);
2570: PetscFree(dof_sizes);
2571: PetscFree(dof_displs);
2572: /* return pointer to global ordering of local dofs */
2573: *global_numbering_subset = temp_global_dofs;
2574: return(0);
2575: }
2579: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2580: {
2581: PetscInt i,j;
2582: PetscScalar *alphas;
2586: /* this implements stabilized Gram-Schmidt */
2587: PetscMalloc1(n,&alphas);
2588: for (i=0;i<n;i++) {
2589: VecNormalize(vecs[i],NULL);
2590: if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
2591: for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
2592: }
2593: PetscFree(alphas);
2594: return(0);
2595: }
2597: /* TODO
2598: - now preallocation is done assuming SEQDENSE local matrices
2599: */
2602: static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2603: {
2604: Mat new_mat;
2605: Mat_IS *matis = (Mat_IS*)(mat->data);
2606: /* info on mat */
2607: /* ISLocalToGlobalMapping rmapping,cmapping; */
2608: PetscInt bs,rows,cols;
2609: PetscInt lrows,lcols;
2610: PetscInt local_rows,local_cols;
2611: PetscBool isdense;
2612: /* values insertion */
2613: PetscScalar *array;
2614: PetscInt *local_indices,*global_indices;
2615: /* work */
2616: PetscInt i,j,index_row;
2617: PetscErrorCode ierr;
2620: /* MISSING CHECKS
2621: - rectangular case not covered (it is not allowed by MATIS)
2622: */
2623: /* get info from mat */
2624: /* MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping); */
2625: MatGetSize(mat,&rows,&cols);
2626: MatGetBlockSize(mat,&bs);
2627: MatGetSize(matis->A,&local_rows,&local_cols);
2629: /* work */
2630: PetscMalloc1(local_rows,&local_indices);
2631: for (i=0;i<local_rows;i++) local_indices[i]=i;
2632: /* map indices of local mat to global values */
2633: PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);
2634: /* ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices); */
2635: ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);
2637: if (reuse==MAT_INITIAL_MATRIX) {
2638: Vec vec_dnz,vec_onz;
2639: PetscScalar *my_dnz,*my_onz;
2640: PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2641: PetscInt index_col,owner;
2642: PetscMPIInt nsubdomains;
2644: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);
2645: MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);
2646: MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);
2647: MatSetBlockSize(new_mat,bs);
2648: MatSetType(new_mat,Mtype);
2649: MatSetUp(new_mat);
2650: MatGetLocalSize(new_mat,&lrows,&lcols);
2652: /*
2653: preallocation
2654: */
2656: MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);
2657: /*
2658: Some vectors are needed to sum up properly on shared interface dofs.
2659: Preallocation macros cannot do the job.
2660: Note that preallocation is not exact, since it overestimates nonzeros
2661: */
2662: MatGetVecs(new_mat,NULL,&vec_dnz);
2663: /* VecSetLocalToGlobalMapping(vec_dnz,rmapping); */
2664: VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);
2665: VecDuplicate(vec_dnz,&vec_onz);
2666: /* All processes need to compute entire row ownership */
2667: PetscMalloc1(rows,&row_ownership);
2668: MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);
2669: for (i=0;i<nsubdomains;i++) {
2670: for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2671: row_ownership[j]=i;
2672: }
2673: }
2675: /*
2676: my_dnz and my_onz contains exact contribution to preallocation from each local mat
2677: then, they will be summed up properly. This way, preallocation is always sufficient
2678: */
2679: PetscMalloc1(local_rows,&my_dnz);
2680: PetscMalloc1(local_rows,&my_onz);
2681: PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));
2682: PetscMemzero(my_onz,local_rows*sizeof(*my_onz));
2683: for (i=0;i<local_rows;i++) {
2684: index_row = global_indices[i];
2685: for (j=i;j<local_rows;j++) {
2686: owner = row_ownership[index_row];
2687: index_col = global_indices[j];
2688: if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2689: my_dnz[i] += 1.0;
2690: } else { /* offdiag block */
2691: my_onz[i] += 1.0;
2692: }
2693: /* same as before, interchanging rows and cols */
2694: if (i != j) {
2695: owner = row_ownership[index_col];
2696: if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2697: my_dnz[j] += 1.0;
2698: } else {
2699: my_onz[j] += 1.0;
2700: }
2701: }
2702: }
2703: }
2704: VecSet(vec_dnz,0.0);
2705: VecSet(vec_onz,0.0);
2706: if (local_rows) { /* multilevel guard */
2707: VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);
2708: VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);
2709: }
2710: VecAssemblyBegin(vec_dnz);
2711: VecAssemblyBegin(vec_onz);
2712: VecAssemblyEnd(vec_dnz);
2713: VecAssemblyEnd(vec_onz);
2714: PetscFree(my_dnz);
2715: PetscFree(my_onz);
2716: PetscFree(row_ownership);
2718: /* set computed preallocation in dnz and onz */
2719: VecGetArray(vec_dnz,&array);
2720: for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2721: VecRestoreArray(vec_dnz,&array);
2722: VecGetArray(vec_onz,&array);
2723: for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2724: VecRestoreArray(vec_onz,&array);
2725: VecDestroy(&vec_dnz);
2726: VecDestroy(&vec_onz);
2728: /* Resize preallocation if overestimated */
2729: for (i=0;i<lrows;i++) {
2730: dnz[i] = PetscMin(dnz[i],lcols);
2731: onz[i] = PetscMin(onz[i],cols-lcols);
2732: }
2733: /* set preallocation */
2734: MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);
2735: for (i=0;i<lrows/bs;i++) {
2736: dnz[i] = dnz[i*bs]/bs;
2737: onz[i] = onz[i*bs]/bs;
2738: }
2739: MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2740: for (i=0;i<lrows/bs;i++) {
2741: dnz[i] = dnz[i]-i;
2742: }
2743: MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2744: MatPreallocateFinalize(dnz,onz);
2745: *M = new_mat;
2746: } else {
2747: PetscInt mbs,mrows,mcols;
2748: /* some checks */
2749: MatGetBlockSize(*M,&mbs);
2750: MatGetSize(*M,&mrows,&mcols);
2751: if (mrows != rows) {
2752: SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2753: }
2754: if (mrows != rows) {
2755: SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2756: }
2757: if (mbs != bs) {
2758: SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2759: }
2760: MatZeroEntries(*M);
2761: }
2762: /* set local to global mappings */
2763: /* MatSetLocalToGlobalMapping(*M,rmapping,cmapping); */
2764: /* Set values */
2765: PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);
2766: if (isdense) { /* special case for dense local matrices */
2767: MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);
2768: MatDenseGetArray(matis->A,&array);
2769: MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);
2770: MatDenseRestoreArray(matis->A,&array);
2771: PetscFree(local_indices);
2772: PetscFree(global_indices);
2773: } else { /* very basic values insertion for all other matrix types */
2774: PetscFree(local_indices);
2775: for (i=0;i<local_rows;i++) {
2776: MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2777: /* MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES); */
2778: ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);
2779: ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);
2780: MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);
2781: MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2782: }
2783: PetscFree(global_indices);
2784: }
2785: MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);
2786: MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);
2787: if (isdense) {
2788: MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);
2789: }
2790: return(0);
2791: }
2795: PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2796: {
2797: Mat subdomain_adj;
2798: IS new_ranks,ranks_send_to;
2799: MatPartitioning partitioner;
2800: Mat_IS *matis;
2801: PetscInt n_neighs,*neighs,*n_shared,**shared;
2802: PetscInt prank;
2803: PetscMPIInt size,rank,color;
2804: PetscInt *xadj,*adjncy,*oldranks;
2805: PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2806: PetscInt i,j,local_size,threshold=0;
2807: PetscErrorCode ierr;
2808: PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2809: PetscSubcomm subcomm;
2812: PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);
2813: PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
2814: PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);
2816: /* Get info on mapping */
2817: matis = (Mat_IS*)(mat->data);
2818: ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);
2819: ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
2821: /* build local CSR graph of subdomains' connectivity */
2822: PetscMalloc1(2,&xadj);
2823: xadj[0] = 0;
2824: xadj[1] = PetscMax(n_neighs-1,0);
2825: PetscMalloc1(xadj[1],&adjncy);
2826: PetscMalloc1(xadj[1],&adjncy_wgt);
2828: if (threshold) {
2829: PetscInt* count,min_threshold;
2830: PetscMalloc1(local_size,&count);
2831: PetscMemzero(count,local_size*sizeof(PetscInt));
2832: for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2833: for (j=0;j<n_shared[i];j++) {
2834: count[shared[i][j]] += 1;
2835: }
2836: }
2837: /* adapt threshold since we dont want to lose significant connections */
2838: min_threshold = n_neighs;
2839: for (i=1;i<n_neighs;i++) {
2840: for (j=0;j<n_shared[i];j++) {
2841: min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2842: }
2843: }
2844: threshold = PetscMax(min_threshold+1,threshold);
2845: xadj[1] = 0;
2846: for (i=1;i<n_neighs;i++) {
2847: for (j=0;j<n_shared[i];j++) {
2848: if (count[shared[i][j]] < threshold) {
2849: adjncy[xadj[1]] = neighs[i];
2850: adjncy_wgt[xadj[1]] = n_shared[i];
2851: xadj[1]++;
2852: break;
2853: }
2854: }
2855: }
2856: PetscFree(count);
2857: } else {
2858: if (xadj[1]) {
2859: PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
2860: PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
2861: }
2862: }
2863: ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
2864: if (use_square) {
2865: for (i=0;i<xadj[1];i++) {
2866: adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2867: }
2868: }
2869: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
2871: PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);
2873: /*
2874: Restrict work on active processes only.
2875: */
2876: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
2877: PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
2878: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
2879: PetscMPIIntCast(!local_size,&color);
2880: PetscSubcommSetTypeGeneral(subcomm,color,rank);
2881: if (color) {
2882: PetscFree(xadj);
2883: PetscFree(adjncy);
2884: PetscFree(adjncy_wgt);
2885: } else {
2886: PetscInt coarsening_ratio;
2887: MPI_Comm_size(subcomm->comm,&size);
2888: PetscMalloc1(size,&oldranks);
2889: prank = rank;
2890: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);
2891: /*
2892: for (i=0;i<size;i++) {
2893: PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2894: }
2895: */
2896: for (i=0;i<xadj[1];i++) {
2897: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
2898: }
2899: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
2900: MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
2901: /* MatView(subdomain_adj,0); */
2903: /* Partition */
2904: MatPartitioningCreate(subcomm->comm,&partitioner);
2905: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
2906: if (use_vwgt) {
2907: PetscMalloc(sizeof(*v_wgt),&v_wgt);
2908: v_wgt[0] = local_size;
2909: MatPartitioningSetVertexWeights(partitioner,v_wgt);
2910: }
2911: n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2912: coarsening_ratio = size/n_subdomains;
2913: /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2914: MatPartitioningSetNParts(partitioner,n_subdomains);
2915: MatPartitioningSetFromOptions(partitioner);
2916: MatPartitioningApply(partitioner,&new_ranks);
2917: /* MatPartitioningView(partitioner,0); */
2919: ISGetIndices(new_ranks,(const PetscInt**)&is_indices);
2920: if (contiguous) {
2921: ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2922: } else {
2923: ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2924: }
2925: ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);
2926: /* clean up */
2927: PetscFree(oldranks);
2928: ISDestroy(&new_ranks);
2929: MatDestroy(&subdomain_adj);
2930: MatPartitioningDestroy(&partitioner);
2931: }
2932: PetscSubcommDestroy(&subcomm);
2934: /* assemble parallel IS for sends */
2935: i = 1;
2936: if (color) i=0;
2937: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
2939: /* get back IS */
2940: *is_sends = ranks_send_to;
2941: return(0);
2942: }
2944: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2948: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2949: {
2950: Mat local_mat;
2951: Mat_IS *matis;
2952: IS is_sends_internal;
2953: PetscInt rows,cols;
2954: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2955: PetscBool ismatis,isdense,destroy_mat;
2956: ISLocalToGlobalMapping l2gmap;
2957: PetscInt* l2gmap_indices;
2958: const PetscInt* is_indices;
2959: MatType new_local_type;
2960: /* buffers */
2961: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2962: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2963: PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2964: /* MPI */
2965: MPI_Comm comm,comm_n;
2966: PetscSubcomm subcomm;
2967: PetscMPIInt n_sends,n_recvs,commsize;
2968: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2969: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2970: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2971: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2972: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2973: PetscErrorCode ierr;
2976: /* TODO: add missing checks */
2981: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
2982: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2983: MatISGetLocalMat(mat,&local_mat);
2984: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
2985: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2986: MatGetSize(local_mat,&rows,&cols);
2987: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2988: if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2989: PetscInt mrows,mcols,mnrows,mncols;
2990: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
2991: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2992: MatGetSize(mat,&mrows,&mcols);
2993: MatGetSize(*mat_n,&mnrows,&mncols);
2994: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2995: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2996: }
2997: MatGetBlockSize(local_mat,&bs);
2999: /* prepare IS for sending if not provided */
3000: if (!is_sends) {
3001: PetscBool pcontig = PETSC_TRUE;
3002: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3003: MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);
3004: } else {
3005: PetscObjectReference((PetscObject)is_sends);
3006: is_sends_internal = is_sends;
3007: }
3009: /* get pointer of MATIS data */
3010: matis = (Mat_IS*)mat->data;
3012: /* get comm */
3013: PetscObjectGetComm((PetscObject)mat,&comm);
3015: /* compute number of sends */
3016: ISGetLocalSize(is_sends_internal,&i);
3017: PetscMPIIntCast(i,&n_sends);
3019: /* compute number of receives */
3020: MPI_Comm_size(comm,&commsize);
3021: PetscMalloc1(commsize,&iflags);
3022: PetscMemzero(iflags,commsize*sizeof(*iflags));
3023: ISGetIndices(is_sends_internal,&is_indices);
3024: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3025: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
3026: PetscFree(iflags);
3028: /* restrict comm if requested */
3029: subcomm = 0;
3030: destroy_mat = PETSC_FALSE;
3031: if (restrict_comm) {
3032: PetscMPIInt color,rank,subcommsize;
3033: MPI_Comm_rank(comm,&rank);
3034: color = 0;
3035: if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3036: MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
3037: subcommsize = commsize - subcommsize;
3038: /* check if reuse has been requested */
3039: if (reuse == MAT_REUSE_MATRIX) {
3040: if (*mat_n) {
3041: PetscMPIInt subcommsize2;
3042: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
3043: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3044: comm_n = PetscObjectComm((PetscObject)*mat_n);
3045: } else {
3046: comm_n = PETSC_COMM_SELF;
3047: }
3048: } else { /* MAT_INITIAL_MATRIX */
3049: PetscSubcommCreate(comm,&subcomm);
3050: PetscSubcommSetNumber(subcomm,2);
3051: PetscSubcommSetTypeGeneral(subcomm,color,rank);
3052: comm_n = subcomm->comm;
3053: }
3054: /* flag to destroy *mat_n if not significative */
3055: if (color) destroy_mat = PETSC_TRUE;
3056: } else {
3057: comm_n = comm;
3058: }
3060: /* prepare send/receive buffers */
3061: PetscMalloc1(commsize,&ilengths_idxs);
3062: PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
3063: PetscMalloc1(commsize,&ilengths_vals);
3064: PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
3065: if (nis) {
3066: PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);
3067: PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));
3068: }
3070: /* Get data from local matrices */
3071: if (!isdense) {
3072: /* TODO: See below some guidelines on how to prepare the local buffers */
3073: /*
3074: send_buffer_vals should contain the raw values of the local matrix
3075: send_buffer_idxs should contain:
3076: - MatType_PRIVATE type
3077: - PetscInt size_of_l2gmap
3078: - PetscInt global_row_indices[size_of_l2gmap]
3079: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3080: */
3081: } else {
3082: MatDenseGetArray(local_mat,&send_buffer_vals);
3083: ISLocalToGlobalMappingGetSize(matis->mapping,&i);
3084: PetscMalloc1((i+2),&send_buffer_idxs);
3085: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3086: send_buffer_idxs[1] = i;
3087: ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3088: PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
3089: ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3090: PetscMPIIntCast(i,&len);
3091: for (i=0;i<n_sends;i++) {
3092: ilengths_vals[is_indices[i]] = len*len;
3093: ilengths_idxs[is_indices[i]] = len+2;
3094: }
3095: }
3096: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
3097: /* additional is (if any) */
3098: if (nis) {
3099: PetscMPIInt psum;
3100: PetscInt j;
3101: for (j=0,psum=0;j<nis;j++) {
3102: PetscInt plen;
3103: ISGetLocalSize(isarray[j],&plen);
3104: PetscMPIIntCast(plen,&len);
3105: psum += len+1; /* indices + lenght */
3106: }
3107: PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);
3108: for (j=0,psum=0;j<nis;j++) {
3109: PetscInt plen;
3110: const PetscInt *is_array_idxs;
3111: ISGetLocalSize(isarray[j],&plen);
3112: send_buffer_idxs_is[psum] = plen;
3113: ISGetIndices(isarray[j],&is_array_idxs);
3114: PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
3115: ISRestoreIndices(isarray[j],&is_array_idxs);
3116: psum += plen+1; /* indices + lenght */
3117: }
3118: for (i=0;i<n_sends;i++) {
3119: ilengths_idxs_is[is_indices[i]] = psum;
3120: }
3121: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
3122: }
3124: buf_size_idxs = 0;
3125: buf_size_vals = 0;
3126: buf_size_idxs_is = 0;
3127: for (i=0;i<n_recvs;i++) {
3128: buf_size_idxs += (PetscInt)olengths_idxs[i];
3129: buf_size_vals += (PetscInt)olengths_vals[i];
3130: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3131: }
3132: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
3133: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
3134: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
3136: /* get new tags for clean communications */
3137: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
3138: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
3139: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
3141: /* allocate for requests */
3142: PetscMalloc1(n_sends,&send_req_idxs);
3143: PetscMalloc1(n_sends,&send_req_vals);
3144: PetscMalloc1(n_sends,&send_req_idxs_is);
3145: PetscMalloc1(n_recvs,&recv_req_idxs);
3146: PetscMalloc1(n_recvs,&recv_req_vals);
3147: PetscMalloc1(n_recvs,&recv_req_idxs_is);
3149: /* communications */
3150: ptr_idxs = recv_buffer_idxs;
3151: ptr_vals = recv_buffer_vals;
3152: ptr_idxs_is = recv_buffer_idxs_is;
3153: for (i=0;i<n_recvs;i++) {
3154: source_dest = onodes[i];
3155: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
3156: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
3157: ptr_idxs += olengths_idxs[i];
3158: ptr_vals += olengths_vals[i];
3159: if (nis) {
3160: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
3161: ptr_idxs_is += olengths_idxs_is[i];
3162: }
3163: }
3164: for (i=0;i<n_sends;i++) {
3165: PetscMPIIntCast(is_indices[i],&source_dest);
3166: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
3167: MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
3168: if (nis) {
3169: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
3170: }
3171: }
3172: ISRestoreIndices(is_sends_internal,&is_indices);
3173: ISDestroy(&is_sends_internal);
3175: /* assemble new l2g map */
3176: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
3177: ptr_idxs = recv_buffer_idxs;
3178: buf_size_idxs = 0;
3179: for (i=0;i<n_recvs;i++) {
3180: buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3181: ptr_idxs += olengths_idxs[i];
3182: }
3183: PetscMalloc1(buf_size_idxs,&l2gmap_indices);
3184: ptr_idxs = recv_buffer_idxs;
3185: buf_size_idxs = 0;
3186: for (i=0;i<n_recvs;i++) {
3187: PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
3188: buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3189: ptr_idxs += olengths_idxs[i];
3190: }
3191: PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);
3192: ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
3193: PetscFree(l2gmap_indices);
3195: /* infer new local matrix type from received local matrices type */
3196: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3197: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3198: if (n_recvs) {
3199: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3200: ptr_idxs = recv_buffer_idxs;
3201: for (i=0;i<n_recvs;i++) {
3202: if ((PetscInt)new_local_type_private != *ptr_idxs) {
3203: new_local_type_private = MATAIJ_PRIVATE;
3204: break;
3205: }
3206: ptr_idxs += olengths_idxs[i];
3207: }
3208: switch (new_local_type_private) {
3209: case MATDENSE_PRIVATE:
3210: if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3211: new_local_type = MATSEQAIJ;
3212: bs = 1;
3213: } else { /* if I receive only 1 dense matrix */
3214: new_local_type = MATSEQDENSE;
3215: bs = 1;
3216: }
3217: break;
3218: case MATAIJ_PRIVATE:
3219: new_local_type = MATSEQAIJ;
3220: bs = 1;
3221: break;
3222: case MATBAIJ_PRIVATE:
3223: new_local_type = MATSEQBAIJ;
3224: break;
3225: case MATSBAIJ_PRIVATE:
3226: new_local_type = MATSEQSBAIJ;
3227: break;
3228: default:
3229: SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3230: break;
3231: }
3232: } else { /* by default, new_local_type is seqdense */
3233: new_local_type = MATSEQDENSE;
3234: bs = 1;
3235: }
3237: /* create MATIS object if needed */
3238: if (reuse == MAT_INITIAL_MATRIX) {
3239: MatGetSize(mat,&rows,&cols);
3240: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);
3241: } else {
3242: /* it also destroys the local matrices */
3243: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
3244: }
3245: ISLocalToGlobalMappingDestroy(&l2gmap);
3246: MatISGetLocalMat(*mat_n,&local_mat);
3247: MatSetType(local_mat,new_local_type);
3248: MatSetUp(local_mat); /* WARNING -> no preallocation yet */
3250: /* set values */
3251: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
3252: ptr_vals = recv_buffer_vals;
3253: ptr_idxs = recv_buffer_idxs;
3254: for (i=0;i<n_recvs;i++) {
3255: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3256: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3257: MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3258: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3259: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3260: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3261: } else {
3262: /* TODO */
3263: }
3264: ptr_idxs += olengths_idxs[i];
3265: ptr_vals += olengths_vals[i];
3266: }
3267: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3268: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3269: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
3270: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
3272: #if 0
3273: if (!restrict_comm) { /* check */
3274: Vec lvec,rvec;
3275: PetscReal infty_error;
3277: MatGetVecs(mat,&rvec,&lvec);
3278: VecSetRandom(rvec,NULL);
3279: MatMult(mat,rvec,lvec);
3280: VecScale(lvec,-1.0);
3281: MatMultAdd(*mat_n,rvec,lvec,lvec);
3282: VecNorm(lvec,NORM_INFINITY,&infty_error);
3283: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3284: VecDestroy(&rvec);
3285: VecDestroy(&lvec);
3286: }
3287: #endif
3289: /* assemble new additional is (if any) */
3290: if (nis) {
3291: PetscInt **temp_idxs,*count_is,j,psum;
3293: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
3294: PetscMalloc(nis*sizeof(PetscInt),&count_is);
3295: PetscMemzero(count_is,nis*sizeof(PetscInt));
3296: ptr_idxs = recv_buffer_idxs_is;
3297: psum = 0;
3298: for (i=0;i<n_recvs;i++) {
3299: for (j=0;j<nis;j++) {
3300: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3301: count_is[j] += plen; /* increment counting of buffer for j-th IS */
3302: psum += plen;
3303: ptr_idxs += plen+1; /* shift pointer to received data */
3304: }
3305: }
3306: PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);
3307: PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);
3308: for (i=1;i<nis;i++) {
3309: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3310: }
3311: PetscMemzero(count_is,nis*sizeof(PetscInt));
3312: ptr_idxs = recv_buffer_idxs_is;
3313: for (i=0;i<n_recvs;i++) {
3314: for (j=0;j<nis;j++) {
3315: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3316: PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
3317: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3318: ptr_idxs += plen+1; /* shift pointer to received data */
3319: }
3320: }
3321: for (i=0;i<nis;i++) {
3322: ISDestroy(&isarray[i]);
3323: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
3324: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
3325: }
3326: PetscFree(count_is);
3327: PetscFree(temp_idxs[0]);
3328: PetscFree(temp_idxs);
3329: }
3330: /* free workspace */
3331: PetscFree(recv_buffer_idxs);
3332: PetscFree(recv_buffer_vals);
3333: PetscFree(recv_buffer_idxs_is);
3334: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3335: PetscFree(send_buffer_idxs);
3336: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3337: if (isdense) {
3338: MatISGetLocalMat(mat,&local_mat);
3339: MatDenseRestoreArray(local_mat,&send_buffer_vals);
3340: } else {
3341: /* PetscFree(send_buffer_vals); */
3342: }
3343: if (nis) {
3344: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
3345: PetscFree(send_buffer_idxs_is);
3346: }
3347: PetscFree(recv_req_idxs);
3348: PetscFree(recv_req_vals);
3349: PetscFree(recv_req_idxs_is);
3350: PetscFree(send_req_idxs);
3351: PetscFree(send_req_vals);
3352: PetscFree(send_req_idxs_is);
3353: PetscFree(ilengths_vals);
3354: PetscFree(ilengths_idxs);
3355: PetscFree(olengths_vals);
3356: PetscFree(olengths_idxs);
3357: PetscFree(onodes);
3358: if (nis) {
3359: PetscFree(ilengths_idxs_is);
3360: PetscFree(olengths_idxs_is);
3361: PetscFree(onodes_is);
3362: }
3363: PetscSubcommDestroy(&subcomm);
3364: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3365: MatDestroy(mat_n);
3366: for (i=0;i<nis;i++) {
3367: ISDestroy(&isarray[i]);
3368: }
3369: }
3370: return(0);
3371: }
3373: /* temporary hack into ksp private data structure */
3374: #include <petsc-private/kspimpl.h>
3378: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3379: {
3380: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3381: PC_IS *pcis = (PC_IS*)pc->data;
3382: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
3383: MatNullSpace CoarseNullSpace=NULL;
3384: ISLocalToGlobalMapping coarse_islg;
3385: IS coarse_is,*isarray;
3386: PetscInt i,im_active=-1,active_procs=-1;
3387: PetscInt nis,nisdofs,nisneu;
3388: PC pc_temp;
3389: PCType coarse_pc_type;
3390: KSPType coarse_ksp_type;
3391: PetscBool multilevel_requested,multilevel_allowed;
3392: PetscBool isredundant,isbddc,isnn,coarse_reuse;
3393: Mat t_coarse_mat_is;
3394: PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3395: PetscMPIInt all_procs;
3396: PetscBool csin_ml,csin_ds,csin,csin_type_simple;
3397: PetscBool compute_vecs = PETSC_FALSE;
3398: PetscErrorCode ierr;
3401: /* Assign global numbering to coarse dofs */
3402: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3403: compute_vecs = PETSC_TRUE;
3404: PetscInt ocoarse_size;
3405: ocoarse_size = pcbddc->coarse_size;
3406: PetscFree(pcbddc->global_primal_indices);
3407: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
3408: /* see if we can avoid some work */
3409: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3410: if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3411: KSPReset(pcbddc->coarse_ksp);
3412: coarse_reuse = PETSC_FALSE;
3413: } else { /* we can safely reuse already computed coarse matrix */
3414: coarse_reuse = PETSC_TRUE;
3415: }
3416: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3417: coarse_reuse = PETSC_FALSE;
3418: }
3419: /* reset any subassembling information */
3420: ISDestroy(&pcbddc->coarse_subassembling);
3421: ISDestroy(&pcbddc->coarse_subassembling_init);
3422: } else { /* primal space is unchanged, so we can reuse coarse matrix */
3423: coarse_reuse = PETSC_TRUE;
3424: }
3426: /* count "active" (i.e. with positive local size) and "void" processes */
3427: im_active = !!(pcis->n);
3428: MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
3429: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);
3430: void_procs = all_procs-active_procs;
3431: csin_type_simple = PETSC_TRUE;
3432: if (pcbddc->current_level) {
3433: csin_ml = PETSC_TRUE;
3434: ncoarse_ml = void_procs;
3435: csin_ds = PETSC_TRUE;
3436: ncoarse_ds = void_procs;
3437: if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3438: } else {
3439: csin_ml = PETSC_FALSE;
3440: ncoarse_ml = all_procs;
3441: if (void_procs) {
3442: csin_ds = PETSC_TRUE;
3443: ncoarse_ds = void_procs;
3444: csin_type_simple = PETSC_FALSE;
3445: } else {
3446: csin_ds = PETSC_FALSE;
3447: ncoarse_ds = all_procs;
3448: }
3449: }
3451: /*
3452: test if we can go multilevel: three conditions must be satisfied:
3453: - we have not exceeded the number of levels requested
3454: - we can actually subassemble the active processes
3455: - we can find a suitable number of MPI processes where we can place the subassembled problem
3456: */
3457: multilevel_allowed = PETSC_FALSE;
3458: multilevel_requested = PETSC_FALSE;
3459: if (pcbddc->current_level < pcbddc->max_levels) {
3460: multilevel_requested = PETSC_TRUE;
3461: if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3462: multilevel_allowed = PETSC_FALSE;
3463: } else {
3464: multilevel_allowed = PETSC_TRUE;
3465: }
3466: }
3467: /* determine number of process partecipating to coarse solver */
3468: if (multilevel_allowed) {
3469: ncoarse = ncoarse_ml;
3470: csin = csin_ml;
3471: } else {
3472: ncoarse = ncoarse_ds;
3473: csin = csin_ds;
3474: }
3476: /* creates temporary l2gmap and IS for coarse indexes */
3477: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
3478: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
3480: /* creates temporary MATIS object for coarse matrix */
3481: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
3482: #if 0
3483: {
3484: PetscViewer viewer;
3485: char filename[256];
3486: sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3487: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
3488: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
3489: MatView(coarse_submat_dense,viewer);
3490: PetscViewerDestroy(&viewer);
3491: }
3492: #endif
3493: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);
3494: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
3495: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
3496: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
3497: MatDestroy(&coarse_submat_dense);
3499: /* compute dofs splitting and neumann boundaries for coarse dofs */
3500: if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3501: PetscInt *tidxs,*tidxs2,nout,tsize,i;
3502: const PetscInt *idxs;
3503: ISLocalToGlobalMapping tmap;
3505: /* create map between primal indices (in local representative ordering) and local primal numbering */
3506: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
3507: /* allocate space for temporary storage */
3508: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);
3509: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);
3510: /* allocate for IS array */
3511: nisdofs = pcbddc->n_ISForDofsLocal;
3512: nisneu = !!pcbddc->NeumannBoundariesLocal;
3513: nis = nisdofs + nisneu;
3514: PetscMalloc(nis*sizeof(IS),&isarray);
3515: /* dofs splitting */
3516: for (i=0;i<nisdofs;i++) {
3517: /* ISView(pcbddc->ISForDofsLocal[i],0); */
3518: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
3519: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
3520: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
3521: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
3522: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
3523: ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
3524: /* ISView(isarray[i],0); */
3525: }
3526: /* neumann boundaries */
3527: if (pcbddc->NeumannBoundariesLocal) {
3528: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
3529: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
3530: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
3531: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
3532: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
3533: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
3534: ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
3535: /* ISView(isarray[nisdofs],0); */
3536: }
3537: /* free memory */
3538: PetscFree(tidxs);
3539: PetscFree(tidxs2);
3540: ISLocalToGlobalMappingDestroy(&tmap);
3541: } else {
3542: nis = 0;
3543: nisdofs = 0;
3544: nisneu = 0;
3545: isarray = NULL;
3546: }
3547: /* destroy no longer needed map */
3548: ISLocalToGlobalMappingDestroy(&coarse_islg);
3550: /* restrict on coarse candidates (if needed) */
3551: coarse_mat_is = NULL;
3552: if (csin) {
3553: if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3554: PetscInt j,tissize,*nisindices;
3555: PetscInt *coarse_candidates;
3556: const PetscInt* tisindices;
3557: /* get coarse candidates' ranks in pc communicator */
3558: PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);
3559: MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));
3560: for (i=0,j=0;i<all_procs;i++) {
3561: if (!coarse_candidates[i]) {
3562: coarse_candidates[j]=i;
3563: j++;
3564: }
3565: }
3566: if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3567: /* get a suitable subassembling pattern */
3568: if (csin_type_simple) {
3569: PetscMPIInt rank;
3570: PetscInt issize,isidx;
3571: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
3572: if (im_active) {
3573: issize = 1;
3574: isidx = (PetscInt)rank;
3575: } else {
3576: issize = 0;
3577: isidx = -1;
3578: }
3579: ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
3580: } else {
3581: MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);
3582: }
3583: if (pcbddc->dbg_flag) {
3584: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3585: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");
3586: ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
3587: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");
3588: for (i=0;i<j;i++) {
3589: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);
3590: }
3591: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");
3592: PetscViewerFlush(pcbddc->dbg_viewer);
3593: }
3594: /* shift the pattern on coarse candidates */
3595: ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);
3596: ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);
3597: PetscMalloc(tissize*sizeof(PetscInt),&nisindices);
3598: for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3599: ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);
3600: ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);
3601: PetscFree(coarse_candidates);
3602: }
3603: if (pcbddc->dbg_flag) {
3604: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3605: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");
3606: ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
3607: PetscViewerFlush(pcbddc->dbg_viewer);
3608: }
3609: /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3610: MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
3611: } else {
3612: if (pcbddc->dbg_flag) {
3613: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3614: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");
3615: PetscViewerFlush(pcbddc->dbg_viewer);
3616: }
3617: PetscObjectReference((PetscObject)t_coarse_mat_is);
3618: coarse_mat_is = t_coarse_mat_is;
3619: }
3621: /* create local to global scatters for coarse problem */
3622: if (compute_vecs) {
3623: PetscInt lrows;
3624: VecDestroy(&pcbddc->coarse_vec);
3625: if (coarse_mat_is) {
3626: MatGetLocalSize(coarse_mat_is,&lrows,NULL);
3627: } else {
3628: lrows = 0;
3629: }
3630: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
3631: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
3632: VecSetType(pcbddc->coarse_vec,VECSTANDARD);
3633: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3634: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
3635: }
3636: ISDestroy(&coarse_is);
3637: MatDestroy(&t_coarse_mat_is);
3639: /* set defaults for coarse KSP and PC */
3640: if (multilevel_allowed) {
3641: coarse_ksp_type = KSPRICHARDSON;
3642: coarse_pc_type = PCBDDC;
3643: } else {
3644: coarse_ksp_type = KSPPREONLY;
3645: coarse_pc_type = PCREDUNDANT;
3646: }
3648: /* print some info if requested */
3649: if (pcbddc->dbg_flag) {
3650: if (!multilevel_allowed) {
3651: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3652: if (multilevel_requested) {
3653: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
3654: } else if (pcbddc->max_levels) {
3655: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
3656: }
3657: PetscViewerFlush(pcbddc->dbg_viewer);
3658: }
3659: }
3661: /* create the coarse KSP object only once with defaults */
3662: if (coarse_mat_is) {
3663: MatReuse coarse_mat_reuse;
3664: PetscViewer dbg_viewer = NULL;
3665: if (pcbddc->dbg_flag) {
3666: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3667: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
3668: }
3669: if (!pcbddc->coarse_ksp) {
3670: char prefix[256],str_level[16];
3671: size_t len;
3672: KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);
3673: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
3674: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
3675: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);
3676: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
3677: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
3678: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3679: PCSetType(pc_temp,coarse_pc_type);
3680: /* prefix */
3681: PetscStrcpy(prefix,"");
3682: PetscStrcpy(str_level,"");
3683: if (!pcbddc->current_level) {
3684: PetscStrcpy(prefix,((PetscObject)pc)->prefix);
3685: PetscStrcat(prefix,"pc_bddc_coarse_");
3686: } else {
3687: PetscStrlen(((PetscObject)pc)->prefix,&len);
3688: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3689: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3690: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
3691: sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3692: PetscStrcat(prefix,str_level);
3693: }
3694: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
3695: /* allow user customization */
3696: KSPSetFromOptions(pcbddc->coarse_ksp);
3697: PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
3698: }
3700: /* get some info after set from options */
3701: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3702: PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
3703: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
3704: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
3705: if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3706: PCSetType(pc_temp,coarse_pc_type);
3707: isbddc = PETSC_FALSE;
3708: }
3709: if (isredundant) {
3710: KSP inner_ksp;
3711: PC inner_pc;
3712: PCRedundantGetKSP(pc_temp,&inner_ksp);
3713: KSPGetPC(inner_ksp,&inner_pc);
3714: PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
3715: }
3717: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3718: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
3719: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
3720: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
3721: if (nisdofs) {
3722: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
3723: for (i=0;i<nisdofs;i++) {
3724: ISDestroy(&isarray[i]);
3725: }
3726: }
3727: if (nisneu) {
3728: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
3729: ISDestroy(&isarray[nisdofs]);
3730: }
3732: /* assemble coarse matrix */
3733: if (coarse_reuse) {
3734: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
3735: PetscObjectReference((PetscObject)coarse_mat);
3736: coarse_mat_reuse = MAT_REUSE_MATRIX;
3737: } else {
3738: coarse_mat_reuse = MAT_INITIAL_MATRIX;
3739: }
3740: if (isbddc || isnn) {
3741: if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3742: MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);
3743: if (pcbddc->dbg_flag) {
3744: PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");
3745: PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");
3746: ISView(pcbddc->coarse_subassembling,dbg_viewer);
3747: PetscViewerFlush(dbg_viewer);
3748: }
3749: }
3750: MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);
3751: } else {
3752: MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);
3753: }
3754: MatDestroy(&coarse_mat_is);
3756: /* propagate symmetry info to coarse matrix */
3757: MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);
3758: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
3760: /* set operators */
3761: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
3762: if (pcbddc->dbg_flag) {
3763: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
3764: }
3765: } else { /* processes non partecipating to coarse solver (if any) */
3766: coarse_mat = 0;
3767: }
3768: PetscFree(isarray);
3769: #if 0
3770: {
3771: PetscViewer viewer;
3772: char filename[256];
3773: sprintf(filename,"coarse_mat.m");
3774: PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);
3775: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
3776: MatView(coarse_mat,viewer);
3777: PetscViewerDestroy(&viewer);
3778: }
3779: #endif
3781: /* Compute coarse null space (special handling by BDDC only) */
3782: if (pcbddc->NullSpace) {
3783: PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
3784: }
3786: if (pcbddc->coarse_ksp) {
3787: Vec crhs,csol;
3788: PetscBool ispreonly;
3789: if (CoarseNullSpace) {
3790: if (isbddc) {
3791: PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
3792: } else {
3793: KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);
3794: }
3795: }
3796: /* setup coarse ksp */
3797: KSPSetUp(pcbddc->coarse_ksp);
3798: KSPGetSolution(pcbddc->coarse_ksp,&csol);
3799: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
3800: /* hack */
3801: if (!csol) {
3802: MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
3803: }
3804: if (!crhs) {
3805: MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
3806: }
3807: /* Check coarse problem if in debug mode or if solving with an iterative method */
3808: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
3809: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3810: KSP check_ksp;
3811: KSPType check_ksp_type;
3812: PC check_pc;
3813: Vec check_vec,coarse_vec;
3814: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3815: PetscInt its;
3816: PetscBool compute_eigs;
3817: PetscReal *eigs_r,*eigs_c;
3818: PetscInt neigs;
3819: const char *prefix;
3821: /* Create ksp object suitable for estimation of extreme eigenvalues */
3822: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
3823: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
3824: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
3825: if (ispreonly) {
3826: check_ksp_type = KSPPREONLY;
3827: compute_eigs = PETSC_FALSE;
3828: } else {
3829: check_ksp_type = KSPGMRES;
3830: compute_eigs = PETSC_TRUE;
3831: }
3832: KSPSetType(check_ksp,check_ksp_type);
3833: KSPSetComputeSingularValues(check_ksp,compute_eigs);
3834: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
3835: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
3836: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
3837: KSPSetOptionsPrefix(check_ksp,prefix);
3838: KSPAppendOptionsPrefix(check_ksp,"check_");
3839: KSPSetFromOptions(check_ksp);
3840: KSPSetUp(check_ksp);
3841: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
3842: KSPSetPC(check_ksp,check_pc);
3843: /* create random vec */
3844: KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);
3845: VecDuplicate(coarse_vec,&check_vec);
3846: VecSetRandom(check_vec,NULL);
3847: if (CoarseNullSpace) {
3848: MatNullSpaceRemove(CoarseNullSpace,check_vec);
3849: }
3850: MatMult(coarse_mat,check_vec,coarse_vec);
3851: /* solve coarse problem */
3852: KSPSolve(check_ksp,coarse_vec,coarse_vec);
3853: if (CoarseNullSpace) {
3854: MatNullSpaceRemove(CoarseNullSpace,coarse_vec);
3855: }
3856: /* set eigenvalue estimation if preonly has not been requested */
3857: if (compute_eigs) {
3858: PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);
3859: PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);
3860: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
3861: lambda_max = eigs_r[neigs-1];
3862: lambda_min = eigs_r[0];
3863: if (pcbddc->use_coarse_estimates) {
3864: if (lambda_max>lambda_min) {
3865: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
3866: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
3867: }
3868: }
3869: }
3871: /* check coarse problem residual error */
3872: if (pcbddc->dbg_flag) {
3873: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3874: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
3875: VecAXPY(check_vec,-1.0,coarse_vec);
3876: VecNorm(check_vec,NORM_INFINITY,&infty_error);
3877: MatMult(coarse_mat,check_vec,coarse_vec);
3878: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
3879: VecDestroy(&check_vec);
3880: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);
3881: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
3882: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
3883: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
3884: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
3885: if (compute_eigs) {
3886: PetscReal lambda_max_s,lambda_min_s;
3887: KSPGetIterationNumber(check_ksp,&its);
3888: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
3889: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
3890: for (i=0;i<neigs;i++) {
3891: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
3892: }
3893: }
3894: PetscViewerFlush(dbg_viewer);
3895: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
3896: }
3897: KSPDestroy(&check_ksp);
3898: if (compute_eigs) {
3899: PetscFree(eigs_r);
3900: PetscFree(eigs_c);
3901: }
3902: }
3903: }
3904: /* print additional info */
3905: if (pcbddc->dbg_flag) {
3906: /* waits until all processes reaches this point */
3907: PetscBarrier((PetscObject)pc);
3908: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
3909: PetscViewerFlush(pcbddc->dbg_viewer);
3910: }
3912: /* free memory */
3913: MatNullSpaceDestroy(&CoarseNullSpace);
3914: MatDestroy(&coarse_mat);
3915: return(0);
3916: }
3920: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3921: {
3922: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3923: PC_IS* pcis = (PC_IS*)pc->data;
3924: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
3925: PetscInt i,coarse_size;
3926: PetscInt *local_primal_indices;
3930: /* Compute global number of coarse dofs */
3931: if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3932: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3933: }
3934: PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);
3936: /* check numbering */
3937: if (pcbddc->dbg_flag) {
3938: PetscScalar coarsesum,*array;
3939: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
3941: PetscViewerFlush(pcbddc->dbg_viewer);
3942: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3943: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
3944: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
3945: VecSet(pcis->vec1_N,0.0);
3946: for (i=0;i<pcbddc->local_primal_size;i++) {
3947: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
3948: }
3949: VecAssemblyBegin(pcis->vec1_N);
3950: VecAssemblyEnd(pcis->vec1_N);
3951: VecSet(pcis->vec1_global,0.0);
3952: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3953: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3954: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3955: VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3956: VecGetArray(pcis->vec1_N,&array);
3957: for (i=0;i<pcis->n;i++) {
3958: if (array[i] == 1.0) {
3959: set_error = PETSC_TRUE;
3960: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
3961: }
3962: }
3963: MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
3964: PetscViewerFlush(pcbddc->dbg_viewer);
3965: for (i=0;i<pcis->n;i++) {
3966: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3967: }
3968: VecRestoreArray(pcis->vec1_N,&array);
3969: VecSet(pcis->vec1_global,0.0);
3970: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3971: VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3972: VecSum(pcis->vec1_global,&coarsesum);
3973: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
3974: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
3975: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
3976: PetscViewerFlush(pcbddc->dbg_viewer);
3977: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
3978: for (i=0;i<pcbddc->local_primal_size;i++) {
3979: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3980: }
3981: PetscViewerFlush(pcbddc->dbg_viewer);
3982: }
3983: PetscViewerFlush(pcbddc->dbg_viewer);
3984: if (set_error_reduced) {
3985: SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
3986: }
3987: }
3988: /* get back data */
3989: *coarse_size_n = coarse_size;
3990: *local_primal_indices_n = local_primal_indices;
3991: return(0);
3992: }
3996: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
3997: {
3998: IS localis_t;
3999: PetscInt i,lsize,*idxs,n;
4000: PetscScalar *vals;
4004: /* get indices in local ordering exploiting local to global map */
4005: ISGetLocalSize(globalis,&lsize);
4006: PetscMalloc(lsize*sizeof(PetscScalar),&vals);
4007: for (i=0;i<lsize;i++) vals[i] = 1.0;
4008: ISGetIndices(globalis,(const PetscInt**)&idxs);
4009: VecSet(gwork,0.0);
4010: VecSet(lwork,0.0);
4011: if (idxs) { /* multilevel guard */
4012: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
4013: }
4014: VecAssemblyBegin(gwork);
4015: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
4016: PetscFree(vals);
4017: VecAssemblyEnd(gwork);
4018: /* now compute set in local ordering */
4019: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4020: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4021: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
4022: VecGetSize(lwork,&n);
4023: for (i=0,lsize=0;i<n;i++) {
4024: if (PetscRealPart(vals[i]) > 0.5) {
4025: lsize++;
4026: }
4027: }
4028: PetscMalloc(lsize*sizeof(PetscInt),&idxs);
4029: for (i=0,lsize=0;i<n;i++) {
4030: if (PetscRealPart(vals[i]) > 0.5) {
4031: idxs[lsize++] = i;
4032: }
4033: }
4034: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
4035: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
4036: *localis = localis_t;
4037: return(0);
4038: }