Actual source code: bddcprivate.c

petsc-3.5.4 2015-05-23
Report Typos and Errors
  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>

  7: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
  8: {
  9:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
 10:   PetscScalar    *coarse_submat_vals;

 14:   /* Compute matrix after change of basis and extract local submatrices */
 15:   PCBDDCSetUpLocalMatrices(pc);

 17:   /* Setup local scatters R_to_B and (optionally) R_to_D */
 18:   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
 19:   PCBDDCSetUpLocalScatters(pc);

 21:   /* Setup local solvers ksp_D and ksp_R */
 22:   /* PCBDDCSetUpLocalScatters should be called first! */
 23:   PCBDDCSetUpLocalSolvers(pc);

 25:   /* Change global null space passed in by the user if change of basis has been requested */
 26:   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
 27:     PCBDDCNullSpaceAdaptGlobal(pc);
 28:   }

 30:   /*
 31:      Setup local correction and local part of coarse basis.
 32:      Gives back the dense local part of the coarse matrix in column major ordering
 33:   */
 34:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

 36:   /* Compute total number of coarse nodes and setup coarse solver */
 37:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

 39:   /* free */
 40:   PetscFree(coarse_submat_vals);
 41:   return(0);
 42: }

 46: PetscErrorCode PCBDDCResetCustomization(PC pc)
 47: {
 48:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

 52:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
 53:   ISDestroy(&pcbddc->user_primal_vertices);
 54:   MatNullSpaceDestroy(&pcbddc->NullSpace);
 55:   ISDestroy(&pcbddc->NeumannBoundaries);
 56:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
 57:   ISDestroy(&pcbddc->DirichletBoundaries);
 58:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
 59:   PetscFree(pcbddc->onearnullvecs_state);
 60:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
 61:   PCBDDCSetDofsSplitting(pc,0,NULL);
 62:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
 63:   return(0);
 64: }

 68: PetscErrorCode PCBDDCResetTopography(PC pc)
 69: {
 70:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

 74:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
 75:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
 76:   MatDestroy(&pcbddc->ConstraintMatrix);
 77:   PCBDDCGraphReset(pcbddc->mat_graph);
 78:   return(0);
 79: }

 83: PetscErrorCode PCBDDCResetSolvers(PC pc)
 84: {
 85:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

 89:   VecDestroy(&pcbddc->coarse_vec);
 90:   VecDestroy(&pcbddc->coarse_rhs);
 91:   MatDestroy(&pcbddc->coarse_phi_B);
 92:   MatDestroy(&pcbddc->coarse_phi_D);
 93:   MatDestroy(&pcbddc->coarse_psi_B);
 94:   MatDestroy(&pcbddc->coarse_psi_D);
 95:   VecDestroy(&pcbddc->vec1_P);
 96:   VecDestroy(&pcbddc->vec1_C);
 97:   MatDestroy(&pcbddc->local_auxmat1);
 98:   MatDestroy(&pcbddc->local_auxmat2);
 99:   VecDestroy(&pcbddc->vec1_R);
100:   VecDestroy(&pcbddc->vec2_R);
101:   ISDestroy(&pcbddc->is_R_local);
102:   VecScatterDestroy(&pcbddc->R_to_B);
103:   VecScatterDestroy(&pcbddc->R_to_D);
104:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
105:   KSPDestroy(&pcbddc->ksp_D);
106:   KSPDestroy(&pcbddc->ksp_R);
107:   KSPDestroy(&pcbddc->coarse_ksp);
108:   MatDestroy(&pcbddc->local_mat);
109:   PetscFree(pcbddc->primal_indices_local_idxs);
110:   PetscFree(pcbddc->global_primal_indices);
111:   ISDestroy(&pcbddc->coarse_subassembling);
112:   ISDestroy(&pcbddc->coarse_subassembling_init);
113:   return(0);
114: }

118: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
119: {
120:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121:   PC_IS          *pcis = (PC_IS*)pc->data;
122:   VecType        impVecType;
123:   PetscInt       n_constraints,n_R,old_size;

127:   if (!pcbddc->ConstraintMatrix) {
128:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
129:   }
130:   /* get sizes */
131:   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
132:   n_R = pcis->n-pcbddc->n_actual_vertices;
133:   VecGetType(pcis->vec1_N,&impVecType);
134:   /* local work vectors (try to avoid unneeded work)*/
135:   /* R nodes */
136:   old_size = -1;
137:   if (pcbddc->vec1_R) {
138:     VecGetSize(pcbddc->vec1_R,&old_size);
139:   }
140:   if (n_R != old_size) {
141:     VecDestroy(&pcbddc->vec1_R);
142:     VecDestroy(&pcbddc->vec2_R);
143:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
144:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
145:     VecSetType(pcbddc->vec1_R,impVecType);
146:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
147:   }
148:   /* local primal dofs */
149:   old_size = -1;
150:   if (pcbddc->vec1_P) {
151:     VecGetSize(pcbddc->vec1_P,&old_size);
152:   }
153:   if (pcbddc->local_primal_size != old_size) {
154:     VecDestroy(&pcbddc->vec1_P);
155:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
156:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
157:     VecSetType(pcbddc->vec1_P,impVecType);
158:   }
159:   /* local explicit constraints */
160:   old_size = -1;
161:   if (pcbddc->vec1_C) {
162:     VecGetSize(pcbddc->vec1_C,&old_size);
163:   }
164:   if (n_constraints && n_constraints != old_size) {
165:     VecDestroy(&pcbddc->vec1_C);
166:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
167:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
168:     VecSetType(pcbddc->vec1_C,impVecType);
169:   }
170:   return(0);
171: }

175: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
176: {
177:   PetscErrorCode         ierr;
178:   /* pointers to pcis and pcbddc */
179:   PC_IS*                 pcis = (PC_IS*)pc->data;
180:   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
181:   /* submatrices of local problem */
182:   Mat                    A_RV,A_VR,A_VV;
183:   /* working matrices */
184:   Mat                    M1,M2,M3,C_CR;
185:   /* working vectors */
186:   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
187:   /* additional working stuff */
188:   IS                     is_aux;
189:   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
190:   const PetscScalar      *array,*row_cmat_values;
191:   const PetscInt         *row_cmat_indices,*idx_R_local;
192:   PetscInt               *idx_V_B,*auxindices;
193:   PetscInt               n_vertices,n_constraints,size_of_constraint;
194:   PetscInt               i,j,n_R,n_D,n_B;
195:   PetscBool              unsymmetric_check;
196:   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
197:   MatType                impMatType;
198:   /* some shortcuts to scalars */
199:   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
200:   /* for debugging purposes */
201:   PetscReal              *coarsefunctions_errors,*constraints_errors;

204:   /* get number of vertices (corners plus constraints with change of basis)
205:      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
206:   n_vertices = pcbddc->n_actual_vertices;
207:   n_constraints = pcbddc->local_primal_size-n_vertices;
208:   /* Set Non-overlapping dimensions */
209:   n_B = pcis->n_B; n_D = pcis->n - n_B;
210:   n_R = pcis->n-n_vertices;

212:   /* Set types for local objects needed by BDDC precondtioner */
213:   impMatType = MATSEQDENSE;

215:   /* Allocating some extra storage just to be safe */
216:   PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
217:   for (i=0;i<pcis->n;i++) auxindices[i]=i;

219:   /* vertices in boundary numbering */
220:   PetscMalloc1(n_vertices,&idx_V_B);
221:   ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);
222:   if (i != n_vertices) {
223:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
224:   }

226:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
227:   if (n_constraints) {
228:     /* see if we can save some allocations */
229:     if (pcbddc->local_auxmat2) {
230:       PetscInt on_R,on_constraints;
231:       MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);
232:       if (on_R != n_R || on_constraints != n_constraints) {
233:         MatDestroy(&pcbddc->local_auxmat2);
234:         MatDestroy(&pcbddc->local_auxmat1);
235:       }
236:     }
237:     /* work vectors */
238:     VecDuplicate(pcbddc->vec1_C,&vec1_C);
239:     VecDuplicate(pcbddc->vec1_C,&vec2_C);
240:     /* auxiliary matrices */
241:     if (!pcbddc->local_auxmat2) {
242:       MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
243:       MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);
244:       MatSetType(pcbddc->local_auxmat2,impMatType);
245:       MatSetUp(pcbddc->local_auxmat2);
246:     }

248:     /* Extract constraints on R nodes: C_{CR}  */
249:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
250:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
251:     ISDestroy(&is_aux);

253:     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
254:     for (i=0;i<n_constraints;i++) {
255:       VecSet(pcbddc->vec1_R,zero);
256:       /* Get row of constraint matrix in R numbering */
257:       MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
258:       VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);
259:       MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
260:       VecAssemblyBegin(pcbddc->vec1_R);
261:       VecAssemblyEnd(pcbddc->vec1_R);
262:       /* Solve for row of constraint matrix in R numbering */
263:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
264:       /* Set values in local_auxmat2 */
265:       VecGetArrayRead(pcbddc->vec2_R,&array);
266:       MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
267:       VecRestoreArrayRead(pcbddc->vec2_R,&array);
268:     }
269:     MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
270:     MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
271:     MatScale(pcbddc->local_auxmat2,m_one);

273:     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
274:     MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
275:     MatLUFactor(M3,NULL,NULL,NULL);
276:     MatCreate(PETSC_COMM_SELF,&M1);
277:     MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
278:     MatSetType(M1,impMatType);
279:     MatSetUp(M1);
280:     MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);
281:     MatZeroEntries(M2);
282:     VecSet(vec1_C,m_one);
283:     MatDiagonalSet(M2,vec1_C,INSERT_VALUES);
284:     MatMatSolve(M3,M2,M1);
285:     MatDestroy(&M2);
286:     MatDestroy(&M3);
287:     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
288:     if (!pcbddc->local_auxmat1) {
289:       MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
290:     } else {
291:       MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
292:     }
293:   }

295:   /* Get submatrices from subdomain matrix */
296:   if (n_vertices) {
297:     PetscInt ibs,mbs;
298:     PetscBool issbaij;
299:     Mat newmat;

301:     ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
302:     MatGetBlockSize(pcbddc->local_mat,&mbs);
303:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
304:     if (ibs != mbs) { /* need to convert to SEQAIJ */
305:       MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
306:       MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
307:       MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
308:       MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
309:       MatDestroy(&newmat);
310:     } else {
311:       /* this is safe */
312:       MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
313:       PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
314:       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
315:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
316:         /* which of the two approaches is faster? */
317:         /* MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
318:         MatCreateTranspose(A_RV,&A_VR);*/
319:         MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
320:         MatCreateTranspose(A_VR,&A_RV);
321:         MatDestroy(&newmat);
322:       } else {
323:         MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
324:         MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
325:       }
326:     }
327:     MatGetVecs(A_RV,&vec1_V,NULL);
328:     VecDuplicate(vec1_V,&vec2_V);
329:     ISDestroy(&is_aux);
330:   }

332:   /* Matrix of coarse basis functions (local) */
333:   if (pcbddc->coarse_phi_B) {
334:     PetscInt on_B,on_primal;
335:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
336:     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
337:       MatDestroy(&pcbddc->coarse_phi_B);
338:       MatDestroy(&pcbddc->coarse_psi_B);
339:     }
340:   }
341:   if (pcbddc->coarse_phi_D) {
342:     PetscInt on_D,on_primal;
343:     MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);
344:     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
345:       MatDestroy(&pcbddc->coarse_phi_D);
346:       MatDestroy(&pcbddc->coarse_psi_D);
347:     }
348:   }
349:   if (!pcbddc->coarse_phi_B) {
350:     MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
351:     MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
352:     MatSetType(pcbddc->coarse_phi_B,impMatType);
353:     MatSetUp(pcbddc->coarse_phi_B);
354:   }
355:   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
356:     MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
357:     MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
358:     MatSetType(pcbddc->coarse_phi_D,impMatType);
359:     MatSetUp(pcbddc->coarse_phi_D);
360:   }

362:   if (pcbddc->dbg_flag) {
363:     ISGetIndices(pcbddc->is_R_local,&idx_R_local);
364:     PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);
365:     PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);
366:   }
367:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
368:   PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);

370:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */

372:   /* vertices */
373:   for (i=0;i<n_vertices;i++) {
374:     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
375:     VecSet(pcbddc->vec1_R,zero); /* TODO: REMOVE IT */
376:     VecSet(vec1_V,zero);
377:     VecSetValue(vec1_V,i,one,INSERT_VALUES);
378:     VecAssemblyBegin(vec1_V);
379:     VecAssemblyEnd(vec1_V);
380:     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
381:     MatMult(A_RV,vec1_V,pcbddc->vec1_R);
382:     KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
383:     VecScale(pcbddc->vec1_R,m_one);
384:     if (n_constraints) {
385:       MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
386:       MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
387:       VecScale(vec1_C,m_one);
388:     }
389:     MatMult(A_VR,pcbddc->vec1_R,vec2_V);
390:     MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);

392:     /* Set values in coarse basis function and subdomain part of coarse_mat */
393:     /* coarse basis functions */
394:     VecSet(pcis->vec1_B,zero);
395:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
396:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
397:     VecGetArrayRead(pcis->vec1_B,&array);
398:     MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
399:     VecRestoreArrayRead(pcis->vec1_B,&array);
400:     MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
401:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
402:       VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
403:       VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
404:       VecGetArrayRead(pcis->vec1_D,&array);
405:       MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
406:       VecRestoreArrayRead(pcis->vec1_D,&array);
407:     }
408:     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409:     VecGetArrayRead(vec2_V,&array);
410:     PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
411:     VecRestoreArrayRead(vec2_V,&array);
412:     if (n_constraints) {
413:       VecGetArrayRead(vec1_C,&array);
414:       PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
415:       VecRestoreArrayRead(vec1_C,&array);
416:     }

418:     /* check */
419:     if (pcbddc->dbg_flag) {
420:       /* assemble subdomain vector on local nodes */
421:       VecSet(pcis->vec1_N,zero);
422:       VecGetArrayRead(pcbddc->vec1_R,&array);
423:       VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
424:       VecRestoreArrayRead(pcbddc->vec1_R,&array);
425:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
426:       VecAssemblyBegin(pcis->vec1_N);
427:       VecAssemblyEnd(pcis->vec1_N);
428:       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
429:       VecSet(pcbddc->vec1_P,zero);
430:       VecGetArrayRead(vec2_V,&array);
431:       VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
432:       VecRestoreArrayRead(vec2_V,&array);
433:       if (n_constraints) {
434:         VecGetArrayRead(vec1_C,&array);
435:         VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
436:         VecRestoreArrayRead(vec1_C,&array);
437:       }
438:       VecAssemblyBegin(pcbddc->vec1_P);
439:       VecAssemblyEnd(pcbddc->vec1_P);
440:       VecScale(pcbddc->vec1_P,m_one);
441:       /* check saddle point solution */
442:       MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
443:       MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
444:       VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
445:       MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
446:       /* shift by the identity matrix */
447:       VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
448:       VecAssemblyBegin(pcbddc->vec1_P);
449:       VecAssemblyEnd(pcbddc->vec1_P);
450:       VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
451:     }
452:   }

454:   /* constraints */
455:   for (i=0;i<n_constraints;i++) {
456:     VecSet(vec2_C,zero);
457:     VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
458:     VecAssemblyBegin(vec2_C);
459:     VecAssemblyEnd(vec2_C);
460:     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
461:     MatMult(M1,vec2_C,vec1_C);
462:     MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
463:     VecScale(vec1_C,m_one);
464:     if (n_vertices) {
465:       MatMult(A_VR,pcbddc->vec1_R,vec2_V);
466:     }
467:     /* Set values in coarse basis function and subdomain part of coarse_mat */
468:     /* coarse basis functions */
469:     j = i+n_vertices; /* don't touch this! */
470:     VecSet(pcis->vec1_B,zero);
471:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
472:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
473:     VecGetArrayRead(pcis->vec1_B,&array);
474:     MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);
475:     VecRestoreArrayRead(pcis->vec1_B,&array);
476:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477:       VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
478:       VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
479:       VecGetArrayRead(pcis->vec1_D,&array);
480:       MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);
481:       VecRestoreArrayRead(pcis->vec1_D,&array);
482:     }
483:     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
484:     if (n_vertices) {
485:       VecGetArrayRead(vec2_V,&array);
486:       PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));
487:       VecRestoreArrayRead(vec2_V,&array);
488:     }
489:     VecGetArrayRead(vec1_C,&array);
490:     PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));
491:     VecRestoreArrayRead(vec1_C,&array);

493:     if (pcbddc->dbg_flag) {
494:       /* assemble subdomain vector on nodes */
495:       VecSet(pcis->vec1_N,zero);
496:       VecGetArrayRead(pcbddc->vec1_R,&array);
497:       VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
498:       VecRestoreArrayRead(pcbddc->vec1_R,&array);
499:       VecAssemblyBegin(pcis->vec1_N);
500:       VecAssemblyEnd(pcis->vec1_N);
501:       /* assemble subdomain vector of lagrange multipliers */
502:       VecSet(pcbddc->vec1_P,zero);
503:       if (n_vertices) {
504:         VecGetArrayRead(vec2_V,&array);
505:         VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);
506:         VecRestoreArrayRead(vec2_V,&array);
507:       }
508:       VecGetArrayRead(vec1_C,&array);
509:       VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);
510:       VecRestoreArrayRead(vec1_C,&array);
511:       VecAssemblyBegin(pcbddc->vec1_P);
512:       VecAssemblyEnd(pcbddc->vec1_P);
513:       VecScale(pcbddc->vec1_P,m_one);
514:       /* check saddle point solution */
515:       MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
516:       MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
517:       VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);
518:       MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
519:       /* shift by the identity matrix */
520:       VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);
521:       VecAssemblyBegin(pcbddc->vec1_P);
522:       VecAssemblyEnd(pcbddc->vec1_P);
523:       VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);
524:     }
525:   }
526:   /* call assembling routines for local coarse basis */
527:   MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
528:   MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
529:   if (pcbddc->switch_static || pcbddc->dbg_flag) {
530:     MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
531:     MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
532:   }

534:   /* compute other basis functions for non-symmetric problems */
535:   MatIsSymmetric(pc->pmat,1.e-4,&pcbddc->issym);
536:   if (!pcbddc->issym) {
537:     if (!pcbddc->coarse_psi_B) {
538:       MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);
539:       MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
540:       MatSetType(pcbddc->coarse_psi_B,impMatType);
541:       MatSetUp(pcbddc->coarse_psi_B);
542:     }
543:     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
544:       MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);
545:       MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
546:       MatSetType(pcbddc->coarse_psi_D,impMatType);
547:       MatSetUp(pcbddc->coarse_psi_D);
548:     }
549:     for (i=0;i<pcbddc->local_primal_size;i++) {
550:       if (n_constraints) {
551:         VecSet(vec1_C,zero);
552:         for (j=0;j<n_constraints;j++) {
553:           VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);
554:         }
555:         VecAssemblyBegin(vec1_C);
556:         VecAssemblyEnd(vec1_C);
557:       }
558:       if (i<n_vertices) {
559:         VecSet(vec1_V,zero);
560:         VecSetValue(vec1_V,i,m_one,INSERT_VALUES);
561:         VecAssemblyBegin(vec1_V);
562:         VecAssemblyEnd(vec1_V);
563:         MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);
564:         if (n_constraints) {
565:           MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
566:         }
567:       } else {
568:         MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);
569:       }
570:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
571:       VecSet(pcis->vec1_B,zero);
572:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
573:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
574:       VecGetArrayRead(pcis->vec1_B,&array);
575:       MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
576:       VecRestoreArrayRead(pcis->vec1_B,&array);
577:       if (i<n_vertices) {
578:         MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);
579:       }
580:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
581:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
582:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
583:         VecGetArrayRead(pcis->vec1_D,&array);
584:         MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
585:         VecRestoreArrayRead(pcis->vec1_D,&array);
586:       }

588:       if (pcbddc->dbg_flag) {
589:         /* assemble subdomain vector on nodes */
590:         VecSet(pcis->vec1_N,zero);
591:         VecGetArrayRead(pcbddc->vec1_R,&array);
592:         VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);
593:         VecRestoreArrayRead(pcbddc->vec1_R,&array);
594:         if (i<n_vertices) {
595:           VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);
596:         }
597:         VecAssemblyBegin(pcis->vec1_N);
598:         VecAssemblyEnd(pcis->vec1_N);
599:         /* assemble subdomain vector of lagrange multipliers */
600:         for (j=0;j<pcbddc->local_primal_size;j++) {
601:           VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);
602:         }
603:         VecAssemblyBegin(pcbddc->vec1_P);
604:         VecAssemblyEnd(pcbddc->vec1_P);
605:         /* check saddle point solution */
606:         MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
607:         MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
608:         VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);
609:         MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
610:         /* shift by the identity matrix */
611:         VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);
612:         VecAssemblyBegin(pcbddc->vec1_P);
613:         VecAssemblyEnd(pcbddc->vec1_P);
614:         VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);
615:       }
616:     }
617:     MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
618:     MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);
619:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
620:       MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
621:       MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);
622:     }
623:     unsymmetric_check = PETSC_TRUE;
624:   } else { /* take references to already computed coarse basis */
625:     unsymmetric_check = PETSC_FALSE;
626:     PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
627:     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
628:     if (pcbddc->coarse_phi_D) {
629:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
630:       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
631:     }
632:   }
633:   PetscFree(idx_V_B);
634:   /* Checking coarse_sub_mat and coarse basis functios */
635:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
636:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
637:   if (pcbddc->dbg_flag) {
638:     Mat         coarse_sub_mat;
639:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
640:     Mat         coarse_phi_D,coarse_phi_B;
641:     Mat         coarse_psi_D,coarse_psi_B;
642:     Mat         A_II,A_BB,A_IB,A_BI;
643:     MatType     checkmattype=MATSEQAIJ;
644:     PetscReal   real_value;

646:     MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
647:     MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
648:     MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
649:     MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
650:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
651:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
652:     if (unsymmetric_check) {
653:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
654:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
655:     }
656:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

658:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
659:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");
660:     PetscViewerFlush(pcbddc->dbg_viewer);
661:     if (unsymmetric_check) {
662:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
663:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
664:       MatDestroy(&AUXMAT);
665:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
666:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
667:       MatDestroy(&AUXMAT);
668:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
669:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
670:       MatDestroy(&AUXMAT);
671:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
672:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
673:       MatDestroy(&AUXMAT);
674:     } else {
675:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
676:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
677:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
678:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
679:       MatDestroy(&AUXMAT);
680:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
681:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
682:       MatDestroy(&AUXMAT);
683:     }
684:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
685:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
686:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
687:     MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);
688:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
689:     MatNorm(TM1,NORM_INFINITY,&real_value);
690:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
691:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");
692:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);
693:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);
694:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");
695:     for (i=0;i<pcbddc->local_primal_size;i++) {
696:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);
697:     }
698:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");
699:     for (i=0;i<pcbddc->local_primal_size;i++) {
700:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);
701:     }
702:     if (unsymmetric_check) {
703:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");
704:       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
705:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);
706:       }
707:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");
708:       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
709:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);
710:       }
711:     }
712:     PetscViewerFlush(pcbddc->dbg_viewer);
713:     MatDestroy(&A_II);
714:     MatDestroy(&A_BB);
715:     MatDestroy(&A_IB);
716:     MatDestroy(&A_BI);
717:     MatDestroy(&TM1);
718:     MatDestroy(&TM2);
719:     MatDestroy(&TM3);
720:     MatDestroy(&TM4);
721:     MatDestroy(&coarse_phi_D);
722:     MatDestroy(&coarse_phi_B);
723:     if (unsymmetric_check) {
724:       MatDestroy(&coarse_psi_D);
725:       MatDestroy(&coarse_psi_B);
726:     }
727:     MatDestroy(&coarse_sub_mat);
728:     ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);
729:     PetscFree(coarsefunctions_errors);
730:     PetscFree(constraints_errors);
731:   }
732:   /* free memory */
733:   if (n_vertices) {
734:     VecDestroy(&vec1_V);
735:     VecDestroy(&vec2_V);
736:     MatDestroy(&A_RV);
737:     MatDestroy(&A_VR);
738:     MatDestroy(&A_VV);
739:   }
740:   if (n_constraints) {
741:     VecDestroy(&vec1_C);
742:     VecDestroy(&vec2_C);
743:     MatDestroy(&M1);
744:     MatDestroy(&C_CR);
745:   }
746:   PetscFree(auxindices);
747:   /* get back data */
748:   *coarse_submat_vals_n = coarse_submat_vals;
749:   return(0);
750: }

754: PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
755: {
756:   PC_IS*            pcis = (PC_IS*)(pc->data);
757:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
758:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
759:   PetscBool         issbaij,isseqaij;
760:   /* manage repeated solves */
761:   MatReuse          reuse;
762:   PetscErrorCode    ierr;

765:   if ( (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) || (pcbddc->user_ChangeOfBasisMatrix && !pcbddc->ChangeOfBasisMatrix) ) {
766:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
767:   }
768:   /* get mat flags */
769:   reuse = MAT_INITIAL_MATRIX;
770:   if (pc->setupcalled) {
771:     if (pc->flag == SAME_NONZERO_PATTERN) {
772:       reuse = MAT_REUSE_MATRIX;
773:     } else {
774:       reuse = MAT_INITIAL_MATRIX;
775:     }
776:   }
777:   if (reuse == MAT_INITIAL_MATRIX) {
778:     MatDestroy(&pcis->A_II);
779:     MatDestroy(&pcis->A_IB);
780:     MatDestroy(&pcis->A_BI);
781:     MatDestroy(&pcis->A_BB);
782:     MatDestroy(&pcbddc->local_mat);
783:   }

785:   /* transform local matrices if needed */
786:   if (pcbddc->ChangeOfBasisMatrix) {
787:     Mat         change_mat_all;
788:     PetscScalar *row_cmat_values;
789:     PetscInt    *row_cmat_indices;
790:     PetscInt    *nnz,*is_indices,*temp_indices;
791:     PetscInt    i,j,k,n_D,n_B;

793:     /* Get Non-overlapping dimensions */
794:     n_B = pcis->n_B;
795:     n_D = pcis->n-n_B;

797:     /* compute nonzero structure of change of basis on all local nodes */
798:     PetscMalloc1(pcis->n,&nnz);
799:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
800:     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
801:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
802:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
803:     k=1;
804:     for (i=0;i<n_B;i++) {
805:       MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
806:       nnz[is_indices[i]]=j;
807:       if (k < j) k = j;
808:       MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
809:     }
810:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
811:     /* assemble change of basis matrix on the whole set of local dofs */
812:     PetscMalloc1(k,&temp_indices);
813:     MatCreate(PETSC_COMM_SELF,&change_mat_all);
814:     MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);
815:     MatSetType(change_mat_all,MATSEQAIJ);
816:     MatSeqAIJSetPreallocation(change_mat_all,0,nnz);
817:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
818:     for (i=0;i<n_D;i++) {
819:       MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);
820:     }
821:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
822:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
823:     for (i=0;i<n_B;i++) {
824:       MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
825:       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
826:       MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);
827:       MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
828:     }
829:     MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);
830:     MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);
831:     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
832:     PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);
833:     if (isseqaij) {
834:       MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);
835:     } else {
836:       Mat work_mat;
837:       MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
838:       MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);
839:       MatDestroy(&work_mat);
840:     }
841:     /*
842:     PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
843:     MatView(change_mat_all,(PetscViewer)0);
844:     */
845:     MatDestroy(&change_mat_all);
846:     PetscFree(nnz);
847:     PetscFree(temp_indices);
848:   } else {
849:     /* without change of basis, the local matrix is unchanged */
850:     if (!pcbddc->local_mat) {
851:       PetscObjectReference((PetscObject)matis->A);
852:       pcbddc->local_mat = matis->A;
853:     }
854:   }

856:   /* get submatrices */
857:   MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);
858:   MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);
859:   PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
860:   if (!issbaij) {
861:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
862:     MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
863:   } else {
864:     Mat newmat;
865:     MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);
866:     MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);
867:     MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);
868:     MatDestroy(&newmat);
869:   }
870:   return(0);
871: }

875: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
876: {
877:   PC_IS*         pcis = (PC_IS*)(pc->data);
878:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
879:   IS             is_aux1,is_aux2;
880:   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
881:   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
882:   PetscInt       vbs,bs;
883:   PetscBT        bitmask;

887:   /*
888:     No need to setup local scatters if
889:       - primal space is unchanged
890:         AND
891:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
892:         AND
893:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
894:   */
895:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
896:     return(0);
897:   }
898:   /* destroy old objects */
899:   ISDestroy(&pcbddc->is_R_local);
900:   VecScatterDestroy(&pcbddc->R_to_B);
901:   VecScatterDestroy(&pcbddc->R_to_D);
902:   /* Set Non-overlapping dimensions */
903:   n_B = pcis->n_B; n_D = pcis->n - n_B;
904:   n_vertices = pcbddc->n_actual_vertices;
905:   /* create auxiliary bitmask */
906:   PetscBTCreate(pcis->n,&bitmask);
907:   for (i=0;i<n_vertices;i++) {
908:     PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);
909:   }

911:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
912:   PetscMalloc1((pcis->n-n_vertices),&idx_R_local);
913:   for (i=0, n_R=0; i<pcis->n; i++) {
914:     if (!PetscBTLookup(bitmask,i)) {
915:       idx_R_local[n_R] = i;
916:       n_R++;
917:     }
918:   }

920:   /* Block code */
921:   vbs = 1;
922:   MatGetBlockSize(pcbddc->local_mat,&bs);
923:   if (bs>1 && !(n_vertices%bs)) {
924:     PetscBool is_blocked = PETSC_TRUE;
925:     PetscInt  *vary;
926:     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
927:     PetscMalloc1(pcis->n/bs,&vary);
928:     PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
929:     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
930:     for (i=0; i<n_vertices; i++) {
931:       if (vary[i]!=0 && vary[i]!=bs) {
932:         is_blocked = PETSC_FALSE;
933:         break;
934:       }
935:     }
936:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
937:       vbs = bs;
938:       for (i=0;i<n_R/vbs;i++) {
939:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
940:       }
941:     }
942:     PetscFree(vary);
943:   }
944:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
945:   PetscFree(idx_R_local);

947:   /* print some info if requested */
948:   if (pcbddc->dbg_flag) {
949:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
950:     PetscViewerFlush(pcbddc->dbg_viewer);
951:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
952:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
953:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
954:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
955:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);
956:     PetscViewerFlush(pcbddc->dbg_viewer);
957:   }

959:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
960:   ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
961:   PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);
962:   PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);
963:   ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
964:   for (i=0; i<n_D; i++) {
965:     PetscBTSet(bitmask,is_indices[i]);
966:   }
967:   ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
968:   for (i=0, j=0; i<n_R; i++) {
969:     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
970:       aux_array1[j++] = i;
971:     }
972:   }
973:   ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
974:   ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
975:   for (i=0, j=0; i<n_B; i++) {
976:     if (!PetscBTLookup(bitmask,is_indices[i])) {
977:       aux_array2[j++] = i;
978:     }
979:   }
980:   ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
981:   ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
982:   VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
983:   ISDestroy(&is_aux1);
984:   ISDestroy(&is_aux2);

986:   if (pcbddc->switch_static || pcbddc->dbg_flag) {
987:     PetscMalloc1(n_D,&aux_array1);
988:     for (i=0, j=0; i<n_R; i++) {
989:       if (PetscBTLookup(bitmask,idx_R_local[i])) {
990:         aux_array1[j++] = i;
991:       }
992:     }
993:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
994:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
995:     ISDestroy(&is_aux1);
996:   }
997:   PetscBTDestroy(&bitmask);
998:   ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
999:   return(0);
1000: }


1005: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1006: {
1007:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1008:   PC_IS          *pcis = (PC_IS*)pc->data;
1009:   PC             pc_temp;
1010:   Mat            A_RR;
1011:   MatReuse       reuse;
1012:   PetscScalar    m_one = -1.0;
1013:   PetscReal      value;
1014:   PetscInt       n_D,n_R,ibs,mbs;
1015:   PetscBool      use_exact,use_exact_reduced,issbaij;
1017:   /* prefixes stuff */
1018:   char           dir_prefix[256],neu_prefix[256],str_level[16];
1019:   size_t         len;


1023:   /* compute prefixes */
1024:   PetscStrcpy(dir_prefix,"");
1025:   PetscStrcpy(neu_prefix,"");
1026:   if (!pcbddc->current_level) {
1027:     PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1028:     PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1029:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1030:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1031:   } else {
1032:     PetscStrcpy(str_level,"");
1033:     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1034:     PetscStrlen(((PetscObject)pc)->prefix,&len);
1035:     len -= 15; /* remove "pc_bddc_coarse_" */
1036:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1037:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1038:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
1039:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
1040:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1041:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1042:     PetscStrcat(dir_prefix,str_level);
1043:     PetscStrcat(neu_prefix,str_level);
1044:   }

1046:   /* DIRICHLET PROBLEM */
1047:   /* Matrix for Dirichlet problem is pcis->A_II */
1048:   ISGetSize(pcis->is_I_local,&n_D);
1049:   if (!pcbddc->ksp_D) { /* create object if not yet build */
1050:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1051:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1052:     /* default */
1053:     KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1054:     KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1055:     PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1056:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
1057:     if (issbaij) {
1058:       PCSetType(pc_temp,PCCHOLESKY);
1059:     } else {
1060:       PCSetType(pc_temp,PCLU);
1061:     }
1062:     /* Allow user's customization */
1063:     KSPSetFromOptions(pcbddc->ksp_D);
1064:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1065:   }
1066:   KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
1067:   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1068:   if (!n_D) {
1069:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
1070:     PCSetType(pc_temp,PCNONE);
1071:   }
1072:   /* Set Up KSP for Dirichlet problem of BDDC */
1073:   KSPSetUp(pcbddc->ksp_D);
1074:   /* set ksp_D into pcis data */
1075:   KSPDestroy(&pcis->ksp_D);
1076:   PetscObjectReference((PetscObject)pcbddc->ksp_D);
1077:   pcis->ksp_D = pcbddc->ksp_D;

1079:   /* NEUMANN PROBLEM */
1080:   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1081:   ISGetSize(pcbddc->is_R_local,&n_R);
1082:   if (pcbddc->ksp_R) { /* already created ksp */
1083:     PetscInt nn_R;
1084:     KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
1085:     PetscObjectReference((PetscObject)A_RR);
1086:     MatGetSize(A_RR,&nn_R,NULL);
1087:     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1088:       KSPReset(pcbddc->ksp_R);
1089:       MatDestroy(&A_RR);
1090:       reuse = MAT_INITIAL_MATRIX;
1091:     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1092:       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1093:         MatDestroy(&A_RR);
1094:         reuse = MAT_INITIAL_MATRIX;
1095:       } else { /* safe to reuse the matrix */
1096:         reuse = MAT_REUSE_MATRIX;
1097:       }
1098:     }
1099:     /* last check */
1100:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1101:       MatDestroy(&A_RR);
1102:       reuse = MAT_INITIAL_MATRIX;
1103:     }
1104:   } else { /* first time, so we need to create the matrix */
1105:     reuse = MAT_INITIAL_MATRIX;
1106:   }
1107:   /* extract A_RR */
1108:   MatGetBlockSize(pcbddc->local_mat,&mbs);
1109:   ISGetBlockSize(pcbddc->is_R_local,&ibs);
1110:   if (ibs != mbs) {
1111:     Mat newmat;
1112:     MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);
1113:     MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1114:     MatDestroy(&newmat);
1115:   } else {
1116:     MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1117:   }
1118:   if (!pcbddc->ksp_R) { /* create object if not present */
1119:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1120:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1121:     /* default */
1122:     KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1123:     KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1124:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
1125:     PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1126:     if (issbaij) {
1127:       PCSetType(pc_temp,PCCHOLESKY);
1128:     } else {
1129:       PCSetType(pc_temp,PCLU);
1130:     }
1131:     /* Allow user's customization */
1132:     KSPSetFromOptions(pcbddc->ksp_R);
1133:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1134:   }
1135:   KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
1136:   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1137:   if (!n_R) {
1138:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
1139:     PCSetType(pc_temp,PCNONE);
1140:   }
1141:   /* Set Up KSP for Neumann problem of BDDC */
1142:   KSPSetUp(pcbddc->ksp_R);

1144:   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1145:   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1146:     /* Dirichlet */
1147:     VecSetRandom(pcis->vec1_D,NULL);
1148:     MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1149:     KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1150:     VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1151:     VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1152:     /* need to be adapted? */
1153:     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1154:     MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1155:     PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1156:     /* print info */
1157:     if (pcbddc->dbg_flag) {
1158:       PetscViewerFlush(pcbddc->dbg_viewer);
1159:       PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1160:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1161:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");
1162:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1163:       PetscViewerFlush(pcbddc->dbg_viewer);
1164:     }
1165:     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1166:       PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);
1167:     }

1169:     /* Neumann */
1170:     VecSetRandom(pcbddc->vec1_R,NULL);
1171:     MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1172:     KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1173:     VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1174:     VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1175:     /* need to be adapted? */
1176:     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1177:     MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1178:     /* print info */
1179:     if (pcbddc->dbg_flag) {
1180:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1181:       PetscViewerFlush(pcbddc->dbg_viewer);
1182:     }
1183:     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1184:       PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);
1185:     }
1186:   }
1187:   /* free Neumann problem's matrix */
1188:   MatDestroy(&A_RR);
1189:   return(0);
1190: }

1194: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1195: {
1197:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

1200:   if (applytranspose) {
1201:     if (pcbddc->local_auxmat1) {
1202:       MatMultTranspose(pcbddc->local_auxmat2,rhs,work);
1203:       MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);
1204:     }
1205:     KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);
1206:   } else {
1207:     KSPSolve(pcbddc->ksp_R,rhs,sol);
1208:     if (pcbddc->local_auxmat1) {
1209:       MatMult(pcbddc->local_auxmat1,sol,work);
1210:       MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);
1211:     }
1212:   }
1213:   return(0);
1214: }

1216: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1219: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1220: {
1222:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1223:   PC_IS*            pcis = (PC_IS*)  (pc->data);
1224:   const PetscScalar zero = 0.0;

1227:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1228:   if (applytranspose) {
1229:     MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1230:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1231:   } else {
1232:     MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1233:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1234:   }
1235:   /* start communications from local primal nodes to rhs of coarse solver */
1236:   VecSet(pcbddc->coarse_vec,zero);
1237:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
1238:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

1240:   /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */
1241:   /* TODO remove null space when doing multilevel */
1242:   if (pcbddc->coarse_ksp) {
1243:     if (applytranspose) {
1244:       KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);
1245:     } else {
1246:       KSPSolve(pcbddc->coarse_ksp,NULL,NULL);
1247:     }
1248:   }
1249:   /* start communications from coarse solver solution to local primal nodes */
1250:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);

1252:   /* Local solution on R nodes */
1253:   VecSet(pcbddc->vec1_R,zero);
1254:   VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1255:   VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1256:   if (pcbddc->switch_static) {
1257:     VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1258:     VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1259:   }
1260:   PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);
1261:   VecSet(pcis->vec1_B,zero);
1262:   VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1263:   VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1264:   if (pcbddc->switch_static) {
1265:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1266:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1267:   }

1269:   /* complete communications from coarse sol to local primal nodes */
1270:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

1272:   /* Sum contributions from two levels */
1273:   if (applytranspose) {
1274:     MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1275:     if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1276:   } else {
1277:     MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1278:     if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1279:   }
1280:   return(0);
1281: }

1283: /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1286: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1287: {
1289:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1290:   PetscScalar    *array,*array2;
1291:   Vec            from,to;

1294:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1295:     from = pcbddc->coarse_vec;
1296:     to = pcbddc->vec1_P;
1297:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1298:       Vec tvec;
1299:       PetscInt lsize;
1300:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
1301:       VecGetLocalSize(tvec,&lsize);
1302:       VecGetArrayRead(tvec,(const PetscScalar**)&array);
1303:       VecGetArray(from,&array2);
1304:       PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));
1305:       VecRestoreArrayRead(tvec,(const PetscScalar**)&array);
1306:       VecRestoreArray(from,&array2);
1307:     }
1308:   } else { /* from local to global -> put data in coarse right hand side */
1309:     from = pcbddc->vec1_P;
1310:     to = pcbddc->coarse_vec;
1311:   }
1312:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1313:   return(0);
1314: }

1318: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1319: {
1321:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1322:   PetscScalar    *array,*array2;
1323:   Vec            from,to;

1326:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1327:     from = pcbddc->coarse_vec;
1328:     to = pcbddc->vec1_P;
1329:   } else { /* from local to global -> put data in coarse right hand side */
1330:     from = pcbddc->vec1_P;
1331:     to = pcbddc->coarse_vec;
1332:   }
1333:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1334:   if (smode == SCATTER_FORWARD) {
1335:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1336:       Vec tvec;
1337:       PetscInt lsize;
1338:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1339:       VecGetLocalSize(tvec,&lsize);
1340:       VecGetArrayRead(to,(const PetscScalar**)&array);
1341:       VecGetArray(tvec,&array2);
1342:       PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));
1343:       VecRestoreArrayRead(to,(const PetscScalar**)&array);
1344:       VecRestoreArray(tvec,&array2);
1345:     }
1346:   }
1347:   return(0);
1348: }

1350: /* uncomment for testing purposes */
1351: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1354: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1355: {
1356:   PetscErrorCode    ierr;
1357:   PC_IS*            pcis = (PC_IS*)(pc->data);
1358:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1359:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1360:   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1361:   MatType           impMatType=MATSEQAIJ;
1362:   /* one and zero */
1363:   PetscScalar       one=1.0,zero=0.0;
1364:   /* space to store constraints and their local indices */
1365:   PetscScalar       *temp_quadrature_constraint;
1366:   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1367:   /* iterators */
1368:   PetscInt          i,j,k,total_counts,temp_start_ptr;
1369:   /* stuff to store connected components stored in pcbddc->mat_graph */
1370:   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1371:   PetscInt          n_ISForFaces,n_ISForEdges;
1372:   /* near null space stuff */
1373:   MatNullSpace      nearnullsp;
1374:   const Vec         *nearnullvecs;
1375:   Vec               *localnearnullsp;
1376:   PetscBool         nnsp_has_cnst;
1377:   PetscInt          nnsp_size;
1378:   PetscScalar       *array;
1379:   /* BLAS integers */
1380:   PetscBLASInt      lwork,lierr;
1381:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1382:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1383:   /* LAPACK working arrays for SVD or POD */
1384:   PetscBool         skip_lapack;
1385:   PetscScalar       *work;
1386:   PetscReal         *singular_vals;
1387: #if defined(PETSC_USE_COMPLEX)
1388:   PetscReal         *rwork;
1389: #endif
1390: #if defined(PETSC_MISSING_LAPACK_GESVD)
1391:   PetscBLASInt      Blas_one_2=1;
1392:   PetscScalar       *temp_basis,*correlation_mat;
1393: #else
1394:   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1395:   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1396: #endif
1397:   /* reuse */
1398:   PetscInt          olocal_primal_size;
1399:   PetscInt          *oprimal_indices_local_idxs;
1400:   /* change of basis */
1401:   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1402:   PetscBool         boolforchange,qr_needed;
1403:   PetscBT           touched,change_basis,qr_needed_idx;
1404:   /* auxiliary stuff */
1405:   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1406:   /* some quantities */
1407:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1408:   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;


1412:   /* Destroy Mat objects computed previously */
1413:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1414:   MatDestroy(&pcbddc->ConstraintMatrix);
1415:   /* Get index sets for faces, edges and vertices from graph */
1416:   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1417:     pcbddc->use_vertices = PETSC_TRUE;
1418:   }
1419:   PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1420:   /* HACKS (the two following code branches) */
1421:   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1422:     pcbddc->use_change_of_basis = PETSC_TRUE;
1423:     pcbddc->use_change_on_faces = PETSC_FALSE;
1424:   }
1425:   if (pcbddc->NullSpace) {
1426:     /* use_change_of_basis should be consistent among processors */
1427:     PetscBool tbool = pcbddc->use_change_of_basis;
1428:     MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
1429:   }
1430:   /* print some info */
1431:   if (pcbddc->dbg_flag) {
1432:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1433:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1434:     i = 0;
1435:     if (ISForVertices) {
1436:       ISGetSize(ISForVertices,&i);
1437:     }
1438:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);
1439:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);
1440:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);
1441:     PetscViewerFlush(pcbddc->dbg_viewer);
1442:   }
1443:   /* check if near null space is attached to global mat */
1444:   MatGetNearNullSpace(pc->pmat,&nearnullsp);
1445:   if (nearnullsp) {
1446:     MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
1447:     /* remove any stored info */
1448:     MatNullSpaceDestroy(&pcbddc->onearnullspace);
1449:     PetscFree(pcbddc->onearnullvecs_state);
1450:     /* store information for BDDC solver reuse */
1451:     PetscObjectReference((PetscObject)nearnullsp);
1452:     pcbddc->onearnullspace = nearnullsp;
1453:     PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
1454:     for (i=0;i<nnsp_size;i++) {
1455:       PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
1456:     }
1457:   } else { /* if near null space is not provided BDDC uses constants by default */
1458:     nnsp_size = 0;
1459:     nnsp_has_cnst = PETSC_TRUE;
1460:   }
1461:   /* get max number of constraints on a single cc */
1462:   max_constraints = nnsp_size;
1463:   if (nnsp_has_cnst) max_constraints++;

1465:   /*
1466:        Evaluate maximum storage size needed by the procedure
1467:        - temp_indices will contain start index of each constraint stored as follows
1468:        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1469:        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1470:        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1471:                                                                                                                                                          */
1472:   total_counts = n_ISForFaces+n_ISForEdges;
1473:   total_counts *= max_constraints;
1474:   n_vertices = 0;
1475:   if (ISForVertices) {
1476:     ISGetSize(ISForVertices,&n_vertices);
1477:   }
1478:   total_counts += n_vertices;
1479:   PetscMalloc1((total_counts+1),&temp_indices);
1480:   PetscBTCreate(total_counts,&change_basis);
1481:   total_counts = 0;
1482:   max_size_of_constraint = 0;
1483:   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1484:     if (i<n_ISForEdges) {
1485:       used_IS = &ISForEdges[i];
1486:     } else {
1487:       used_IS = &ISForFaces[i-n_ISForEdges];
1488:     }
1489:     ISGetSize(*used_IS,&j);
1490:     total_counts += j;
1491:     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1492:   }
1493:   total_counts *= max_constraints;
1494:   total_counts += n_vertices;
1495:   PetscMalloc1(total_counts,&temp_quadrature_constraint);
1496:   PetscMalloc1(total_counts,&temp_indices_to_constraint);
1497:   PetscMalloc1(total_counts,&temp_indices_to_constraint_B);
1498:   /* get local part of global near null space vectors */
1499:   PetscMalloc1(nnsp_size,&localnearnullsp);
1500:   for (k=0;k<nnsp_size;k++) {
1501:     VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
1502:     VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1503:     VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
1504:   }

1506:   /* whether or not to skip lapack calls */
1507:   skip_lapack = PETSC_TRUE;
1508:   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;

1510:   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1511:   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1512:     PetscScalar temp_work;
1513: #if defined(PETSC_MISSING_LAPACK_GESVD)
1514:     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1515:     PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
1516:     PetscMalloc1(max_constraints,&singular_vals);
1517:     PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
1518: #if defined(PETSC_USE_COMPLEX)
1519:     PetscMalloc1(3*max_constraints,&rwork);
1520: #endif
1521:     /* now we evaluate the optimal workspace using query with lwork=-1 */
1522:     PetscBLASIntCast(max_constraints,&Blas_N);
1523:     PetscBLASIntCast(max_constraints,&Blas_LDA);
1524:     lwork = -1;
1525:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1526: #if !defined(PETSC_USE_COMPLEX)
1527:     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1528: #else
1529:     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1530: #endif
1531:     PetscFPTrapPop();
1532:     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1533: #else /* on missing GESVD */
1534:     /* SVD */
1535:     PetscInt max_n,min_n;
1536:     max_n = max_size_of_constraint;
1537:     min_n = max_constraints;
1538:     if (max_size_of_constraint < max_constraints) {
1539:       min_n = max_size_of_constraint;
1540:       max_n = max_constraints;
1541:     }
1542:     PetscMalloc1(min_n,&singular_vals);
1543: #if defined(PETSC_USE_COMPLEX)
1544:     PetscMalloc1(5*min_n,&rwork);
1545: #endif
1546:     /* now we evaluate the optimal workspace using query with lwork=-1 */
1547:     lwork = -1;
1548:     PetscBLASIntCast(max_n,&Blas_M);
1549:     PetscBLASIntCast(min_n,&Blas_N);
1550:     PetscBLASIntCast(max_n,&Blas_LDA);
1551:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1552: #if !defined(PETSC_USE_COMPLEX)
1553:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1554: #else
1555:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1556: #endif
1557:     PetscFPTrapPop();
1558:     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1559: #endif /* on missing GESVD */
1560:     /* Allocate optimal workspace */
1561:     PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
1562:     PetscMalloc1((PetscInt)lwork,&work);
1563:   }
1564:   /* Now we can loop on constraining sets */
1565:   total_counts = 0;
1566:   temp_indices[0] = 0;
1567:   /* vertices */
1568:   if (ISForVertices) {
1569:     ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
1570:     if (nnsp_has_cnst) { /* consider all vertices */
1571:       PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));
1572:       for (i=0;i<n_vertices;i++) {
1573:         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1574:         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1575:         total_counts++;
1576:       }
1577:     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1578:       PetscBool used_vertex;
1579:       for (i=0;i<n_vertices;i++) {
1580:         used_vertex = PETSC_FALSE;
1581:         k = 0;
1582:         while (!used_vertex && k<nnsp_size) {
1583:           VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1584:           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1585:             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1586:             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1587:             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1588:             total_counts++;
1589:             used_vertex = PETSC_TRUE;
1590:           }
1591:           VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1592:           k++;
1593:         }
1594:       }
1595:     }
1596:     ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
1597:     n_vertices = total_counts;
1598:   }

1600:   /* edges and faces */
1601:   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1602:     if (i<n_ISForEdges) {
1603:       used_IS = &ISForEdges[i];
1604:       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1605:     } else {
1606:       used_IS = &ISForFaces[i-n_ISForEdges];
1607:       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1608:     }
1609:     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1610:     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1611:     ISGetSize(*used_IS,&size_of_constraint);
1612:     ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
1613:     /* change of basis should not be performed on local periodic nodes */
1614:     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1615:     if (nnsp_has_cnst) {
1616:       PetscScalar quad_value;
1617:       temp_constraints++;
1618:       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1619:       PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1620:       for (j=0;j<size_of_constraint;j++) {
1621:         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1622:       }
1623:       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1624:       total_counts++;
1625:     }
1626:     for (k=0;k<nnsp_size;k++) {
1627:       PetscReal real_value;
1628:       VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1629:       PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));
1630:       for (j=0;j<size_of_constraint;j++) {
1631:         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1632:       }
1633:       VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
1634:       /* check if array is null on the connected component */
1635:       PetscBLASIntCast(size_of_constraint,&Blas_N);
1636:       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1637:       if (real_value > 0.0) { /* keep indices and values */
1638:         temp_constraints++;
1639:         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1640:         total_counts++;
1641:       }
1642:     }
1643:     ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
1644:     valid_constraints = temp_constraints;
1645:     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1646:     if (!pcbddc->use_nnsp_true && temp_constraints) {
1647:       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */

1649: #if defined(PETSC_MISSING_LAPACK_GESVD)
1650:       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1651:          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1652:          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1653:             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1654:             from that computed using LAPACKgesvd
1655:          -> This is due to a different computation of eigenvectors in LAPACKheev
1656:          -> The quality of the POD-computed basis will be the same */
1657:       PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
1658:       /* Store upper triangular part of correlation matrix */
1659:       PetscBLASIntCast(size_of_constraint,&Blas_N);
1660:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1661:       for (j=0;j<temp_constraints;j++) {
1662:         for (k=0;k<j+1;k++) {
1663:           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1664:         }
1665:       }
1666:       /* compute eigenvalues and eigenvectors of correlation matrix */
1667:       PetscBLASIntCast(temp_constraints,&Blas_N);
1668:       PetscBLASIntCast(temp_constraints,&Blas_LDA);
1669: #if !defined(PETSC_USE_COMPLEX)
1670:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1671: #else
1672:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1673: #endif
1674:       PetscFPTrapPop();
1675:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1676:       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1677:       j=0;
1678:       while (j < temp_constraints && singular_vals[j] < tol) j++;
1679:       total_counts=total_counts-j;
1680:       valid_constraints = temp_constraints-j;
1681:       /* scale and copy POD basis into used quadrature memory */
1682:       PetscBLASIntCast(size_of_constraint,&Blas_M);
1683:       PetscBLASIntCast(temp_constraints,&Blas_N);
1684:       PetscBLASIntCast(temp_constraints,&Blas_K);
1685:       PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1686:       PetscBLASIntCast(temp_constraints,&Blas_LDB);
1687:       PetscBLASIntCast(size_of_constraint,&Blas_LDC);
1688:       if (j<temp_constraints) {
1689:         PetscInt ii;
1690:         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1691:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1692:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1693:         PetscFPTrapPop();
1694:         for (k=0;k<temp_constraints-j;k++) {
1695:           for (ii=0;ii<size_of_constraint;ii++) {
1696:             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1697:           }
1698:         }
1699:       }
1700: #else  /* on missing GESVD */
1701:       PetscBLASIntCast(size_of_constraint,&Blas_M);
1702:       PetscBLASIntCast(temp_constraints,&Blas_N);
1703:       PetscBLASIntCast(size_of_constraint,&Blas_LDA);
1704:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1705: #if !defined(PETSC_USE_COMPLEX)
1706:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1707: #else
1708:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1709: #endif
1710:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1711:       PetscFPTrapPop();
1712:       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1713:       k = temp_constraints;
1714:       if (k > size_of_constraint) k = size_of_constraint;
1715:       j = 0;
1716:       while (j < k && singular_vals[k-j-1] < tol) j++;
1717:       total_counts = total_counts-temp_constraints+k-j;
1718:       valid_constraints = k-j;
1719: #endif /* on missing GESVD */
1720:     }
1721:     /* setting change_of_basis flag is safe now */
1722:     if (boolforchange) {
1723:       for (j=0;j<valid_constraints;j++) {
1724:         PetscBTSet(change_basis,total_counts-j-1);
1725:       }
1726:     }
1727:   }
1728:   /* free index sets of faces, edges and vertices */
1729:   for (i=0;i<n_ISForFaces;i++) {
1730:     ISDestroy(&ISForFaces[i]);
1731:   }
1732:   PetscFree(ISForFaces);
1733:   for (i=0;i<n_ISForEdges;i++) {
1734:     ISDestroy(&ISForEdges[i]);
1735:   }
1736:   PetscFree(ISForEdges);
1737:   ISDestroy(&ISForVertices);
1738:   /* map temp_indices_to_constraint in boundary numbering */
1739:   ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);
1740:   if (i != temp_indices[total_counts]) {
1741:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1742:   }

1744:   /* free workspace */
1745:   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1746:     PetscFree(work);
1747: #if defined(PETSC_USE_COMPLEX)
1748:     PetscFree(rwork);
1749: #endif
1750:     PetscFree(singular_vals);
1751: #if defined(PETSC_MISSING_LAPACK_GESVD)
1752:     PetscFree(correlation_mat);
1753:     PetscFree(temp_basis);
1754: #endif
1755:   }
1756:   for (k=0;k<nnsp_size;k++) {
1757:     VecDestroy(&localnearnullsp[k]);
1758:   }
1759:   PetscFree(localnearnullsp);

1761:   /* set quantities in pcbddc data structure and store previous primal size */
1762:   /* n_vertices defines the number of subdomain corners in the primal space */
1763:   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1764:   olocal_primal_size = pcbddc->local_primal_size;
1765:   pcbddc->local_primal_size = total_counts;
1766:   pcbddc->n_vertices = n_vertices;
1767:   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;

1769:   /* Create constraint matrix */
1770:   /* The constraint matrix is used to compute the l2g map of primal dofs */
1771:   /* so we need to set it up properly either with or without change of basis */
1772:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
1773:   MatSetType(pcbddc->ConstraintMatrix,impMatType);
1774:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
1775:   /* array to compute a local numbering of constraints : vertices first then constraints */
1776:   PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);
1777:   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1778:   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1779:   PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);
1780:   /* auxiliary stuff for basis change */
1781:   PetscMalloc1(max_size_of_constraint,&global_indices);
1782:   PetscBTCreate(pcis->n_B,&touched);

1784:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1785:   total_primal_vertices=0;
1786:   for (i=0;i<pcbddc->local_primal_size;i++) {
1787:     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1788:     if (size_of_constraint == 1) {
1789:       PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);
1790:       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1791:       aux_primal_minloc[total_primal_vertices]=0;
1792:       total_primal_vertices++;
1793:     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1794:       PetscInt min_loc,min_index;
1795:       ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);
1796:       /* find first untouched local node */
1797:       k = 0;
1798:       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1799:       min_index = global_indices[k];
1800:       min_loc = k;
1801:       /* search the minimum among global nodes already untouched on the cc */
1802:       for (k=1;k<size_of_constraint;k++) {
1803:         /* there can be more than one constraint on a single connected component */
1804:         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1805:           min_index = global_indices[k];
1806:           min_loc = k;
1807:         }
1808:       }
1809:       PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);
1810:       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1811:       aux_primal_minloc[total_primal_vertices]=min_loc;
1812:       total_primal_vertices++;
1813:     }
1814:   }
1815:   /* determine if a QR strategy is needed for change of basis */
1816:   qr_needed = PETSC_FALSE;
1817:   PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);
1818:   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1819:     if (PetscBTLookup(change_basis,i)) {
1820:       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1821:       j = 0;
1822:       for (k=0;k<size_of_constraint;k++) {
1823:         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1824:           j++;
1825:         }
1826:       }
1827:       /* found more than one primal dof on the cc */
1828:       if (j > 1) {
1829:         PetscBTSet(qr_needed_idx,i);
1830:         qr_needed = PETSC_TRUE;
1831:       }
1832:     }
1833:   }
1834:   /* free workspace */
1835:   PetscFree(global_indices);

1837:   /* permute indices in order to have a sorted set of vertices */
1838:   PetscSortInt(total_primal_vertices,aux_primal_numbering);

1840:   /* nonzero structure of constraint matrix */
1841:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
1842:   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1843:   j=total_primal_vertices;
1844:   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1845:     if (!PetscBTLookup(change_basis,i)) {
1846:       nnz[j]=temp_indices[i+1]-temp_indices[i];
1847:       j++;
1848:     }
1849:   }
1850:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
1851:   PetscFree(nnz);
1852:   /* set values in constraint matrix */
1853:   for (i=0;i<total_primal_vertices;i++) {
1854:     MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);
1855:   }
1856:   total_counts = total_primal_vertices;
1857:   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1858:     if (!PetscBTLookup(change_basis,i)) {
1859:       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1860:       MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
1861:       total_counts++;
1862:     }
1863:   }
1864:   /* assembling */
1865:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1866:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1867:   /*
1868:   PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
1869:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
1870:   */
1871:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1872:   if (pcbddc->use_change_of_basis) {
1873:     /* dual and primal dofs on a single cc */
1874:     PetscInt     dual_dofs,primal_dofs;
1875:     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1876:     PetscInt     primal_counter;
1877:     /* working stuff for GEQRF */
1878:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
1879:     PetscBLASInt lqr_work;
1880:     /* working stuff for UNGQR */
1881:     PetscScalar  *gqr_work,lgqr_work_t;
1882:     PetscBLASInt lgqr_work;
1883:     /* working stuff for TRTRS */
1884:     PetscScalar  *trs_rhs;
1885:     PetscBLASInt Blas_NRHS;
1886:     /* pointers for values insertion into change of basis matrix */
1887:     PetscInt     *start_rows,*start_cols;
1888:     PetscScalar  *start_vals;
1889:     /* working stuff for values insertion */
1890:     PetscBT      is_primal;

1892:     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1893:     MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);
1894:     MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);
1895:     MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);
1896:     /* work arrays */
1897:     PetscMalloc1(pcis->n_B,&nnz);
1898:     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1899:     /* nonzeros per row */
1900:     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1901:       if (PetscBTLookup(change_basis,i)) {
1902:         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1903:         if (PetscBTLookup(qr_needed_idx,i)) {
1904:           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1905:         } else {
1906:           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1907:           /* get local primal index on the cc */
1908:           j = 0;
1909:           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1910:           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1911:         }
1912:       }
1913:     }
1914:     MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);
1915:     PetscFree(nnz);
1916:     /* Set initial identity in the matrix */
1917:     for (i=0;i<pcis->n_B;i++) {
1918:       MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
1919:     }

1921:     if (pcbddc->dbg_flag) {
1922:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1923:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
1924:     }


1927:     /* Now we loop on the constraints which need a change of basis */
1928:     /*
1929:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1930:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

1932:        Basic blocks of change of basis matrix T computed

1934:           - Using the following block transformation if there is only a primal dof on the cc
1935:             (in the example, primal dof is the last one of the edge in LOCAL ordering
1936:              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1937:             | 1        0   ...        0     1 |
1938:             | 0        1   ...        0     1 |
1939:             |              ...                |
1940:             | 0        ...            1     1 |
1941:             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |

1943:           - via QR decomposition of constraints otherwise
1944:     */
1945:     if (qr_needed) {
1946:       /* space to store Q */
1947:       PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);
1948:       /* first we issue queries for optimal work */
1949:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1950:       PetscBLASIntCast(max_constraints,&Blas_N);
1951:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1952:       lqr_work = -1;
1953:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1954:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1955:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
1956:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
1957:       lgqr_work = -1;
1958:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
1959:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
1960:       PetscBLASIntCast(max_constraints,&Blas_K);
1961:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
1962:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1963:       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1964:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1965:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
1966:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
1967:       /* array to store scaling factors for reflectors */
1968:       PetscMalloc1(max_constraints,&qr_tau);
1969:       /* array to store rhs and solution of triangular solver */
1970:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
1971:       /* allocating workspace for check */
1972:       if (pcbddc->dbg_flag) {
1973:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);
1974:       }
1975:     }
1976:     /* array to store whether a node is primal or not */
1977:     PetscBTCreate(pcis->n_B,&is_primal);
1978:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
1979:     ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);
1980:     if (i != total_primal_vertices) {
1981:       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1982:     }
1983:     for (i=0;i<total_primal_vertices;i++) {
1984:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
1985:     }
1986:     PetscFree(aux_primal_numbering_B);

1988:     /* loop on constraints and see whether or not they need a change of basis and compute it */
1989:     /* -> using implicit ordering contained in temp_indices data */
1990:     total_counts = pcbddc->n_vertices;
1991:     primal_counter = total_counts;
1992:     while (total_counts<pcbddc->local_primal_size) {
1993:       primal_dofs = 1;
1994:       if (PetscBTLookup(change_basis,total_counts)) {
1995:         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1996:         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1997:           primal_dofs++;
1998:         }
1999:         /* get constraint info */
2000:         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2001:         dual_dofs = size_of_constraint-primal_dofs;

2003:         if (pcbddc->dbg_flag) {
2004:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);
2005:         }

2007:         if (primal_dofs > 1) { /* QR */

2009:           /* copy quadrature constraints for change of basis check */
2010:           if (pcbddc->dbg_flag) {
2011:             PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2012:           }
2013:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2014:           PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

2016:           /* compute QR decomposition of constraints */
2017:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2018:           PetscBLASIntCast(primal_dofs,&Blas_N);
2019:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2020:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2021:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2022:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2023:           PetscFPTrapPop();

2025:           /* explictly compute R^-T */
2026:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
2027:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2028:           PetscBLASIntCast(primal_dofs,&Blas_N);
2029:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
2030:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2031:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
2032:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2033:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2034:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2035:           PetscFPTrapPop();

2037:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2038:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2039:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2040:           PetscBLASIntCast(primal_dofs,&Blas_K);
2041:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2042:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2043:           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2044:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2045:           PetscFPTrapPop();

2047:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2048:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2049:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2050:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2051:           PetscBLASIntCast(primal_dofs,&Blas_N);
2052:           PetscBLASIntCast(primal_dofs,&Blas_K);
2053:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2054:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
2055:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2056:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2057:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2058:           PetscFPTrapPop();
2059:           PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

2061:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2062:           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2063:           /* insert cols for primal dofs */
2064:           for (j=0;j<primal_dofs;j++) {
2065:             start_vals = &qr_basis[j*size_of_constraint];
2066:             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2067:             MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2068:           }
2069:           /* insert cols for dual dofs */
2070:           for (j=0,k=0;j<dual_dofs;k++) {
2071:             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2072:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2073:               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2074:               MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2075:               j++;
2076:             }
2077:           }

2079:           /* check change of basis */
2080:           if (pcbddc->dbg_flag) {
2081:             PetscInt   ii,jj;
2082:             PetscBool valid_qr=PETSC_TRUE;
2083:             PetscBLASIntCast(primal_dofs,&Blas_M);
2084:             PetscBLASIntCast(size_of_constraint,&Blas_N);
2085:             PetscBLASIntCast(size_of_constraint,&Blas_K);
2086:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2087:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2088:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
2089:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2090:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2091:             PetscFPTrapPop();
2092:             for (jj=0;jj<size_of_constraint;jj++) {
2093:               for (ii=0;ii<primal_dofs;ii++) {
2094:                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2095:                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2096:               }
2097:             }
2098:             if (!valid_qr) {
2099:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
2100:               for (jj=0;jj<size_of_constraint;jj++) {
2101:                 for (ii=0;ii<primal_dofs;ii++) {
2102:                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2103:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2104:                   }
2105:                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2106:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2107:                   }
2108:                 }
2109:               }
2110:             } else {
2111:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
2112:             }
2113:           }
2114:         } else { /* simple transformation block */
2115:           PetscInt row,col;
2116:           PetscScalar val;
2117:           for (j=0;j<size_of_constraint;j++) {
2118:             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2119:             if (!PetscBTLookup(is_primal,row)) {
2120:               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2121:               MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2122:               MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);
2123:             } else {
2124:               for (k=0;k<size_of_constraint;k++) {
2125:                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2126:                 if (row != col) {
2127:                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2128:                 } else {
2129:                   val = 1.0;
2130:                 }
2131:                 MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2132:               }
2133:             }
2134:           }
2135:           if (pcbddc->dbg_flag) {
2136:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
2137:           }
2138:         }
2139:         /* increment primal counter */
2140:         primal_counter += primal_dofs;
2141:       } else {
2142:         if (pcbddc->dbg_flag) {
2143:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);
2144:         }
2145:       }
2146:       /* increment constraint counter total_counts */
2147:       total_counts += primal_dofs;
2148:     }

2150:     /* free workspace */
2151:     if (qr_needed) {
2152:       if (pcbddc->dbg_flag) {
2153:         PetscFree(work);
2154:       }
2155:       PetscFree(trs_rhs);
2156:       PetscFree(qr_tau);
2157:       PetscFree(qr_work);
2158:       PetscFree(gqr_work);
2159:       PetscFree(qr_basis);
2160:     }
2161:     PetscBTDestroy(&is_primal);
2162:     /* assembling */
2163:     MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2164:     MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2165:     /*
2166:     PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2167:     MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);
2168:     */
2169:   }
2170:   /* Change of basis as provided by the user in local numbering (internal and boundary) or boundary only */
2171:   if (pcbddc->user_ChangeOfBasisMatrix) {
2172:     PetscInt rows,cols;
2173:     MatGetSize(pcbddc->user_ChangeOfBasisMatrix,&rows,&cols);
2174:     if (rows == pcis->n && cols == pcis->n) {
2175:       MatGetSubMatrix(pcbddc->user_ChangeOfBasisMatrix,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcbddc->ChangeOfBasisMatrix);
2176:     } else {
2177:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2178:       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2179:     }
2180:   }

2182:   /* get indices in local ordering for vertices and constraints */
2183:   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2184:     PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);
2185:     PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));
2186:   }
2187:   PetscFree(aux_primal_numbering);
2188:   PetscFree(pcbddc->primal_indices_local_idxs);
2189:   PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);
2190:   PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);
2191:   PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));
2192:   PetscFree(aux_primal_numbering);
2193:   PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);
2194:   PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));
2195:   PetscFree(aux_primal_numbering);
2196:   /* set quantities in PCBDDC data struct */
2197:   pcbddc->n_actual_vertices = i;
2198:   /* check if a new primal space has been introduced */
2199:   pcbddc->new_primal_space_local = PETSC_TRUE;
2200:   if (olocal_primal_size == pcbddc->local_primal_size) {
2201:     PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);
2202:     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2203:     PetscFree(oprimal_indices_local_idxs);
2204:   }
2205:   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2206:   MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2208:   /* flush dbg viewer */
2209:   if (pcbddc->dbg_flag) {
2210:     PetscViewerFlush(pcbddc->dbg_viewer);
2211:   }

2213:   /* free workspace */
2214:   PetscBTDestroy(&touched);
2215:   PetscBTDestroy(&qr_needed_idx);
2216:   PetscFree(aux_primal_minloc);
2217:   PetscFree(temp_indices);
2218:   PetscBTDestroy(&change_basis);
2219:   PetscFree(temp_indices_to_constraint);
2220:   PetscFree(temp_indices_to_constraint_B);
2221:   PetscFree(temp_quadrature_constraint);
2222:   return(0);
2223: }

2227: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2228: {
2229:   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2230:   PC_IS       *pcis = (PC_IS*)pc->data;
2231:   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2232:   PetscInt    ierr,i,vertex_size;
2233:   PetscViewer viewer=pcbddc->dbg_viewer;

2236:   /* Reset previously computed graph */
2237:   PCBDDCGraphReset(pcbddc->mat_graph);
2238:   /* Init local Graph struct */
2239:   PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);

2241:   /* Check validity of the csr graph passed in by the user */
2242:   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2243:     PCBDDCGraphResetCSR(pcbddc->mat_graph);
2244:   }

2246:   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2247:   if (pcbddc->use_local_adj && (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy)) {
2248:     Mat mat_adj;
2249:     const PetscInt *xadj,*adjncy;
2250:     PetscBool flg_row=PETSC_TRUE;

2252:     MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2253:     MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2254:     if (!flg_row) {
2255:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2256:     }
2257:     PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);
2258:     MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);
2259:     if (!flg_row) {
2260:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2261:     }
2262:     MatDestroy(&mat_adj);
2263:   }

2265:   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2266:   vertex_size = 1;
2267:   if (pcbddc->user_provided_isfordofs) {
2268:     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2269:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
2270:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2271:         PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
2272:         ISDestroy(&pcbddc->ISForDofs[i]);
2273:       }
2274:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2275:       pcbddc->n_ISForDofs = 0;
2276:       PetscFree(pcbddc->ISForDofs);
2277:     }
2278:     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2279:     MatGetBlockSize(matis->A,&vertex_size);
2280:   } else {
2281:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2282:       MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);
2283:       PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);
2284:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2285:         ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
2286:       }
2287:     }
2288:   }

2290:   /* Setup of Graph */
2291:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2292:     PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
2293:   }
2294:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2295:     PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
2296:   }
2297:   PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);

2299:   /* Graph's connected components analysis */
2300:   PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);

2302:   /* print some info to stdout */
2303:   if (pcbddc->dbg_flag) {
2304:     PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2305:   }

2307:   /* mark topography has done */
2308:   pcbddc->recompute_topography = PETSC_FALSE;
2309:   return(0);
2310: }

2314: PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2315: {
2316:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2317:   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;

2321:   n = 0;
2322:   vertices = 0;
2323:   if (pcbddc->ConstraintMatrix) {
2324:     MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);
2325:     for (i=0;i<local_primal_size;i++) {
2326:       MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2327:       if (size_of_constraint == 1) n++;
2328:       MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2329:     }
2330:     if (vertices_idx) {
2331:       PetscMalloc1(n,&vertices);
2332:       n = 0;
2333:       for (i=0;i<local_primal_size;i++) {
2334:         MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2335:         if (size_of_constraint == 1) {
2336:           vertices[n++]=row_cmat_indices[0];
2337:         }
2338:         MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2339:       }
2340:     }
2341:   }
2342:   *n_vertices = n;
2343:   if (vertices_idx) *vertices_idx = vertices;
2344:   return(0);
2345: }

2349: PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2350: {
2351:   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2352:   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2353:   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2354:   PetscBT        touched;

2357:     /* This function assumes that the number of local constraints per connected component
2358:        is not greater than the number of nodes defined for the connected component
2359:        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2361:   n = 0;
2362:   constraints_index = 0;
2363:   if (pcbddc->ConstraintMatrix) {
2364:     MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);
2365:     max_size_of_constraint = 0;
2366:     for (i=0;i<local_primal_size;i++) {
2367:       MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2368:       if (size_of_constraint > 1) {
2369:         n++;
2370:       }
2371:       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2372:       MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
2373:     }
2374:     if (constraints_idx) {
2375:       PetscMalloc1(n,&constraints_index);
2376:       PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);
2377:       PetscBTCreate(local_size,&touched);
2378:       n = 0;
2379:       for (i=0;i<local_primal_size;i++) {
2380:         MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2381:         if (size_of_constraint > 1) {
2382:           ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);
2383:           /* find first untouched local node */
2384:           j = 0;
2385:           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2386:           min_index = row_cmat_global_indices[j];
2387:           min_loc = j;
2388:           /* search the minimum among nodes not yet touched on the connected component
2389:              since there can be more than one constraint on a single cc */
2390:           for (j=1;j<size_of_constraint;j++) {
2391:             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2392:               min_index = row_cmat_global_indices[j];
2393:               min_loc = j;
2394:             }
2395:           }
2396:           PetscBTSet(touched,row_cmat_indices[min_loc]);
2397:           constraints_index[n++] = row_cmat_indices[min_loc];
2398:         }
2399:         MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
2400:       }
2401:       PetscBTDestroy(&touched);
2402:       PetscFree(row_cmat_global_indices);
2403:     }
2404:   }
2405:   *n_constraints = n;
2406:   if (constraints_idx) *constraints_idx = constraints_index;
2407:   return(0);
2408: }

2410: /* the next two functions has been adapted from pcis.c */
2413: PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2414: {
2416:   PC_IS          *pcis = (PC_IS*)(pc->data);

2419:   if (!vec2_B) { vec2_B = v; }
2420:   MatMult(pcis->A_BB,v,vec1_B);
2421:   MatMult(pcis->A_IB,v,vec1_D);
2422:   KSPSolve(pcis->ksp_D,vec1_D,vec2_D);
2423:   MatMult(pcis->A_BI,vec2_D,vec2_B);
2424:   VecAXPY(vec1_B,-1.0,vec2_B);
2425:   return(0);
2426: }

2430: PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2431: {
2433:   PC_IS          *pcis = (PC_IS*)(pc->data);

2436:   if (!vec2_B) { vec2_B = v; }
2437:   MatMultTranspose(pcis->A_BB,v,vec1_B);
2438:   MatMultTranspose(pcis->A_BI,v,vec1_D);
2439:   KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);
2440:   MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);
2441:   VecAXPY(vec1_B,-1.0,vec2_B);
2442:   return(0);
2443: }

2447: PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2448: {
2449:   Vec            local_vec,global_vec;
2450:   IS             seqis,paris;
2451:   VecScatter     scatter_ctx;
2452:   PetscScalar    *array;
2453:   PetscInt       *temp_global_dofs;
2454:   PetscScalar    globalsum;
2455:   PetscInt       i,j,s;
2456:   PetscInt       nlocals,first_index,old_index,max_local;
2457:   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2458:   PetscMPIInt    *dof_sizes,*dof_displs;
2459:   PetscBool      first_found;

2463:   /* mpi buffers */
2464:   MPI_Comm_size(comm,&size_prec_comm);
2465:   MPI_Comm_rank(comm,&rank_prec_comm);
2466:   j = ( !rank_prec_comm ? size_prec_comm : 0);
2467:   PetscMalloc1(j,&dof_sizes);
2468:   PetscMalloc1(j,&dof_displs);
2469:   /* get maximum size of subset */
2470:   PetscMalloc1(n_local_dofs,&temp_global_dofs);
2471:   ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);
2472:   max_local = 0;
2473:   for (i=0;i<n_local_dofs;i++) {
2474:     if (max_local < temp_global_dofs[i] ) {
2475:       max_local = temp_global_dofs[i];
2476:     }
2477:   }
2478:   MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2479:   max_global++;
2480:   max_local = 0;
2481:   for (i=0;i<n_local_dofs;i++) {
2482:     if (max_local < local_dofs[i] ) {
2483:       max_local = local_dofs[i];
2484:     }
2485:   }
2486:   max_local++;
2487:   /* allocate workspace */
2488:   VecCreate(PETSC_COMM_SELF,&local_vec);
2489:   VecSetSizes(local_vec,PETSC_DECIDE,max_local);
2490:   VecSetType(local_vec,VECSEQ);
2491:   VecCreate(comm,&global_vec);
2492:   VecSetSizes(global_vec,PETSC_DECIDE,max_global);
2493:   VecSetType(global_vec,VECMPI);
2494:   /* create scatter */
2495:   ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);
2496:   ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);
2497:   VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);
2498:   ISDestroy(&seqis);
2499:   ISDestroy(&paris);
2500:   /* init array */
2501:   VecSet(global_vec,0.0);
2502:   VecSet(local_vec,0.0);
2503:   VecGetArray(local_vec,&array);
2504:   if (local_dofs_mult) {
2505:     for (i=0;i<n_local_dofs;i++) {
2506:       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2507:     }
2508:   } else {
2509:     for (i=0;i<n_local_dofs;i++) {
2510:       array[local_dofs[i]]=1.0;
2511:     }
2512:   }
2513:   VecRestoreArray(local_vec,&array);
2514:   /* scatter into global vec and get total number of global dofs */
2515:   VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2516:   VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);
2517:   VecSum(global_vec,&globalsum);
2518:   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2519:   /* Fill global_vec with cumulative function for global numbering */
2520:   VecGetArray(global_vec,&array);
2521:   VecGetLocalSize(global_vec,&s);
2522:   nlocals = 0;
2523:   first_index = -1;
2524:   first_found = PETSC_FALSE;
2525:   for (i=0;i<s;i++) {
2526:     if (!first_found && PetscRealPart(array[i]) > 0.1) {
2527:       first_found = PETSC_TRUE;
2528:       first_index = i;
2529:     }
2530:     nlocals += (PetscInt)PetscRealPart(array[i]);
2531:   }
2532:   MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);
2533:   if (!rank_prec_comm) {
2534:     dof_displs[0]=0;
2535:     for (i=1;i<size_prec_comm;i++) {
2536:       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2537:     }
2538:   }
2539:   MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);
2540:   if (first_found) {
2541:     array[first_index] += (PetscScalar)nlocals;
2542:     old_index = first_index;
2543:     for (i=first_index+1;i<s;i++) {
2544:       if (PetscRealPart(array[i]) > 0.1) {
2545:         array[i] += array[old_index];
2546:         old_index = i;
2547:       }
2548:     }
2549:   }
2550:   VecRestoreArray(global_vec,&array);
2551:   VecSet(local_vec,0.0);
2552:   VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2553:   VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);
2554:   /* get global ordering of local dofs */
2555:   VecGetArray(local_vec,&array);
2556:   if (local_dofs_mult) {
2557:     for (i=0;i<n_local_dofs;i++) {
2558:       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2559:     }
2560:   } else {
2561:     for (i=0;i<n_local_dofs;i++) {
2562:       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2563:     }
2564:   }
2565:   VecRestoreArray(local_vec,&array);
2566:   /* free workspace */
2567:   VecScatterDestroy(&scatter_ctx);
2568:   VecDestroy(&local_vec);
2569:   VecDestroy(&global_vec);
2570:   PetscFree(dof_sizes);
2571:   PetscFree(dof_displs);
2572:   /* return pointer to global ordering of local dofs */
2573:   *global_numbering_subset = temp_global_dofs;
2574:   return(0);
2575: }

2579: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2580: {
2581:   PetscInt       i,j;
2582:   PetscScalar    *alphas;

2586:   /* this implements stabilized Gram-Schmidt */
2587:   PetscMalloc1(n,&alphas);
2588:   for (i=0;i<n;i++) {
2589:     VecNormalize(vecs[i],NULL);
2590:     if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
2591:     for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
2592:   }
2593:   PetscFree(alphas);
2594:   return(0);
2595: }

2597: /* TODO
2598:    - now preallocation is done assuming SEQDENSE local matrices
2599: */
2602: static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2603: {
2604:   Mat                    new_mat;
2605:   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2606:   /* info on mat */
2607:   /* ISLocalToGlobalMapping rmapping,cmapping; */
2608:   PetscInt               bs,rows,cols;
2609:   PetscInt               lrows,lcols;
2610:   PetscInt               local_rows,local_cols;
2611:   PetscBool              isdense;
2612:   /* values insertion */
2613:   PetscScalar            *array;
2614:   PetscInt               *local_indices,*global_indices;
2615:   /* work */
2616:   PetscInt               i,j,index_row;
2617:   PetscErrorCode         ierr;

2620:   /* MISSING CHECKS
2621:     - rectangular case not covered (it is not allowed by MATIS)
2622:   */
2623:   /* get info from mat */
2624:   /* MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping); */
2625:   MatGetSize(mat,&rows,&cols);
2626:   MatGetBlockSize(mat,&bs);
2627:   MatGetSize(matis->A,&local_rows,&local_cols);

2629:   /* work */
2630:   PetscMalloc1(local_rows,&local_indices);
2631:   for (i=0;i<local_rows;i++) local_indices[i]=i;
2632:   /* map indices of local mat to global values */
2633:   PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);
2634:   /* ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices); */
2635:   ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);

2637:   if (reuse==MAT_INITIAL_MATRIX) {
2638:     Vec         vec_dnz,vec_onz;
2639:     PetscScalar *my_dnz,*my_onz;
2640:     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2641:     PetscInt    index_col,owner;
2642:     PetscMPIInt nsubdomains;

2644:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);
2645:     MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);
2646:     MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);
2647:     MatSetBlockSize(new_mat,bs);
2648:     MatSetType(new_mat,Mtype);
2649:     MatSetUp(new_mat);
2650:     MatGetLocalSize(new_mat,&lrows,&lcols);

2652:     /*
2653:       preallocation
2654:     */

2656:     MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);
2657:     /*
2658:        Some vectors are needed to sum up properly on shared interface dofs.
2659:        Preallocation macros cannot do the job.
2660:        Note that preallocation is not exact, since it overestimates nonzeros
2661:     */
2662:     MatGetVecs(new_mat,NULL,&vec_dnz);
2663:     /* VecSetLocalToGlobalMapping(vec_dnz,rmapping); */
2664:     VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);
2665:     VecDuplicate(vec_dnz,&vec_onz);
2666:     /* All processes need to compute entire row ownership */
2667:     PetscMalloc1(rows,&row_ownership);
2668:     MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);
2669:     for (i=0;i<nsubdomains;i++) {
2670:       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2671:         row_ownership[j]=i;
2672:       }
2673:     }

2675:     /*
2676:        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2677:        then, they will be summed up properly. This way, preallocation is always sufficient
2678:     */
2679:     PetscMalloc1(local_rows,&my_dnz);
2680:     PetscMalloc1(local_rows,&my_onz);
2681:     PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));
2682:     PetscMemzero(my_onz,local_rows*sizeof(*my_onz));
2683:     for (i=0;i<local_rows;i++) {
2684:       index_row = global_indices[i];
2685:       for (j=i;j<local_rows;j++) {
2686:         owner = row_ownership[index_row];
2687:         index_col = global_indices[j];
2688:         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2689:           my_dnz[i] += 1.0;
2690:         } else { /* offdiag block */
2691:           my_onz[i] += 1.0;
2692:         }
2693:         /* same as before, interchanging rows and cols */
2694:         if (i != j) {
2695:           owner = row_ownership[index_col];
2696:           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2697:             my_dnz[j] += 1.0;
2698:           } else {
2699:             my_onz[j] += 1.0;
2700:           }
2701:         }
2702:       }
2703:     }
2704:     VecSet(vec_dnz,0.0);
2705:     VecSet(vec_onz,0.0);
2706:     if (local_rows) { /* multilevel guard */
2707:       VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);
2708:       VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);
2709:     }
2710:     VecAssemblyBegin(vec_dnz);
2711:     VecAssemblyBegin(vec_onz);
2712:     VecAssemblyEnd(vec_dnz);
2713:     VecAssemblyEnd(vec_onz);
2714:     PetscFree(my_dnz);
2715:     PetscFree(my_onz);
2716:     PetscFree(row_ownership);

2718:     /* set computed preallocation in dnz and onz */
2719:     VecGetArray(vec_dnz,&array);
2720:     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2721:     VecRestoreArray(vec_dnz,&array);
2722:     VecGetArray(vec_onz,&array);
2723:     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2724:     VecRestoreArray(vec_onz,&array);
2725:     VecDestroy(&vec_dnz);
2726:     VecDestroy(&vec_onz);

2728:     /* Resize preallocation if overestimated */
2729:     for (i=0;i<lrows;i++) {
2730:       dnz[i] = PetscMin(dnz[i],lcols);
2731:       onz[i] = PetscMin(onz[i],cols-lcols);
2732:     }
2733:     /* set preallocation */
2734:     MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);
2735:     for (i=0;i<lrows/bs;i++) {
2736:       dnz[i] = dnz[i*bs]/bs;
2737:       onz[i] = onz[i*bs]/bs;
2738:     }
2739:     MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2740:     for (i=0;i<lrows/bs;i++) {
2741:       dnz[i] = dnz[i]-i;
2742:     }
2743:     MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);
2744:     MatPreallocateFinalize(dnz,onz);
2745:     *M = new_mat;
2746:   } else {
2747:     PetscInt mbs,mrows,mcols;
2748:     /* some checks */
2749:     MatGetBlockSize(*M,&mbs);
2750:     MatGetSize(*M,&mrows,&mcols);
2751:     if (mrows != rows) {
2752:       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2753:     }
2754:     if (mrows != rows) {
2755:       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2756:     }
2757:     if (mbs != bs) {
2758:       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2759:     }
2760:     MatZeroEntries(*M);
2761:   }
2762:   /* set local to global mappings */
2763:   /* MatSetLocalToGlobalMapping(*M,rmapping,cmapping); */
2764:   /* Set values */
2765:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);
2766:   if (isdense) { /* special case for dense local matrices */
2767:     MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);
2768:     MatDenseGetArray(matis->A,&array);
2769:     MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);
2770:     MatDenseRestoreArray(matis->A,&array);
2771:     PetscFree(local_indices);
2772:     PetscFree(global_indices);
2773:   } else { /* very basic values insertion for all other matrix types */
2774:     PetscFree(local_indices);
2775:     for (i=0;i<local_rows;i++) {
2776:       MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2777:       /* MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES); */
2778:       ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);
2779:       ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);
2780:       MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);
2781:       MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);
2782:     }
2783:     PetscFree(global_indices);
2784:   }
2785:   MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);
2786:   MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);
2787:   if (isdense) {
2788:     MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);
2789:   }
2790:   return(0);
2791: }

2795: PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
2796: {
2797:   Mat             subdomain_adj;
2798:   IS              new_ranks,ranks_send_to;
2799:   MatPartitioning partitioner;
2800:   Mat_IS          *matis;
2801:   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2802:   PetscInt        prank;
2803:   PetscMPIInt     size,rank,color;
2804:   PetscInt        *xadj,*adjncy,*oldranks;
2805:   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2806:   PetscInt        i,j,local_size,threshold=0;
2807:   PetscErrorCode  ierr;
2808:   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2809:   PetscSubcomm    subcomm;

2812:   PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);
2813:   PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
2814:   PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);

2816:   /* Get info on mapping */
2817:   matis = (Mat_IS*)(mat->data);
2818:   ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);
2819:   ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);

2821:   /* build local CSR graph of subdomains' connectivity */
2822:   PetscMalloc1(2,&xadj);
2823:   xadj[0] = 0;
2824:   xadj[1] = PetscMax(n_neighs-1,0);
2825:   PetscMalloc1(xadj[1],&adjncy);
2826:   PetscMalloc1(xadj[1],&adjncy_wgt);

2828:   if (threshold) {
2829:     PetscInt* count,min_threshold;
2830:     PetscMalloc1(local_size,&count);
2831:     PetscMemzero(count,local_size*sizeof(PetscInt));
2832:     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2833:       for (j=0;j<n_shared[i];j++) {
2834:         count[shared[i][j]] += 1;
2835:       }
2836:     }
2837:     /* adapt threshold since we dont want to lose significant connections */
2838:     min_threshold = n_neighs;
2839:     for (i=1;i<n_neighs;i++) {
2840:       for (j=0;j<n_shared[i];j++) {
2841:         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2842:       }
2843:     }
2844:     threshold = PetscMax(min_threshold+1,threshold);
2845:     xadj[1] = 0;
2846:     for (i=1;i<n_neighs;i++) {
2847:       for (j=0;j<n_shared[i];j++) {
2848:         if (count[shared[i][j]] < threshold) {
2849:           adjncy[xadj[1]] = neighs[i];
2850:           adjncy_wgt[xadj[1]] = n_shared[i];
2851:           xadj[1]++;
2852:           break;
2853:         }
2854:       }
2855:     }
2856:     PetscFree(count);
2857:   } else {
2858:     if (xadj[1]) {
2859:       PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
2860:       PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
2861:     }
2862:   }
2863:   ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
2864:   if (use_square) {
2865:     for (i=0;i<xadj[1];i++) {
2866:       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2867:     }
2868:   }
2869:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

2871:   PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);

2873:   /*
2874:     Restrict work on active processes only.
2875:   */
2876:   PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
2877:   PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
2878:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
2879:   PetscMPIIntCast(!local_size,&color);
2880:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
2881:   if (color) {
2882:     PetscFree(xadj);
2883:     PetscFree(adjncy);
2884:     PetscFree(adjncy_wgt);
2885:   } else {
2886:     PetscInt coarsening_ratio;
2887:     MPI_Comm_size(subcomm->comm,&size);
2888:     PetscMalloc1(size,&oldranks);
2889:     prank = rank;
2890:     MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);
2891:     /*
2892:     for (i=0;i<size;i++) {
2893:       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2894:     }
2895:     */
2896:     for (i=0;i<xadj[1];i++) {
2897:       PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
2898:     }
2899:     PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
2900:     MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
2901:     /* MatView(subdomain_adj,0); */

2903:     /* Partition */
2904:     MatPartitioningCreate(subcomm->comm,&partitioner);
2905:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
2906:     if (use_vwgt) {
2907:       PetscMalloc(sizeof(*v_wgt),&v_wgt);
2908:       v_wgt[0] = local_size;
2909:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
2910:     }
2911:     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
2912:     coarsening_ratio = size/n_subdomains;
2913:     /* Parmetis does not always give back nparts with small graphs! this should be taken into account */
2914:     MatPartitioningSetNParts(partitioner,n_subdomains);
2915:     MatPartitioningSetFromOptions(partitioner);
2916:     MatPartitioningApply(partitioner,&new_ranks);
2917:     /* MatPartitioningView(partitioner,0); */

2919:     ISGetIndices(new_ranks,(const PetscInt**)&is_indices);
2920:     if (contiguous) {
2921:       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
2922:     } else {
2923:       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2924:     }
2925:     ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);
2926:     /* clean up */
2927:     PetscFree(oldranks);
2928:     ISDestroy(&new_ranks);
2929:     MatDestroy(&subdomain_adj);
2930:     MatPartitioningDestroy(&partitioner);
2931:   }
2932:   PetscSubcommDestroy(&subcomm);

2934:   /* assemble parallel IS for sends */
2935:   i = 1;
2936:   if (color) i=0;
2937:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);

2939:   /* get back IS */
2940:   *is_sends = ranks_send_to;
2941:   return(0);
2942: }

2944: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

2948: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
2949: {
2950:   Mat                    local_mat;
2951:   Mat_IS                 *matis;
2952:   IS                     is_sends_internal;
2953:   PetscInt               rows,cols;
2954:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
2955:   PetscBool              ismatis,isdense,destroy_mat;
2956:   ISLocalToGlobalMapping l2gmap;
2957:   PetscInt*              l2gmap_indices;
2958:   const PetscInt*        is_indices;
2959:   MatType                new_local_type;
2960:   /* buffers */
2961:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2962:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
2963:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2964:   /* MPI */
2965:   MPI_Comm               comm,comm_n;
2966:   PetscSubcomm           subcomm;
2967:   PetscMPIInt            n_sends,n_recvs,commsize;
2968:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
2969:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
2970:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
2971:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
2972:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
2973:   PetscErrorCode         ierr;

2976:   /* TODO: add missing checks */
2981:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
2982:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
2983:   MatISGetLocalMat(mat,&local_mat);
2984:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
2985:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2986:   MatGetSize(local_mat,&rows,&cols);
2987:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2988:   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
2989:     PetscInt mrows,mcols,mnrows,mncols;
2990:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
2991:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2992:     MatGetSize(mat,&mrows,&mcols);
2993:     MatGetSize(*mat_n,&mnrows,&mncols);
2994:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2995:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2996:   }
2997:   MatGetBlockSize(local_mat,&bs);
2999:   /* prepare IS for sending if not provided */
3000:   if (!is_sends) {
3001:     PetscBool pcontig = PETSC_TRUE;
3002:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3003:     MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);
3004:   } else {
3005:     PetscObjectReference((PetscObject)is_sends);
3006:     is_sends_internal = is_sends;
3007:   }

3009:   /* get pointer of MATIS data */
3010:   matis = (Mat_IS*)mat->data;

3012:   /* get comm */
3013:   PetscObjectGetComm((PetscObject)mat,&comm);

3015:   /* compute number of sends */
3016:   ISGetLocalSize(is_sends_internal,&i);
3017:   PetscMPIIntCast(i,&n_sends);

3019:   /* compute number of receives */
3020:   MPI_Comm_size(comm,&commsize);
3021:   PetscMalloc1(commsize,&iflags);
3022:   PetscMemzero(iflags,commsize*sizeof(*iflags));
3023:   ISGetIndices(is_sends_internal,&is_indices);
3024:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3025:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
3026:   PetscFree(iflags);

3028:   /* restrict comm if requested */
3029:   subcomm = 0;
3030:   destroy_mat = PETSC_FALSE;
3031:   if (restrict_comm) {
3032:     PetscMPIInt color,rank,subcommsize;
3033:     MPI_Comm_rank(comm,&rank);
3034:     color = 0;
3035:     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3036:     MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
3037:     subcommsize = commsize - subcommsize;
3038:     /* check if reuse has been requested */
3039:     if (reuse == MAT_REUSE_MATRIX) {
3040:       if (*mat_n) {
3041:         PetscMPIInt subcommsize2;
3042:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
3043:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3044:         comm_n = PetscObjectComm((PetscObject)*mat_n);
3045:       } else {
3046:         comm_n = PETSC_COMM_SELF;
3047:       }
3048:     } else { /* MAT_INITIAL_MATRIX */
3049:       PetscSubcommCreate(comm,&subcomm);
3050:       PetscSubcommSetNumber(subcomm,2);
3051:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
3052:       comm_n = subcomm->comm;
3053:     }
3054:     /* flag to destroy *mat_n if not significative */
3055:     if (color) destroy_mat = PETSC_TRUE;
3056:   } else {
3057:     comm_n = comm;
3058:   }

3060:   /* prepare send/receive buffers */
3061:   PetscMalloc1(commsize,&ilengths_idxs);
3062:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
3063:   PetscMalloc1(commsize,&ilengths_vals);
3064:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
3065:   if (nis) {
3066:     PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);
3067:     PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));
3068:   }

3070:   /* Get data from local matrices */
3071:   if (!isdense) {
3072:     /* TODO: See below some guidelines on how to prepare the local buffers */
3073:     /*
3074:        send_buffer_vals should contain the raw values of the local matrix
3075:        send_buffer_idxs should contain:
3076:        - MatType_PRIVATE type
3077:        - PetscInt        size_of_l2gmap
3078:        - PetscInt        global_row_indices[size_of_l2gmap]
3079:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3080:     */
3081:   } else {
3082:     MatDenseGetArray(local_mat,&send_buffer_vals);
3083:     ISLocalToGlobalMappingGetSize(matis->mapping,&i);
3084:     PetscMalloc1((i+2),&send_buffer_idxs);
3085:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3086:     send_buffer_idxs[1] = i;
3087:     ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3088:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
3089:     ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3090:     PetscMPIIntCast(i,&len);
3091:     for (i=0;i<n_sends;i++) {
3092:       ilengths_vals[is_indices[i]] = len*len;
3093:       ilengths_idxs[is_indices[i]] = len+2;
3094:     }
3095:   }
3096:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
3097:   /* additional is (if any) */
3098:   if (nis) {
3099:     PetscMPIInt psum;
3100:     PetscInt j;
3101:     for (j=0,psum=0;j<nis;j++) {
3102:       PetscInt plen;
3103:       ISGetLocalSize(isarray[j],&plen);
3104:       PetscMPIIntCast(plen,&len);
3105:       psum += len+1; /* indices + lenght */
3106:     }
3107:     PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);
3108:     for (j=0,psum=0;j<nis;j++) {
3109:       PetscInt plen;
3110:       const PetscInt *is_array_idxs;
3111:       ISGetLocalSize(isarray[j],&plen);
3112:       send_buffer_idxs_is[psum] = plen;
3113:       ISGetIndices(isarray[j],&is_array_idxs);
3114:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
3115:       ISRestoreIndices(isarray[j],&is_array_idxs);
3116:       psum += plen+1; /* indices + lenght */
3117:     }
3118:     for (i=0;i<n_sends;i++) {
3119:       ilengths_idxs_is[is_indices[i]] = psum;
3120:     }
3121:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
3122:   }

3124:   buf_size_idxs = 0;
3125:   buf_size_vals = 0;
3126:   buf_size_idxs_is = 0;
3127:   for (i=0;i<n_recvs;i++) {
3128:     buf_size_idxs += (PetscInt)olengths_idxs[i];
3129:     buf_size_vals += (PetscInt)olengths_vals[i];
3130:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3131:   }
3132:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
3133:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
3134:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);

3136:   /* get new tags for clean communications */
3137:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
3138:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
3139:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);

3141:   /* allocate for requests */
3142:   PetscMalloc1(n_sends,&send_req_idxs);
3143:   PetscMalloc1(n_sends,&send_req_vals);
3144:   PetscMalloc1(n_sends,&send_req_idxs_is);
3145:   PetscMalloc1(n_recvs,&recv_req_idxs);
3146:   PetscMalloc1(n_recvs,&recv_req_vals);
3147:   PetscMalloc1(n_recvs,&recv_req_idxs_is);

3149:   /* communications */
3150:   ptr_idxs = recv_buffer_idxs;
3151:   ptr_vals = recv_buffer_vals;
3152:   ptr_idxs_is = recv_buffer_idxs_is;
3153:   for (i=0;i<n_recvs;i++) {
3154:     source_dest = onodes[i];
3155:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
3156:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
3157:     ptr_idxs += olengths_idxs[i];
3158:     ptr_vals += olengths_vals[i];
3159:     if (nis) {
3160:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
3161:       ptr_idxs_is += olengths_idxs_is[i];
3162:     }
3163:   }
3164:   for (i=0;i<n_sends;i++) {
3165:     PetscMPIIntCast(is_indices[i],&source_dest);
3166:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
3167:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
3168:     if (nis) {
3169:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
3170:     }
3171:   }
3172:   ISRestoreIndices(is_sends_internal,&is_indices);
3173:   ISDestroy(&is_sends_internal);

3175:   /* assemble new l2g map */
3176:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
3177:   ptr_idxs = recv_buffer_idxs;
3178:   buf_size_idxs = 0;
3179:   for (i=0;i<n_recvs;i++) {
3180:     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3181:     ptr_idxs += olengths_idxs[i];
3182:   }
3183:   PetscMalloc1(buf_size_idxs,&l2gmap_indices);
3184:   ptr_idxs = recv_buffer_idxs;
3185:   buf_size_idxs = 0;
3186:   for (i=0;i<n_recvs;i++) {
3187:     PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
3188:     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3189:     ptr_idxs += olengths_idxs[i];
3190:   }
3191:   PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);
3192:   ISLocalToGlobalMappingCreate(comm_n,1,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
3193:   PetscFree(l2gmap_indices);

3195:   /* infer new local matrix type from received local matrices type */
3196:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3197:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3198:   if (n_recvs) {
3199:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3200:     ptr_idxs = recv_buffer_idxs;
3201:     for (i=0;i<n_recvs;i++) {
3202:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3203:         new_local_type_private = MATAIJ_PRIVATE;
3204:         break;
3205:       }
3206:       ptr_idxs += olengths_idxs[i];
3207:     }
3208:     switch (new_local_type_private) {
3209:       case MATDENSE_PRIVATE:
3210:         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3211:           new_local_type = MATSEQAIJ;
3212:           bs = 1;
3213:         } else { /* if I receive only 1 dense matrix */
3214:           new_local_type = MATSEQDENSE;
3215:           bs = 1;
3216:         }
3217:         break;
3218:       case MATAIJ_PRIVATE:
3219:         new_local_type = MATSEQAIJ;
3220:         bs = 1;
3221:         break;
3222:       case MATBAIJ_PRIVATE:
3223:         new_local_type = MATSEQBAIJ;
3224:         break;
3225:       case MATSBAIJ_PRIVATE:
3226:         new_local_type = MATSEQSBAIJ;
3227:         break;
3228:       default:
3229:         SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3230:         break;
3231:     }
3232:   } else { /* by default, new_local_type is seqdense */
3233:     new_local_type = MATSEQDENSE;
3234:     bs = 1;
3235:   }

3237:   /* create MATIS object if needed */
3238:   if (reuse == MAT_INITIAL_MATRIX) {
3239:     MatGetSize(mat,&rows,&cols);
3240:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);
3241:   } else {
3242:     /* it also destroys the local matrices */
3243:     MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
3244:   }
3245:   ISLocalToGlobalMappingDestroy(&l2gmap);
3246:   MatISGetLocalMat(*mat_n,&local_mat);
3247:   MatSetType(local_mat,new_local_type);
3248:   MatSetUp(local_mat); /* WARNING -> no preallocation yet */

3250:   /* set values */
3251:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
3252:   ptr_vals = recv_buffer_vals;
3253:   ptr_idxs = recv_buffer_idxs;
3254:   for (i=0;i<n_recvs;i++) {
3255:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3256:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3257:       MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3258:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3259:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3260:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3261:     } else {
3262:       /* TODO */
3263:     }
3264:     ptr_idxs += olengths_idxs[i];
3265:     ptr_vals += olengths_vals[i];
3266:   }
3267:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3268:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3269:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
3270:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);

3272: #if 0
3273:   if (!restrict_comm) { /* check */
3274:     Vec       lvec,rvec;
3275:     PetscReal infty_error;

3277:     MatGetVecs(mat,&rvec,&lvec);
3278:     VecSetRandom(rvec,NULL);
3279:     MatMult(mat,rvec,lvec);
3280:     VecScale(lvec,-1.0);
3281:     MatMultAdd(*mat_n,rvec,lvec,lvec);
3282:     VecNorm(lvec,NORM_INFINITY,&infty_error);
3283:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3284:     VecDestroy(&rvec);
3285:     VecDestroy(&lvec);
3286:   }
3287: #endif

3289:   /* assemble new additional is (if any) */
3290:   if (nis) {
3291:     PetscInt **temp_idxs,*count_is,j,psum;

3293:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
3294:     PetscMalloc(nis*sizeof(PetscInt),&count_is);
3295:     PetscMemzero(count_is,nis*sizeof(PetscInt));
3296:     ptr_idxs = recv_buffer_idxs_is;
3297:     psum = 0;
3298:     for (i=0;i<n_recvs;i++) {
3299:       for (j=0;j<nis;j++) {
3300:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3301:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3302:         psum += plen;
3303:         ptr_idxs += plen+1; /* shift pointer to received data */
3304:       }
3305:     }
3306:     PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);
3307:     PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);
3308:     for (i=1;i<nis;i++) {
3309:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3310:     }
3311:     PetscMemzero(count_is,nis*sizeof(PetscInt));
3312:     ptr_idxs = recv_buffer_idxs_is;
3313:     for (i=0;i<n_recvs;i++) {
3314:       for (j=0;j<nis;j++) {
3315:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3316:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
3317:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3318:         ptr_idxs += plen+1; /* shift pointer to received data */
3319:       }
3320:     }
3321:     for (i=0;i<nis;i++) {
3322:       ISDestroy(&isarray[i]);
3323:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
3324:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
3325:     }
3326:     PetscFree(count_is);
3327:     PetscFree(temp_idxs[0]);
3328:     PetscFree(temp_idxs);
3329:   }
3330:   /* free workspace */
3331:   PetscFree(recv_buffer_idxs);
3332:   PetscFree(recv_buffer_vals);
3333:   PetscFree(recv_buffer_idxs_is);
3334:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3335:   PetscFree(send_buffer_idxs);
3336:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3337:   if (isdense) {
3338:     MatISGetLocalMat(mat,&local_mat);
3339:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
3340:   } else {
3341:     /* PetscFree(send_buffer_vals); */
3342:   }
3343:   if (nis) {
3344:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
3345:     PetscFree(send_buffer_idxs_is);
3346:   }
3347:   PetscFree(recv_req_idxs);
3348:   PetscFree(recv_req_vals);
3349:   PetscFree(recv_req_idxs_is);
3350:   PetscFree(send_req_idxs);
3351:   PetscFree(send_req_vals);
3352:   PetscFree(send_req_idxs_is);
3353:   PetscFree(ilengths_vals);
3354:   PetscFree(ilengths_idxs);
3355:   PetscFree(olengths_vals);
3356:   PetscFree(olengths_idxs);
3357:   PetscFree(onodes);
3358:   if (nis) {
3359:     PetscFree(ilengths_idxs_is);
3360:     PetscFree(olengths_idxs_is);
3361:     PetscFree(onodes_is);
3362:   }
3363:   PetscSubcommDestroy(&subcomm);
3364:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3365:     MatDestroy(mat_n);
3366:     for (i=0;i<nis;i++) {
3367:       ISDestroy(&isarray[i]);
3368:     }
3369:   }
3370:   return(0);
3371: }

3373: /* temporary hack into ksp private data structure */
3374: #include <petsc-private/kspimpl.h>

3378: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3379: {
3380:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3381:   PC_IS                  *pcis = (PC_IS*)pc->data;
3382:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3383:   MatNullSpace           CoarseNullSpace=NULL;
3384:   ISLocalToGlobalMapping coarse_islg;
3385:   IS                     coarse_is,*isarray;
3386:   PetscInt               i,im_active=-1,active_procs=-1;
3387:   PetscInt               nis,nisdofs,nisneu;
3388:   PC                     pc_temp;
3389:   PCType                 coarse_pc_type;
3390:   KSPType                coarse_ksp_type;
3391:   PetscBool              multilevel_requested,multilevel_allowed;
3392:   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3393:   Mat                    t_coarse_mat_is;
3394:   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3395:   PetscMPIInt            all_procs;
3396:   PetscBool              csin_ml,csin_ds,csin,csin_type_simple;
3397:   PetscBool              compute_vecs = PETSC_FALSE;
3398:   PetscErrorCode         ierr;

3401:   /* Assign global numbering to coarse dofs */
3402:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3403:     compute_vecs = PETSC_TRUE;
3404:     PetscInt ocoarse_size;
3405:     ocoarse_size = pcbddc->coarse_size;
3406:     PetscFree(pcbddc->global_primal_indices);
3407:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
3408:     /* see if we can avoid some work */
3409:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3410:       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3411:         KSPReset(pcbddc->coarse_ksp);
3412:         coarse_reuse = PETSC_FALSE;
3413:       } else { /* we can safely reuse already computed coarse matrix */
3414:         coarse_reuse = PETSC_TRUE;
3415:       }
3416:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3417:       coarse_reuse = PETSC_FALSE;
3418:     }
3419:     /* reset any subassembling information */
3420:     ISDestroy(&pcbddc->coarse_subassembling);
3421:     ISDestroy(&pcbddc->coarse_subassembling_init);
3422:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3423:     coarse_reuse = PETSC_TRUE;
3424:   }

3426:   /* count "active" (i.e. with positive local size) and "void" processes */
3427:   im_active = !!(pcis->n);
3428:   MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
3429:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);
3430:   void_procs = all_procs-active_procs;
3431:   csin_type_simple = PETSC_TRUE;
3432:   if (pcbddc->current_level) {
3433:     csin_ml = PETSC_TRUE;
3434:     ncoarse_ml = void_procs;
3435:     csin_ds = PETSC_TRUE;
3436:     ncoarse_ds = void_procs;
3437:     if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
3438:   } else {
3439:     csin_ml = PETSC_FALSE;
3440:     ncoarse_ml = all_procs;
3441:     if (void_procs) {
3442:       csin_ds = PETSC_TRUE;
3443:       ncoarse_ds = void_procs;
3444:       csin_type_simple = PETSC_FALSE;
3445:     } else {
3446:       csin_ds = PETSC_FALSE;
3447:       ncoarse_ds = all_procs;
3448:     }
3449:   }

3451:   /*
3452:     test if we can go multilevel: three conditions must be satisfied:
3453:     - we have not exceeded the number of levels requested
3454:     - we can actually subassemble the active processes
3455:     - we can find a suitable number of MPI processes where we can place the subassembled problem
3456:   */
3457:   multilevel_allowed = PETSC_FALSE;
3458:   multilevel_requested = PETSC_FALSE;
3459:   if (pcbddc->current_level < pcbddc->max_levels) {
3460:     multilevel_requested = PETSC_TRUE;
3461:     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3462:       multilevel_allowed = PETSC_FALSE;
3463:     } else {
3464:       multilevel_allowed = PETSC_TRUE;
3465:     }
3466:   }
3467:   /* determine number of process partecipating to coarse solver */
3468:   if (multilevel_allowed) {
3469:     ncoarse = ncoarse_ml;
3470:     csin = csin_ml;
3471:   } else {
3472:     ncoarse = ncoarse_ds;
3473:     csin = csin_ds;
3474:   }

3476:   /* creates temporary l2gmap and IS for coarse indexes */
3477:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
3478:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

3480:   /* creates temporary MATIS object for coarse matrix */
3481:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
3482: #if 0
3483:   {
3484:     PetscViewer viewer;
3485:     char filename[256];
3486:     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3487:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
3488:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
3489:     MatView(coarse_submat_dense,viewer);
3490:     PetscViewerDestroy(&viewer);
3491:   }
3492: #endif
3493:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);
3494:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
3495:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
3496:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
3497:   MatDestroy(&coarse_submat_dense);

3499:   /* compute dofs splitting and neumann boundaries for coarse dofs */
3500:   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3501:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3502:     const PetscInt         *idxs;
3503:     ISLocalToGlobalMapping tmap;

3505:     /* create map between primal indices (in local representative ordering) and local primal numbering */
3506:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
3507:     /* allocate space for temporary storage */
3508:     PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);
3509:     PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);
3510:     /* allocate for IS array */
3511:     nisdofs = pcbddc->n_ISForDofsLocal;
3512:     nisneu = !!pcbddc->NeumannBoundariesLocal;
3513:     nis = nisdofs + nisneu;
3514:     PetscMalloc(nis*sizeof(IS),&isarray);
3515:     /* dofs splitting */
3516:     for (i=0;i<nisdofs;i++) {
3517:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
3518:       ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
3519:       ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
3520:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
3521:       ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
3522:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
3523:       ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
3524:       /* ISView(isarray[i],0); */
3525:     }
3526:     /* neumann boundaries */
3527:     if (pcbddc->NeumannBoundariesLocal) {
3528:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
3529:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
3530:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
3531:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
3532:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
3533:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
3534:       ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
3535:       /* ISView(isarray[nisdofs],0); */
3536:     }
3537:     /* free memory */
3538:     PetscFree(tidxs);
3539:     PetscFree(tidxs2);
3540:     ISLocalToGlobalMappingDestroy(&tmap);
3541:   } else {
3542:     nis = 0;
3543:     nisdofs = 0;
3544:     nisneu = 0;
3545:     isarray = NULL;
3546:   }
3547:   /* destroy no longer needed map */
3548:   ISLocalToGlobalMappingDestroy(&coarse_islg);

3550:   /* restrict on coarse candidates (if needed) */
3551:   coarse_mat_is = NULL;
3552:   if (csin) {
3553:     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3554:       PetscInt j,tissize,*nisindices;
3555:       PetscInt *coarse_candidates;
3556:       const PetscInt* tisindices;
3557:       /* get coarse candidates' ranks in pc communicator */
3558:       PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);
3559:       MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));
3560:       for (i=0,j=0;i<all_procs;i++) {
3561:         if (!coarse_candidates[i]) {
3562:           coarse_candidates[j]=i;
3563:           j++;
3564:         }
3565:       }
3566:       if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
3567:       /* get a suitable subassembling pattern */
3568:       if (csin_type_simple) {
3569:         PetscMPIInt rank;
3570:         PetscInt    issize,isidx;
3571:         MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
3572:         if (im_active) {
3573:           issize = 1;
3574:           isidx = (PetscInt)rank;
3575:         } else {
3576:           issize = 0;
3577:           isidx = -1;
3578:         }
3579:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
3580:       } else {
3581:         MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);
3582:       }
3583:       if (pcbddc->dbg_flag) {
3584:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3585:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");
3586:         ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
3587:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");
3588:         for (i=0;i<j;i++) {
3589:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);
3590:         }
3591:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");
3592:         PetscViewerFlush(pcbddc->dbg_viewer);
3593:       }
3594:       /* shift the pattern on coarse candidates */
3595:       ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);
3596:       ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);
3597:       PetscMalloc(tissize*sizeof(PetscInt),&nisindices);
3598:       for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
3599:       ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);
3600:       ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);
3601:       PetscFree(coarse_candidates);
3602:     }
3603:     if (pcbddc->dbg_flag) {
3604:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3605:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");
3606:       ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
3607:       PetscViewerFlush(pcbddc->dbg_viewer);
3608:     }
3609:     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
3610:     MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
3611:   } else {
3612:     if (pcbddc->dbg_flag) {
3613:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3614:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");
3615:       PetscViewerFlush(pcbddc->dbg_viewer);
3616:     }
3617:     PetscObjectReference((PetscObject)t_coarse_mat_is);
3618:     coarse_mat_is = t_coarse_mat_is;
3619:   }

3621:   /* create local to global scatters for coarse problem */
3622:   if (compute_vecs) {
3623:     PetscInt lrows;
3624:     VecDestroy(&pcbddc->coarse_vec);
3625:     if (coarse_mat_is) {
3626:       MatGetLocalSize(coarse_mat_is,&lrows,NULL);
3627:     } else {
3628:       lrows = 0;
3629:     }
3630:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
3631:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
3632:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
3633:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3634:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
3635:   }
3636:   ISDestroy(&coarse_is);
3637:   MatDestroy(&t_coarse_mat_is);

3639:   /* set defaults for coarse KSP and PC */
3640:   if (multilevel_allowed) {
3641:     coarse_ksp_type = KSPRICHARDSON;
3642:     coarse_pc_type = PCBDDC;
3643:   } else {
3644:     coarse_ksp_type = KSPPREONLY;
3645:     coarse_pc_type = PCREDUNDANT;
3646:   }

3648:   /* print some info if requested */
3649:   if (pcbddc->dbg_flag) {
3650:     if (!multilevel_allowed) {
3651:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3652:       if (multilevel_requested) {
3653:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
3654:       } else if (pcbddc->max_levels) {
3655:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
3656:       }
3657:       PetscViewerFlush(pcbddc->dbg_viewer);
3658:     }
3659:   }

3661:   /* create the coarse KSP object only once with defaults */
3662:   if (coarse_mat_is) {
3663:     MatReuse coarse_mat_reuse;
3664:     PetscViewer dbg_viewer = NULL;
3665:     if (pcbddc->dbg_flag) {
3666:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
3667:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
3668:     }
3669:     if (!pcbddc->coarse_ksp) {
3670:       char prefix[256],str_level[16];
3671:       size_t len;
3672:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);
3673:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
3674:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
3675:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);
3676:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
3677:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
3678:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3679:       PCSetType(pc_temp,coarse_pc_type);
3680:       /* prefix */
3681:       PetscStrcpy(prefix,"");
3682:       PetscStrcpy(str_level,"");
3683:       if (!pcbddc->current_level) {
3684:         PetscStrcpy(prefix,((PetscObject)pc)->prefix);
3685:         PetscStrcat(prefix,"pc_bddc_coarse_");
3686:       } else {
3687:         PetscStrlen(((PetscObject)pc)->prefix,&len);
3688:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
3689:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
3690:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
3691:         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
3692:         PetscStrcat(prefix,str_level);
3693:       }
3694:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
3695:       /* allow user customization */
3696:       KSPSetFromOptions(pcbddc->coarse_ksp);
3697:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
3698:     }

3700:     /* get some info after set from options */
3701:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
3702:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
3703:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
3704:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
3705:     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
3706:       PCSetType(pc_temp,coarse_pc_type);
3707:       isbddc = PETSC_FALSE;
3708:     }
3709:     if (isredundant) {
3710:       KSP inner_ksp;
3711:       PC inner_pc;
3712:       PCRedundantGetKSP(pc_temp,&inner_ksp);
3713:       KSPGetPC(inner_ksp,&inner_pc);
3714:       PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
3715:     }

3717:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
3718:     PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
3719:     PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
3720:     PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
3721:     if (nisdofs) {
3722:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
3723:       for (i=0;i<nisdofs;i++) {
3724:         ISDestroy(&isarray[i]);
3725:       }
3726:     }
3727:     if (nisneu) {
3728:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
3729:       ISDestroy(&isarray[nisdofs]);
3730:     }

3732:     /* assemble coarse matrix */
3733:     if (coarse_reuse) {
3734:       KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
3735:       PetscObjectReference((PetscObject)coarse_mat);
3736:       coarse_mat_reuse = MAT_REUSE_MATRIX;
3737:     } else {
3738:       coarse_mat_reuse = MAT_INITIAL_MATRIX;
3739:     }
3740:     if (isbddc || isnn) {
3741:       if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3742:         MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);
3743:         if (pcbddc->dbg_flag) {
3744:           PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");
3745:           PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");
3746:           ISView(pcbddc->coarse_subassembling,dbg_viewer);
3747:           PetscViewerFlush(dbg_viewer);
3748:         }
3749:       }
3750:       MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);
3751:     } else {
3752:       MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);
3753:     }
3754:     MatDestroy(&coarse_mat_is);

3756:     /* propagate symmetry info to coarse matrix */
3757:     MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);
3758:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);

3760:     /* set operators */
3761:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
3762:     if (pcbddc->dbg_flag) {
3763:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
3764:     }
3765:   } else { /* processes non partecipating to coarse solver (if any) */
3766:     coarse_mat = 0;
3767:   }
3768:   PetscFree(isarray);
3769: #if 0
3770:   {
3771:     PetscViewer viewer;
3772:     char filename[256];
3773:     sprintf(filename,"coarse_mat.m");
3774:     PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);
3775:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
3776:     MatView(coarse_mat,viewer);
3777:     PetscViewerDestroy(&viewer);
3778:   }
3779: #endif

3781:   /* Compute coarse null space (special handling by BDDC only) */
3782:   if (pcbddc->NullSpace) {
3783:     PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
3784:   }

3786:   if (pcbddc->coarse_ksp) {
3787:     Vec crhs,csol;
3788:     PetscBool ispreonly;
3789:     if (CoarseNullSpace) {
3790:       if (isbddc) {
3791:         PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
3792:       } else {
3793:         KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);
3794:       }
3795:     }
3796:     /* setup coarse ksp */
3797:     KSPSetUp(pcbddc->coarse_ksp);
3798:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
3799:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
3800:     /* hack */
3801:     if (!csol) {
3802:       MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
3803:     }
3804:     if (!crhs) {
3805:       MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
3806:     }
3807:     /* Check coarse problem if in debug mode or if solving with an iterative method */
3808:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
3809:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
3810:       KSP       check_ksp;
3811:       KSPType   check_ksp_type;
3812:       PC        check_pc;
3813:       Vec       check_vec,coarse_vec;
3814:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
3815:       PetscInt  its;
3816:       PetscBool compute_eigs;
3817:       PetscReal *eigs_r,*eigs_c;
3818:       PetscInt  neigs;
3819:       const char *prefix;

3821:       /* Create ksp object suitable for estimation of extreme eigenvalues */
3822:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
3823:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
3824:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
3825:       if (ispreonly) {
3826:         check_ksp_type = KSPPREONLY;
3827:         compute_eigs = PETSC_FALSE;
3828:       } else {
3829:         check_ksp_type = KSPGMRES;
3830:         compute_eigs = PETSC_TRUE;
3831:       }
3832:       KSPSetType(check_ksp,check_ksp_type);
3833:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
3834:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
3835:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
3836:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
3837:       KSPSetOptionsPrefix(check_ksp,prefix);
3838:       KSPAppendOptionsPrefix(check_ksp,"check_");
3839:       KSPSetFromOptions(check_ksp);
3840:       KSPSetUp(check_ksp);
3841:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
3842:       KSPSetPC(check_ksp,check_pc);
3843:       /* create random vec */
3844:       KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);
3845:       VecDuplicate(coarse_vec,&check_vec);
3846:       VecSetRandom(check_vec,NULL);
3847:       if (CoarseNullSpace) {
3848:         MatNullSpaceRemove(CoarseNullSpace,check_vec);
3849:       }
3850:       MatMult(coarse_mat,check_vec,coarse_vec);
3851:       /* solve coarse problem */
3852:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
3853:       if (CoarseNullSpace) {
3854:         MatNullSpaceRemove(CoarseNullSpace,coarse_vec);
3855:       }
3856:       /* set eigenvalue estimation if preonly has not been requested */
3857:       if (compute_eigs) {
3858:         PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);
3859:         PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);
3860:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
3861:         lambda_max = eigs_r[neigs-1];
3862:         lambda_min = eigs_r[0];
3863:         if (pcbddc->use_coarse_estimates) {
3864:           if (lambda_max>lambda_min) {
3865:             KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
3866:             KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
3867:           }
3868:         }
3869:       }

3871:       /* check coarse problem residual error */
3872:       if (pcbddc->dbg_flag) {
3873:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
3874:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
3875:         VecAXPY(check_vec,-1.0,coarse_vec);
3876:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
3877:         MatMult(coarse_mat,check_vec,coarse_vec);
3878:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
3879:         VecDestroy(&check_vec);
3880:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);
3881:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
3882:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
3883:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
3884:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
3885:         if (compute_eigs) {
3886:           PetscReal lambda_max_s,lambda_min_s;
3887:           KSPGetIterationNumber(check_ksp,&its);
3888:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
3889:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
3890:           for (i=0;i<neigs;i++) {
3891:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
3892:           }
3893:         }
3894:         PetscViewerFlush(dbg_viewer);
3895:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
3896:       }
3897:       KSPDestroy(&check_ksp);
3898:       if (compute_eigs) {
3899:         PetscFree(eigs_r);
3900:         PetscFree(eigs_c);
3901:       }
3902:     }
3903:   }
3904:   /* print additional info */
3905:   if (pcbddc->dbg_flag) {
3906:     /* waits until all processes reaches this point */
3907:     PetscBarrier((PetscObject)pc);
3908:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
3909:     PetscViewerFlush(pcbddc->dbg_viewer);
3910:   }

3912:   /* free memory */
3913:   MatNullSpaceDestroy(&CoarseNullSpace);
3914:   MatDestroy(&coarse_mat);
3915:   return(0);
3916: }

3920: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3921: {
3922:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3923:   PC_IS*         pcis = (PC_IS*)pc->data;
3924:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3925:   PetscInt       i,coarse_size;
3926:   PetscInt       *local_primal_indices;

3930:   /* Compute global number of coarse dofs */
3931:   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3932:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3933:   }
3934:   PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);

3936:   /* check numbering */
3937:   if (pcbddc->dbg_flag) {
3938:     PetscScalar coarsesum,*array;
3939:     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

3941:     PetscViewerFlush(pcbddc->dbg_viewer);
3942:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3943:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
3944:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
3945:     VecSet(pcis->vec1_N,0.0);
3946:     for (i=0;i<pcbddc->local_primal_size;i++) {
3947:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
3948:     }
3949:     VecAssemblyBegin(pcis->vec1_N);
3950:     VecAssemblyEnd(pcis->vec1_N);
3951:     VecSet(pcis->vec1_global,0.0);
3952:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3953:     VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3954:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3955:     VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3956:     VecGetArray(pcis->vec1_N,&array);
3957:     for (i=0;i<pcis->n;i++) {
3958:       if (array[i] == 1.0) {
3959:         set_error = PETSC_TRUE;
3960:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
3961:       }
3962:     }
3963:     MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
3964:     PetscViewerFlush(pcbddc->dbg_viewer);
3965:     for (i=0;i<pcis->n;i++) {
3966:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3967:     }
3968:     VecRestoreArray(pcis->vec1_N,&array);
3969:     VecSet(pcis->vec1_global,0.0);
3970:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3971:     VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3972:     VecSum(pcis->vec1_global,&coarsesum);
3973:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
3974:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
3975:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
3976:       PetscViewerFlush(pcbddc->dbg_viewer);
3977:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
3978:       for (i=0;i<pcbddc->local_primal_size;i++) {
3979:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3980:       }
3981:       PetscViewerFlush(pcbddc->dbg_viewer);
3982:     }
3983:     PetscViewerFlush(pcbddc->dbg_viewer);
3984:     if (set_error_reduced) {
3985:       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
3986:     }
3987:   }
3988:   /* get back data */
3989:   *coarse_size_n = coarse_size;
3990:   *local_primal_indices_n = local_primal_indices;
3991:   return(0);
3992: }

3996: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
3997: {
3998:   IS             localis_t;
3999:   PetscInt       i,lsize,*idxs,n;
4000:   PetscScalar    *vals;

4004:   /* get indices in local ordering exploiting local to global map */
4005:   ISGetLocalSize(globalis,&lsize);
4006:   PetscMalloc(lsize*sizeof(PetscScalar),&vals);
4007:   for (i=0;i<lsize;i++) vals[i] = 1.0;
4008:   ISGetIndices(globalis,(const PetscInt**)&idxs);
4009:   VecSet(gwork,0.0);
4010:   VecSet(lwork,0.0);
4011:   if (idxs) { /* multilevel guard */
4012:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
4013:   }
4014:   VecAssemblyBegin(gwork);
4015:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
4016:   PetscFree(vals);
4017:   VecAssemblyEnd(gwork);
4018:   /* now compute set in local ordering */
4019:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4020:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4021:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
4022:   VecGetSize(lwork,&n);
4023:   for (i=0,lsize=0;i<n;i++) {
4024:     if (PetscRealPart(vals[i]) > 0.5) {
4025:       lsize++;
4026:     }
4027:   }
4028:   PetscMalloc(lsize*sizeof(PetscInt),&idxs);
4029:   for (i=0,lsize=0;i<n;i++) {
4030:     if (PetscRealPart(vals[i]) > 0.5) {
4031:       idxs[lsize++] = i;
4032:     }
4033:   }
4034:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
4035:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
4036:   *localis = localis_t;
4037:   return(0);
4038: }