Actual source code: bddc.c

petsc-3.3-p7 2013-05-11
  1: /* TODOLIST
  2:    DofSplitting and DM attached to pc.
  3:    Exact solvers: Solve local saddle point directly for very hard problems
  4:      - change prec_type to switch_inexact_prec_type
  5:      - add bool solve_exact_saddle_point slot to pdbddc data
  6:    Inexact solvers: global preconditioner application is ready; ask the developers (Jed?) how best to implement Dohrmann's approach (PCSHELL?)
  7:    change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment):
  8:      - mind the problem with coarsening_factor 
  9:      - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
 10:      - remove coarse enums and allow use of PCBDDCGetCoarseKSP
 11:      - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries?
 12:      - Add levels' slot to bddc data structure and associated Set/Get functions
 13:    code refactoring:
 14:      - pick up better names for static functions
 15:    check log_summary for leaks (currently: 1 Vector per level)
 16:    change options structure:
 17:      - insert BDDC into MG framework?
 18:    provide other ops? Ask the developers
 19:    remove all unused printf
 20:    remove // comments and adhere to PETSc code requirements
 21:    man pages
 22: */

 24: /* ---------------------------------------------------------------------------------------------------------------------------------------------- 
 25:    Implementation of BDDC preconditioner based on:
 26:    C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007
 27:    ---------------------------------------------------------------------------------------------------------------------------------------------- */

 29: #include "bddc.h" /*I "petscpc.h" I*/  /* includes for fortran wrappers */
 30: #include <petscblaslapack.h>

 32: /* -------------------------------------------------------------------------- */
 35: PetscErrorCode PCSetFromOptions_BDDC(PC pc)
 36: {
 37:   PC_BDDC         *pcbddc = (PC_BDDC*)pc->data;

 41:   PetscOptionsHead("BDDC options");
 42:   /* Verbose debugging of main data structures */
 43:   PetscOptionsBool("-pc_bddc_check_all"       ,"Verbose (debugging) output for PCBDDC"                       ,"none",pcbddc->dbg_flag      ,&pcbddc->dbg_flag      ,PETSC_NULL);
 44:   /* Some customization for default primal space */
 45:   PetscOptionsBool("-pc_bddc_vertices_only"   ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag   ,&pcbddc->vertices_flag   ,PETSC_NULL);
 46:   PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);
 47:   PetscOptionsBool("-pc_bddc_faces_only"      ,"Use faces only in coarse space (i.e. discard edges)"         ,"none",pcbddc->faces_flag      ,&pcbddc->faces_flag      ,PETSC_NULL);
 48:   PetscOptionsBool("-pc_bddc_edges_only"      ,"Use edges only in coarse space (i.e. discard faces)"         ,"none",pcbddc->edges_flag      ,&pcbddc->edges_flag      ,PETSC_NULL);
 49:   /* Coarse solver context */
 50:   static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /* the order of the choices must match the CoarseProblemType enum defined in bddc.h */
 51:   PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);
 52:   /* Two different applications of BDDC to the whole set of dofs, internal and interface */
 53:   PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);
 54:   PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);
 55:   PetscOptionsTail();
 56:   return(0);
 57: }
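/*
   An illustrative command line exercising the options registered above (a sketch only,
   assuming this PC is registered under the type name "bddc"):

     mpiexec -n 4 ./app -pc_type bddc -pc_bddc_coarse_problem_type multilevel \
                        -pc_bddc_coarsening_ratio 4 -pc_bddc_check_all

   The values accepted by -pc_bddc_coarse_problem_type are sequential, replicated,
   parallel and multilevel, in the order of the enum defined in bddc.h.
*/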

 59: /* -------------------------------------------------------------------------- */
 60: EXTERN_C_BEGIN
 63: static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
 64: {
 65:   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;

 68:   pcbddc->coarse_problem_type = CPT;
 69:   return(0);
 70: }
 71: EXTERN_C_END

 73: /* -------------------------------------------------------------------------- */
 76: /*@
 77:  PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC.

 79:    Not collective

 81:    Input Parameters:
 82: +  pc - the preconditioning context
 83: -  CPT - the coarse problem type (SEQUENTIAL_BDDC, REPLICATED_BDDC, PARALLEL_BDDC or MULTILEVEL_BDDC)

 85:    Level: intermediate

 87:    Notes:
 88:    Although not collective, all processes must call this routine with the same value.

 90: .seealso: PCBDDC
 91: @*/
 92: PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
 93: {

 98:   PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));
 99:   return(0);
100: }

102: /* -------------------------------------------------------------------------- */
103: EXTERN_C_BEGIN
106: static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries)
107: {
108:   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;

112:   ISDestroy(&pcbddc->DirichletBoundaries);
113:   PetscObjectReference((PetscObject)DirichletBoundaries);
114:   pcbddc->DirichletBoundaries=DirichletBoundaries;
115:   return(0);
116: }
117: EXTERN_C_END

119: /* -------------------------------------------------------------------------- */
122: /*@
123:  PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of
124:                               Dirichlet boundaries for the global problem.

126:    Not collective

128:    Input Parameters:
129: +  pc - the preconditioning context
130: -  DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL)

132:    Level: intermediate

134:    Notes:

136: .seealso: PCBDDC
137: @*/
138: PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries)
139: {

144:   PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));
145:   return(0);
146: }
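/*
   A minimal usage sketch for PCBDDCSetDirichletBoundaries (the names n_local_dirichlet and
   dirichlet_dofs are illustrative, not part of the API); the index set is sequential and
   lists subdomain-local dof indices:

     IS dirichlet_is;
     ISCreateGeneral(PETSC_COMM_SELF,n_local_dirichlet,dirichlet_dofs,PETSC_COPY_VALUES,&dirichlet_is);
     PCBDDCSetDirichletBoundaries(pc,dirichlet_is);
     ISDestroy(&dirichlet_is);

   The preconditioner keeps its own reference (see PetscObjectReference above), so the user
   copy can be destroyed right away. PCBDDCSetNeumannBoundaries below is used analogously.
*/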
147: /* -------------------------------------------------------------------------- */
148: EXTERN_C_BEGIN
151: static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries)
152: {
153:   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;

157:   ISDestroy(&pcbddc->NeumannBoundaries);
158:   PetscObjectReference((PetscObject)NeumannBoundaries);
159:   pcbddc->NeumannBoundaries=NeumannBoundaries;
160:   return(0);
161: }
162: EXTERN_C_END

164: /* -------------------------------------------------------------------------- */
167: /*@
168:  PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of
169:                               Neumann boundaries for the global problem.

171:    Not collective

173:    Input Parameters:
174: +  pc - the preconditioning context
175: -  NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL)

177:    Level: intermediate

179:    Notes:

181: .seealso: PCBDDC
182: @*/
183: PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
184: {

189:   PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));
190:   return(0);
191: }
192: /* -------------------------------------------------------------------------- */
193: EXTERN_C_BEGIN
196: static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
197: {
198:   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;

201:   *NeumannBoundaries = pcbddc->NeumannBoundaries;
202:   return(0);
203: }
204: EXTERN_C_END

206: /* -------------------------------------------------------------------------- */
209: /*@
210:  PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of
211:                               Neumann boundaries for the global problem.

213:    Not collective

215:    Input Parameters:
216: +  pc - the preconditioning context

218:    Output Parameters:
219: +  NeumannBoundaries - index set defining the subdomain part of Neumann boundaries

221:    Level: intermediate

223:    Notes:

225: .seealso: PCBDDC
226: @*/
227: PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries)
228: {

233:   PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));
234:   return(0);
235: }
236: /* -------------------------------------------------------------------------- */
237: EXTERN_C_BEGIN
240: static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries)
241: {
242:   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;

245:   *DirichletBoundaries = pcbddc->DirichletBoundaries;
246:   return(0);
247: }
248: EXTERN_C_END

250: /* -------------------------------------------------------------------------- */
253: /*@
254:  PCBDDCGetDirichletBoundaries - Get index set defining subdomain part of
255:                               Dirichlet boundaries for the global problem.

257:    Not collective

259:    Input Parameters:
260: +  pc - the preconditioning context

262:    Output Parameters:
263: +  DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries

265:    Level: intermediate

267:    Notes:

269: .seealso: PCBDDC
270: @*/
271: PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries)
272: {

277:   PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));
278:   return(0);
279: }

281: /* -------------------------------------------------------------------------- */
282: EXTERN_C_BEGIN
285: static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[])
286: {
287:   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
288:   PetscInt i;

292:   /* Destroy ISs if they were already set */
293:   for(i=0;i<pcbddc->n_ISForDofs;i++) {
294:     ISDestroy(&pcbddc->ISForDofs[i]);
295:   }
296:   PetscFree(pcbddc->ISForDofs);

298:   /* allocate space then copy ISs */
299:   PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);
300:   for(i=0;i<n_is;i++) {
301:     ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);
302:     ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);
303:   }
304:   pcbddc->n_ISForDofs=n_is;

306:   return(0);
307: }
308: EXTERN_C_END

310: /* -------------------------------------------------------------------------- */
313: /*@
314:  PCBDDCSetDofsSplitting - Set index sets defining how dofs are split.

316:    Not collective

318:    Input Parameters:
319: +  pc - the preconditioning context
320: -  n_is - number of index sets defining the dofs splitting
321: -  ISForDofs - array of ISs describing the dofs splitting

323:    Level: intermediate

325:    Notes:
326:    Sequential ISs are copied; the user must destroy the array of ISs passed in.

328: .seealso: PCBDDC
329: @*/
330: PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[])
331: {

336:   PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));
337:   return(0);
338: }
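/*
   Usage sketch for PCBDDCSetDofsSplitting, e.g. for a problem with velocity and pressure
   dofs (the arrays vel_idx/press_idx and their sizes are illustrative names, not part of
   the API):

     IS fields[2];
     ISCreateGeneral(PETSC_COMM_SELF,n_vel,vel_idx,PETSC_COPY_VALUES,&fields[0]);
     ISCreateGeneral(PETSC_COMM_SELF,n_press,press_idx,PETSC_COPY_VALUES,&fields[1]);
     PCBDDCSetDofsSplitting(pc,2,fields);
     ISDestroy(&fields[0]);
     ISDestroy(&fields[1]);

   As noted above, the ISs are copied, so the user-side index sets can be destroyed after
   the call.
*/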

342: /* -------------------------------------------------------------------------- */
343: /*
344:    PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
345:                   by setting data structures and options.   

347:    Input Parameter:
348: +  pc - the preconditioner context

350:    Application Interface Routine: PCSetUp()

352:    Notes:
353:    The interface routine PCSetUp() is not usually called directly by
354:    the user, but instead is called by PCApply() if necessary.
355: */
356: PetscErrorCode PCSetUp_BDDC(PC pc)
357: {
359:   PC_BDDC*       pcbddc   = (PC_BDDC*)pc->data;
360:   PC_IS            *pcis = (PC_IS*)(pc->data);

363:   if (!pc->setupcalled) {
364:     /* For BDDC we need to define a local "Neumann" problem different from the one defined in PCISSetUp,
365:        so we set the PC type of the PCIS Neumann problem to PCNONE to avoid unneeded computation.
366:        We also choose to build the (identical) Dirichlet problem directly, so its PCIS solver is disabled as well */
367:     PetscOptionsSetValue("-is_localN_pc_type","none");
368:     PetscOptionsSetValue("-is_localD_pc_type","none");
369:     /* Set up all the "iterative substructuring" common block */
370:     PCISSetUp(pc);
371:     /* Get stdout for dbg */
372:     if(pcbddc->dbg_flag) {
373:       PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);
374:       PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
375:     }
376:     /* TODO MOVE CODE FRAGMENT */
377:     PetscInt im_active=0;
378:     if(pcis->n) im_active = 1;
379:     MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);
380:     /* Analyze local interface */
381:     PCBDDCManageLocalBoundaries(pc);
382:     /* Set up local constraint matrix */
383:     PCBDDCCreateConstraintMatrix(pc);
384:     /* Create coarse and local stuffs used for evaluating action of preconditioner */
385:     PCBDDCCoarseSetUp(pc);
386:     /* Processes only fictitiously involved in the multilevel scheme should not call ISLocalToGlobalMappingRestoreInfo */
387:     if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE;
388:   }
389:   return(0);
390: }

392: /* -------------------------------------------------------------------------- */
393: /*
394:    PCApply_BDDC - Applies the BDDC preconditioner to a vector.

396:    Input Parameters:
397: .  pc - the preconditioner context
398: .  r - input vector (global)

400:    Output Parameter:
401: .  z - output vector (global)

403:    Application Interface Routine: PCApply()
404:  */
407: PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
408: {
409:   PC_IS             *pcis = (PC_IS*)(pc->data);
410:   PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
411:   PetscErrorCode    ierr;
412:   const PetscScalar one = 1.0;
413:   const PetscScalar m_one = -1.0;

415: /* This code is similar to that provided in nn.c for PCNN, with the NN interface preconditioner replaced by BDDC.
416:    Support has been added for the M_3 preconditioner of the reference article
417:    (the corresponding code is active if pcbddc->prec_type == PETSC_TRUE) */
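/* A reading aid for the default (M_2) case, in the block notation of the PCIS data
   (I = internal dofs, B = interface dofs), with r = [r_I ; r_B] and z = [z_I ; z_B]:

     1) w_I = A_II^{-1} r_I                  (first Dirichlet solve)
     2) g_B = r_B - A_BI w_I                 (interface residual)
     3) u_B = M_BDDC^{-1} g_B                (PCBDDCApplyInterfacePreconditioner)
     4) v_I = A_II^{-1} A_IB u_B             (second Dirichlet solve)
        z   = [ w_I - v_I ; u_B ]

   The prec_type (M_3) branch additionally carries the internal residual r_I - A_II w_I
   through the interface preconditioner (stored in vec1_D below). */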

420:   /* First Dirichlet solve */
421:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
422:   VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
423:   KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
424:   /*
425:     Assembling right hand side for BDDC operator
426:     - vec1_D for the Dirichlet part (if needed, i.e. prec_type = PETSC_TRUE)
427:     - the interface part of the global vector z
428:   */
429:   VecScale(pcis->vec2_D,m_one);
430:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
431:   if(pcbddc->prec_type) { MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D); }
432:   VecScale(pcis->vec2_D,m_one);
433:   VecCopy(r,z);
434:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
435:   VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);

437:   /*
438:     Apply interface preconditioner
439:     Results are stored in:
440:     -  vec1_D (if needed, i.e. with prec_type = PETSC_TRUE)
441:     -  the interface part of the global vector z
442:   */
443:   PCBDDCApplyInterfacePreconditioner(pc,z);

445:   /* Second Dirichlet solve and assembling of output */
446:   VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
447:   VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
448:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);
449:   if(pcbddc->prec_type) { MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D); }
450:   KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);
451:   VecScale(pcbddc->vec4_D,m_one);
452:   if(pcbddc->prec_type) { VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D); }
453:   VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);
454:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
455:   VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);

457:   return(0);

459: }

461: /* -------------------------------------------------------------------------- */
462: /*
463:    PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 
464:     
465: */
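/* A sketch of what this routine computes, assuming the standard two-level BDDC form
   (notation is a reading aid only: D = partition of unity on the interface, PHI = local
   coarse basis, K_coarse = coarse matrix, g = interface part of the input vector z):

     u_B = sum over subdomains of  D_i ( w_i + PHI_i lambda_i ),   where
     w_i    solves the constrained local problem with right hand side D_i g_i
            (see PCBDDCSolveSaddlePoint), and
     lambda = K_coarse^{-1} sum_i PHI_i^T D_i g_i.

   The local and coarse solves below are interleaved so that the coarse scatters can
   overlap with the local work. */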
468: static PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, Vec z)
469: {
471:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
472:   PC_IS*            pcis = (PC_IS*)  (pc->data);
473:   const PetscScalar zero = 0.0;

476:   /* Get Local boundary and apply partition of unity */
477:   VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
478:   VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
479:   VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);

481:   /* Application of PHI^T  */
482:   MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
483:   if(pcbddc->prec_type) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }

485:   /* Scatter data of coarse_rhs */
486:   if(pcbddc->coarse_rhs) VecSet(pcbddc->coarse_rhs,zero);
487:   PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);

489:   /* Local solution on R nodes */
490:   VecSet(pcbddc->vec1_R,zero);
491:   VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
492:   VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
493:   if(pcbddc->prec_type) {
494:     VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
495:     VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
496:   }
497:   PCBDDCSolveSaddlePoint(pc);
498:   VecSet(pcis->vec1_B,zero);
499:   VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
500:   VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
501:   if(pcbddc->prec_type) {
502:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
503:     VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
504:   }

506:   /* Coarse solution */
507:   PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
508:   if(pcbddc->coarse_rhs) KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
509:   PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
510:   PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);

512:   /* Sum contributions from two levels */
513:   /* Apply partition of unity and sum boundary values */
514:   MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
515:   VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
516:   if(pcbddc->prec_type) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
517:   VecSet(z,zero);
518:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
519:   VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);

521:   return(0);
522: }


525: /* -------------------------------------------------------------------------- */
526: /*
527:    PCBDDCSolveSaddlePoint 
528:     
529: */
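/* The routine below applies the (precomputed) solver for the local constrained problem,
   which can be read as the saddle point system

       [ K_RR  C_R^T ] [ u_R    ]   [ r_R ]
       [ C_R    0    ] [ lambda ] = [  0  ]

   (C_R being the constraint matrix restricted to the R dofs; the notation is a reading
   aid only). It is solved by elimination: the unconstrained solve ksp_R is followed,
   when constraints are present, by a correction built from the auxiliary matrices
   local_auxmat1 and local_auxmat2 assembled in PCBDDCCoarseSetUp. */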
532: static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
533: {
535:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);


539:   KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
540:   if(pcbddc->n_constraints) {
541:     MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);
542:     MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);
543:   }

545:   return(0);
546: }
547: /* -------------------------------------------------------------------------- */
548: /*
549:    PCBDDCScatterCoarseDataBegin  
550:     
551: */
554: static PetscErrorCode  PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
555: {
557:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);


561:   switch(pcbddc->coarse_communications_type){
562:     case SCATTERS_BDDC:
563:       VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
564:       break;
565:     case GATHERS_BDDC:
566:       break;
567:   }
568:   return(0);

570: }
571: /* -------------------------------------------------------------------------- */
572: /*
573:    PCBDDCScatterCoarseDataEnd  
574:     
575: */
578: static PetscErrorCode  PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
579: {
581:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
582:   PetscScalar*   array_to;
583:   PetscScalar*   array_from;
584:   MPI_Comm       comm=((PetscObject)pc)->comm;
585:   PetscInt i;


589:   switch(pcbddc->coarse_communications_type){
590:     case SCATTERS_BDDC:
591:       VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
592:       break;
593:     case GATHERS_BDDC:
594:       if(vec_from) VecGetArray(vec_from,&array_from);
595:       if(vec_to)   VecGetArray(vec_to,&array_to);
596:       switch(pcbddc->coarse_problem_type){
597:         case SEQUENTIAL_BDDC:
598:           if(smode == SCATTER_FORWARD) {
599:             MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);
600:             if(vec_to) {
601:               for(i=0;i<pcbddc->replicated_primal_size;i++)
602:                 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
603:             }
604:           } else {
605:             if(vec_from)
606:               for(i=0;i<pcbddc->replicated_primal_size;i++)
607:                 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
608:             MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);
609:           }
610:           break;
611:         case REPLICATED_BDDC:
612:           if(smode == SCATTER_FORWARD) {
613:             MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);
614:             for(i=0;i<pcbddc->replicated_primal_size;i++)
615:               array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
616:           } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
617:             for(i=0;i<pcbddc->local_primal_size;i++)
618:               array_to[i]=array_from[pcbddc->local_primal_indices[i]];
619:           }
620:           break;
621:         case MULTILEVEL_BDDC:
622:           break;
623:         case PARALLEL_BDDC:
624:           break;
625:       }
626:       if(vec_from) VecRestoreArray(vec_from,&array_from);
627:       if(vec_to)   VecRestoreArray(vec_to,&array_to);
628:       break;
629:   }
630:   return(0);

632: }

634: /* -------------------------------------------------------------------------- */
635: /*
636:    PCDestroy_BDDC - Destroys the private context for the BDDC preconditioner
637:    that was created with PCCreate_BDDC().

639:    Input Parameter:
640: .  pc - the preconditioner context

642:    Application Interface Routine: PCDestroy()
643: */
646: PetscErrorCode PCDestroy_BDDC(PC pc)
647: {
648:   PC_BDDC          *pcbddc = (PC_BDDC*)pc->data;

652:   /* free data created by PCIS */
653:   PCISDestroy(pc);
654:   /* free BDDC data  */
655:   VecDestroy(&pcbddc->coarse_vec);
656:   VecDestroy(&pcbddc->coarse_rhs);
657:   KSPDestroy(&pcbddc->coarse_ksp);
658:   MatDestroy(&pcbddc->coarse_mat);
659:   MatDestroy(&pcbddc->coarse_phi_B);
660:   MatDestroy(&pcbddc->coarse_phi_D);
661:   VecDestroy(&pcbddc->vec1_P);
662:   VecDestroy(&pcbddc->vec1_C);
663:   MatDestroy(&pcbddc->local_auxmat1);
664:   MatDestroy(&pcbddc->local_auxmat2);
665:   VecDestroy(&pcbddc->vec1_R);
666:   VecDestroy(&pcbddc->vec2_R);
667:   VecDestroy(&pcbddc->vec4_D);
668:   VecScatterDestroy(&pcbddc->R_to_B);
669:   VecScatterDestroy(&pcbddc->R_to_D);
670:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
671:   KSPDestroy(&pcbddc->ksp_D);
672:   KSPDestroy(&pcbddc->ksp_R);
673:   ISDestroy(&pcbddc->NeumannBoundaries);
674:   ISDestroy(&pcbddc->DirichletBoundaries);
675:   MatDestroy(&pcbddc->ConstraintMatrix);
676:   PetscFree(pcbddc->local_primal_indices);
677:   PetscFree(pcbddc->replicated_local_primal_indices);
678:   if (pcbddc->replicated_local_primal_values)    { free(pcbddc->replicated_local_primal_values); }
679:   PetscFree(pcbddc->local_primal_displacements);
680:   PetscFree(pcbddc->local_primal_sizes);
681:   PetscInt i;
682:   for(i=0;i<pcbddc->n_ISForDofs;i++) { ISDestroy(&pcbddc->ISForDofs[i]); }
683:   PetscFree(pcbddc->ISForDofs);
684:   for(i=0;i<pcbddc->n_ISForFaces;i++) { ISDestroy(&pcbddc->ISForFaces[i]); }
685:   PetscFree(pcbddc->ISForFaces);
686:   for(i=0;i<pcbddc->n_ISForEdges;i++) { ISDestroy(&pcbddc->ISForEdges[i]); }
687:   PetscFree(pcbddc->ISForEdges);
688:   ISDestroy(&pcbddc->ISForVertices);
689:   /* Free the private data structure that was hanging off the PC */
690:   PetscFree(pcbddc);
691:   return(0);
692: }

694: /* -------------------------------------------------------------------------- */
695: /*MC
696:    PCBDDC - Balancing Domain Decomposition by Constraints.

698:    Options Database Keys:
699: .    -pcbddc ??? -

701:    Level: intermediate

703:    Notes: The matrix used with this preconditioner must be of type MATIS 

705:           Unlike more 'conventional' interface preconditioners, this iterates over ALL the
706:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
707:           on the subdomains).

709:           Options for the coarse grid preconditioner can be set with -
710:           Options for the Dirichlet subproblem can be set with -
711:           Options for the Neumann subproblem can be set with -

713:    Contributed by Stefano Zampini

715: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
716: M*/
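/*
   Usage sketch, assuming a matrix A already assembled in MATIS format together with
   vectors b and x (illustrative only; error checking omitted):

     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,"bddc");
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);

   PCBDDCSetDirichletBoundaries(), PCBDDCSetNeumannBoundaries() and PCBDDCSetDofsSplitting()
   can optionally be called before the solve to pass boundary and dof information to the
   preconditioner.
*/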
717: EXTERN_C_BEGIN
720: PetscErrorCode PCCreate_BDDC(PC pc)
721: {
723:   PC_BDDC          *pcbddc;

726:   /* Creates the private data structure for this preconditioner and attaches it to the PC object. */
727:   PetscNewLog(pc,PC_BDDC,&pcbddc);
728:   pc->data  = (void*)pcbddc;
729:   /* create PCIS data structure */
730:   PCISCreate(pc);
731:   /* BDDC specific */
732:   pcbddc->coarse_vec                 = 0;
733:   pcbddc->coarse_rhs                 = 0;
734:   pcbddc->coarse_ksp                 = 0;
735:   pcbddc->coarse_phi_B               = 0;
736:   pcbddc->coarse_phi_D               = 0;
737:   pcbddc->vec1_P                     = 0;
738:   pcbddc->vec1_R                     = 0;
739:   pcbddc->vec2_R                     = 0;
740:   pcbddc->local_auxmat1              = 0;
741:   pcbddc->local_auxmat2              = 0;
742:   pcbddc->R_to_B                     = 0;
743:   pcbddc->R_to_D                     = 0;
744:   pcbddc->ksp_D                      = 0;
745:   pcbddc->ksp_R                      = 0;
746:   pcbddc->local_primal_indices       = 0;
747:   pcbddc->prec_type                  = PETSC_FALSE;
748:   pcbddc->NeumannBoundaries          = 0;
749:   pcbddc->ISForDofs                  = 0;
750:   pcbddc->ISForVertices              = 0;
751:   pcbddc->n_ISForFaces               = 0;
752:   pcbddc->n_ISForEdges               = 0;
753:   pcbddc->ConstraintMatrix           = 0;
754:   pcbddc->use_nnsp_true              = PETSC_FALSE;
755:   pcbddc->local_primal_sizes         = 0;
756:   pcbddc->local_primal_displacements = 0;
757:   pcbddc->replicated_local_primal_indices = 0;
758:   pcbddc->replicated_local_primal_values  = 0;
759:   pcbddc->coarse_loc_to_glob         = 0;
760:   pcbddc->dbg_flag                   = PETSC_FALSE;
761:   pcbddc->coarsening_ratio           = 8;
762:   /* function pointers */
763:   pc->ops->apply               = PCApply_BDDC;
764:   pc->ops->applytranspose      = 0;
765:   pc->ops->setup               = PCSetUp_BDDC;
766:   pc->ops->destroy             = PCDestroy_BDDC;
767:   pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
768:   pc->ops->view                = 0;
769:   pc->ops->applyrichardson     = 0;
770:   pc->ops->applysymmetricleft  = 0;
771:   pc->ops->applysymmetricright = 0;
772:   /* composing function */
773:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
774:                     PCBDDCSetDirichletBoundaries_BDDC);
775:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
776:                     PCBDDCSetNeumannBoundaries_BDDC);
777:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",
778:                     PCBDDCGetDirichletBoundaries_BDDC);
779:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
780:                     PCBDDCGetNeumannBoundaries_BDDC);
781:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
782:                     PCBDDCSetCoarseProblemType_BDDC);
783:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
784:                     PCBDDCSetDofsSplitting_BDDC);
785:   return(0);
786: }
787: EXTERN_C_END

789: /* -------------------------------------------------------------------------- */
790: /*  
791:    Create C matrix [I 0; 0 const] 
792: */
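/*
   A small worked illustration of the structure built below: with two vertex dofs {v0,v1},
   one edge made of three dofs {e0,e1,e2}, and only the constant near null space, the local
   constraint matrix has one row per vertex and one row per constraint,

       row 0:  1 at column v0
       row 1:  1 at column v1
       row 2:  1/sqrt(3) at columns e0, e1, e2

   i.e. the "[I 0; 0 const]" structure mentioned above: vertex rows form an identity block,
   constraint rows hold the (scaled) constant function on each edge or face.
*/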
793: #ifdef BDDC_USE_POD
794: #if !defined(PETSC_MISSING_LAPACK_GESVD)
795: #define PETSC_MISSING_LAPACK_GESVD 1
796: #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 
797: #endif
798: #endif

802: static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
803: {
805:   PC_IS*         pcis = (PC_IS*)(pc->data);
806:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
807:   Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
808:   PetscInt       *nnz,*vertices,*is_indices;
809:   PetscScalar    *temp_quadrature_constraint;
810:   PetscInt       *temp_indices,*temp_indices_to_constraint;
811:   PetscInt       local_primal_size,i,j,k,total_counts,max_size_of_constraint;
812:   PetscInt       n_constraints,n_vertices,size_of_constraint;
813:   PetscReal      quad_value;
814:   PetscBool      nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
815:   PetscInt       nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
816:   IS             *used_IS;
817:   const MatType  impMatType=MATSEQAIJ;
818:   PetscBLASInt   Bs,Bt,lwork,lierr;
819:   PetscReal      tol=1.0e-8;
820:   MatNullSpace   nearnullsp;
821:   const Vec      *nearnullvecs;
822:   Vec            *localnearnullsp;
823:   PetscScalar    *work,*temp_basis,*array_vector,*correlation_mat;
824:   PetscReal      *rwork,*singular_vals;
825:   PetscBLASInt   Bone=1;
826: /* some ugly conditional declarations */
827: #if defined(PETSC_MISSING_LAPACK_GESVD)
828:   PetscScalar    dot_result;
829:   PetscScalar    one=1.0,zero=0.0;
830:   PetscInt       ii;
831: #if defined(PETSC_USE_COMPLEX)
832:   PetscScalar    val1,val2;
833: #endif
834: #else
835:   PetscBLASInt   dummy_int;
836:   PetscScalar    dummy_scalar;
837: #endif

840:   /* check if near null space is attached to global mat */
841:   MatGetNearNullSpace(pc->pmat,&nearnullsp);
842:   if (nearnullsp) {
843:     MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
844:   } else { /* if a near null space is not provided, the constant vector is used */
845:     nnsp_has_cnst = PETSC_TRUE;
846:     use_nnsp_true = PETSC_TRUE;
847:   }
848:   if(nnsp_has_cnst) {
849:     nnsp_addone = 1;
850:   }
851:   /*
852:        Evaluate maximum storage size needed by the procedure
853:        - temp_indices will contain the start index of each constraint, stored as follows:
854:        - temp_indices_to_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the i-th constraint acts
855:        - temp_quadrature_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
856:   */
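/* For example, the data of constraint i is traversed as in the following illustrative loop
   (jj, dof and value are illustrative names):

     for (jj=temp_indices[i]; jj<temp_indices[i+1]; jj++) {
       dof   = temp_indices_to_constraint[jj];
       value = temp_quadrature_constraint[jj];
     }
*/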
857:   total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
858:   total_counts *= (nnsp_addone+nnsp_size);
859:   PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);
860:   total_counts = 0;
861:   max_size_of_constraint = 0;
862:   for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
863:     if(i<pcbddc->n_ISForEdges){
864:       used_IS = &pcbddc->ISForEdges[i];
865:     } else {
866:       used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
867:     }
868:     ISGetSize(*used_IS,&j);
869:     total_counts += j;
870:     if(j>max_size_of_constraint) max_size_of_constraint=j;
871:   }
872:   total_counts *= (nnsp_addone+nnsp_size);
873:   PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);
874:   PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);
875:   /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
876:   rwork = 0;
877:   work = 0;
878:   singular_vals = 0;
879:   temp_basis = 0;
880:   correlation_mat = 0;
881:   if(!pcbddc->use_nnsp_true) {
882:     PetscScalar temp_work;
883: #if defined(PETSC_MISSING_LAPACK_GESVD)
884:     /* POD */
885:     PetscInt max_n;
886:     max_n = nnsp_addone+nnsp_size;
887:     /* using some techniques borrowed from Proper Orthogonal Decomposition */
888:     PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);
889:     PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);
890:     PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);
891: #if defined(PETSC_USE_COMPLEX)
892:     PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);
893: #endif
894:     /* now we evaluate the optimal workspace using query with lwork=-1 */
895:     Bt = PetscBLASIntCast(max_n);
896:     lwork=-1;
897: #if !defined(PETSC_USE_COMPLEX)
898:     LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
899: #else
900:     LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
901: #endif
902: #else /* on missing GESVD */
903:     /* SVD */
904:     PetscInt max_n,min_n;
905:     max_n = max_size_of_constraint;
906:     min_n = nnsp_addone+nnsp_size;
907:     if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
908:       min_n = max_size_of_constraint;
909:       max_n = nnsp_addone+nnsp_size;
910:     }
911:     PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);
912: #if defined(PETSC_USE_COMPLEX)
913:     PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);
914: #endif
915:     /* now we evaluate the optimal workspace using query with lwork=-1 */
916:     lwork=-1;
917:     Bs = PetscBLASIntCast(max_n);
918:     Bt = PetscBLASIntCast(min_n);
919:     dummy_int = Bs;
920:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
921: #if !defined(PETSC_USE_COMPLEX)
922:     LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
923:                  &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
924: #else
925:     LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
926:                  &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
927: #endif
928:     if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
929:     PetscFPTrapPop();
930: #endif
931:     /* Allocate optimal workspace */
932:     lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
933:     total_counts = (PetscInt)lwork;
934:     PetscMalloc(total_counts*sizeof(PetscScalar),&work);
935:   }
936:   /* get local part of global near null space vectors */
937:   PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);
938:   for(k=0;k<nnsp_size;k++) {
939:     VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
940:     VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
941:     VecScatterEnd  (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
942:   }
943:   /* Now we can loop on constraining sets */
944:   total_counts=0;
945:   temp_indices[0]=0;
946:   for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
947:     if(i<pcbddc->n_ISForEdges){
948:       used_IS = &pcbddc->ISForEdges[i];
949:     } else {
950:       used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
951:     }
952:     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
953:     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
954:     ISGetSize(*used_IS,&size_of_constraint);
955:     ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
956:     if(nnsp_has_cnst) {
957:       temp_constraints++;
958:       quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
959:       for(j=0;j<size_of_constraint;j++) {
960:         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
961:         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
962:       }
963:       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
964:       total_counts++;
965:     }
966:     for(k=0;k<nnsp_size;k++) {
967:       VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
968:       for(j=0;j<size_of_constraint;j++) {
969:         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
970:         temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
971:       }
972:       VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
973:       quad_value = 1.0;
974:       if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
975:         Bs = PetscBLASIntCast(size_of_constraint);
976:         quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone);
977:       }
978:       if ( quad_value > 0.0 ) { /* keep indices and values */
979:         temp_constraints++;
980:         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
981:         total_counts++;
982:       }
983:     }
984:     ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
985:     /* perform SVD on the constraints if use_nnsp_true has not been requested by the user */
986:     if(!use_nnsp_true) {

988:       Bs = PetscBLASIntCast(size_of_constraint);
989:       Bt = PetscBLASIntCast(temp_constraints);

991: #if defined(PETSC_MISSING_LAPACK_GESVD)
992:       PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));
993:       /* Store upper triangular part of correlation matrix */
994:       for(j=0;j<temp_constraints;j++) {
995:         for(k=0;k<j+1;k++) {
996: #if defined(PETSC_USE_COMPLEX)
997:           /* hand made complex dot product */
998:           dot_result = 0.0;
999:           for (ii=0; ii<size_of_constraint; ii++) {
1000:             val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
1001:             val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
1002:             dot_result += val1*PetscConj(val2);
1003:           }
1004: #else
1005:           dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
1006:                                     &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
1007: #endif
1008:           correlation_mat[j*temp_constraints+k]=dot_result;
1009:         }
1010:       }
1011: #if !defined(PETSC_USE_COMPLEX)
1012:       LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
1013: #else
1014:       LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
1015: #endif   
1016:       if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
1017:       /* retain eigenvalues greater than tol: note that LAPACK SYEV returns eigenvalues in ascending order */
1018:       j=0;
1019:       while( j < Bt && singular_vals[j] < tol) j++;
1020:       total_counts=total_counts-j;
1021:       if(j<temp_constraints) {
1022:         for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
1023:         BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
1024:         /* copy POD basis into used quadrature memory */
1025:         for(k=0;k<Bt-j;k++) {
1026:           for(ii=0;ii<size_of_constraint;ii++) {
1027:             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
1028:           }
1029:         }
1030:       }

1032: #else  /* on missing GESVD */

1034:       PetscInt min_n = temp_constraints;
1035:       if(min_n > size_of_constraint) min_n = size_of_constraint;
1036:       dummy_int = Bs;
1037:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1038: #if !defined(PETSC_USE_COMPLEX)
1039:       LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
1040:                    &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
1041: #else
1042:       LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
1043:                    &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
1044: #endif
1045:       if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
1046:       PetscFPTrapPop();
1047:       /* retain singular values greater than tol: note that LAPACK SVD returns singular values in descending order */
1048:       j=0;
1049:       while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
1050:       total_counts = total_counts-(PetscInt)Bt+(min_n-j);

1052: #endif
1053:     }
1054:   }
1055:   n_constraints=total_counts;
1056:   ISGetSize(pcbddc->ISForVertices,&n_vertices);
1057:   local_primal_size = n_vertices+n_constraints;
1058:   PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);
1059:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
1060:   MatSetType(pcbddc->ConstraintMatrix,impMatType);
1061:   MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);
1062:   for(i=0;i<n_vertices;i++) { nnz[i]= 1; }
1063:   for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; }
1064:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
1065:   ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1066:   for(i=0;i<n_vertices;i++) { MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES); }
1067:   ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1068:   for(i=0;i<n_constraints;i++) {
1069:     j=i+n_vertices;
1070:     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1071:     MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
1072:   }
1073:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1074:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1075:   /* set quantities in pcbddc data structure */
1076:   pcbddc->n_vertices = n_vertices;
1077:   pcbddc->n_constraints = n_constraints;
1078:   pcbddc->local_primal_size = n_vertices+n_constraints;
1079:   /* free workspace no longer needed */
1080:   PetscFree(rwork);
1081:   PetscFree(work);
1082:   PetscFree(temp_basis);
1083:   PetscFree(singular_vals);
1084:   PetscFree(correlation_mat);
1085:   PetscFree(temp_indices);
1086:   PetscFree(temp_indices_to_constraint);
1087:   PetscFree(temp_quadrature_constraint);
1088:   PetscFree(nnz);
1089:   for(k=0;k<nnsp_size;k++) { VecDestroy(&localnearnullsp[k]); }
1090:   PetscFree(localnearnullsp);
1091:   return(0);
1092: }
1093: #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD
1094: #undef PETSC_MISSING_LAPACK_GESVD
1095: #endif

1097: /* -------------------------------------------------------------------------- */
1098: /*  
1099:    PCBDDCCoarseSetUp - 
1100: */
1103: static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
1104: {
1105:   PetscErrorCode  ierr;

1107:   PC_IS*            pcis = (PC_IS*)(pc->data);
1108:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1109:   Mat_IS            *matis = (Mat_IS*)pc->pmat->data;
1110:   IS                is_R_local;
1111:   IS                is_V_local;
1112:   IS                is_C_local;
1113:   IS                is_aux1;
1114:   IS                is_aux2;
1115:   const VecType     impVecType;
1116:   const MatType     impMatType;
1117:   PetscInt          n_R=0;
1118:   PetscInt          n_D=0;
1119:   PetscInt          n_B=0;
1120:   PetscScalar       zero=0.0;
1121:   PetscScalar       one=1.0;
1122:   PetscScalar       m_one=-1.0;
1123:   PetscScalar*      array;
1124:   PetscScalar       *coarse_submat_vals;
1125:   PetscInt          *idx_R_local;
1126:   PetscInt          *idx_V_B;
1127:   PetscScalar       *coarsefunctions_errors;
1128:   PetscScalar       *constraints_errors;
1129:   /* auxiliary indices */
1130:   PetscInt s,i,j,k;
1131:   /* for verbose output of bddc */
1132:   PetscViewer       viewer=pcbddc->dbg_viewer;
1133:   PetscBool         dbg_flag=pcbddc->dbg_flag;
1134:   /* for counting coarse dofs */
1135:   PetscScalar       coarsesum;
1136:   PetscInt          n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints;
1137:   PetscInt          size_of_constraint;
1138:   PetscInt          *row_cmat_indices;
1139:   PetscScalar       *row_cmat_values;
1140:   const PetscInt    *vertices;
1141: 
1143:   /* Set Non-overlapping dimensions */
1144:   n_B = pcis->n_B; n_D = pcis->n - n_B;
1145:   ISGetIndices(pcbddc->ISForVertices,&vertices);
1146:   /* First let's count the coarse dofs: note that we allow a constraint to be present on a subdomain without its counterpart on the neighbouring subdomain (if the user so chooses) */
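  /* Counting trick used below: each subdomain puts a 1 at its local coarse dofs; the values
     are added across subdomains (giving the multiplicity m_k of each global coarse dof),
     inverted, and added again, so every coarse dof contributes m_k * (1/m_k) = 1 and VecSum
     returns the global number of coarse dofs. */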
1147:   VecSet(pcis->vec1_N,zero);
1148:   VecGetArray(pcis->vec1_N,&array);
1149:   for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; }

1151:   for(i=0;i<n_constraints;i++) {
1152:     MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1153:     for (j=0; j<size_of_constraint; j++) {
1154:       k = row_cmat_indices[j];
1155:       if( array[k] == zero ) {
1156:         array[k] = one;
1157:         break;
1158:       }
1159:     }
1160:     MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1161:   }
1162:   VecRestoreArray(pcis->vec1_N,&array);
1163:   VecSet(pcis->vec1_global,zero);
1164:   VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1165:   VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1166:   VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1167:   VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1168:   VecGetArray(pcis->vec1_N,&array);
1169:   for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; }
1170:   VecRestoreArray(pcis->vec1_N,&array);
1171:   VecSet(pcis->vec1_global,zero);
1172:   VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1173:   VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1174:   VecSum(pcis->vec1_global,&coarsesum);
1175:   pcbddc->coarse_size = (PetscInt) coarsesum;

1177:   /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
1178:   VecSet(pcis->vec1_N,one);
1179:   VecGetArray(pcis->vec1_N,&array);
1180:   for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; }
1181:   PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);
1182:   for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } }
1183:   VecRestoreArray(pcis->vec1_N,&array);
1184:   if(dbg_flag) {
1185:     PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1186:     PetscViewerFlush(viewer);
1187:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
1188:     PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
1189:     PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);
1190:     PetscViewerFlush(viewer);
1191:     PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1192:     PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);
1193:     PetscViewerFlush(viewer);
1194:   }
1195:   /* Allocate needed vectors */
1196:   /* Set Mat type for local matrices needed by the BDDC preconditioner */
1197:   impMatType = MATSEQDENSE;
1198:   impVecType = VECSEQ;
1199:   VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);
1200:   VecDuplicate(pcis->vec1_N,&pcis->vec2_N);
1201:   VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);
1202:   VecSetSizes(pcbddc->vec1_R,n_R,n_R);
1203:   VecSetType(pcbddc->vec1_R,impVecType);
1204:   VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
1205:   VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);
1206:   VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);
1207:   VecSetType(pcbddc->vec1_P,impVecType);

1209:   /* Creating some index sets needed  */
1210:   /* For submatrices */
1211:   ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);
1212:   if(n_vertices)    {
1213:     ISDuplicate(pcbddc->ISForVertices,&is_V_local);
1214:     ISCopy(pcbddc->ISForVertices,is_V_local);
1215:   }
1216:   if(n_constraints) { ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local); }
1217:   /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1218:   {
1219:     PetscInt   *aux_array1;
1220:     PetscInt   *aux_array2;
1221:     PetscReal        value;

1223:     PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);
1224:     PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);

1226:     VecSet(pcis->vec1_global,zero);
1227:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1228:     VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1229:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1230:     VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1231:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1232:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1233:     VecGetArray(pcis->vec1_N,&array);
1234:     for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } }
1235:     VecRestoreArray(pcis->vec1_N,&array);
1236:     ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);
1237:     VecGetArray(pcis->vec1_B,&array);
1238:     for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } }
1239:     VecRestoreArray(pcis->vec1_B,&array);
1240:     ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);
1241:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
1242:     PetscFree(aux_array1);
1243:     PetscFree(aux_array2);
1244:     ISDestroy(&is_aux1);
1245:     ISDestroy(&is_aux2);

1247:     if(pcbddc->prec_type || dbg_flag ) {
1248:       PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);
1249:       VecGetArray(pcis->vec1_N,&array);
1250:       for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } }
1251:       VecRestoreArray(pcis->vec1_N,&array);
1252:       ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);
1253:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1254:       PetscFree(aux_array1);
1255:       ISDestroy(&is_aux1);
1256:     }

1258:     /* Check scatters */
1259:     if(dbg_flag) {
1260: 
1261:       Vec            vec_aux;

1263:       PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1264:       PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");
1265:       PetscViewerFlush(viewer);
1266:       VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1267:       VecSetRandom(pcis->vec1_B,PETSC_NULL);
1268:       VecDuplicate(pcbddc->vec1_R,&vec_aux);
1269:       VecCopy(pcbddc->vec1_R,vec_aux);
1270:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1271:       VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1272:       VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1273:       VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1274:       VecAXPY(vec_aux,m_one,pcbddc->vec1_R);
1275:       VecNorm(vec_aux,NORM_INFINITY,&value);
1276:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);
1277:       VecDestroy(&vec_aux);

1279:       VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1280:       VecSetRandom(pcis->vec1_B,PETSC_NULL);
1281:       VecDuplicate(pcis->vec1_B,&vec_aux);
1282:       VecCopy(pcis->vec1_B,vec_aux);
1283:       VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1284:       VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1285:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1286:       VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1287:       VecAXPY(vec_aux,m_one,pcis->vec1_B);
1288:       VecNorm(vec_aux,NORM_INFINITY,&value);
1289:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);
1290:       VecDestroy(&vec_aux);

1292:       PetscViewerFlush(viewer);
1293:       PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");
1294:       PetscViewerFlush(viewer);

1296:       VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1297:       VecSetRandom(pcis->vec1_D,PETSC_NULL);
1298:       VecDuplicate(pcbddc->vec1_R,&vec_aux);
1299:       VecCopy(pcbddc->vec1_R,vec_aux);
1300:       VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1301:       VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1302:       VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1303:       VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1304:       VecAXPY(vec_aux,m_one,pcbddc->vec1_R);
1305:       VecNorm(vec_aux,NORM_INFINITY,&value);
1306:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);
1307:       VecDestroy(&vec_aux);

1309:       VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1310:       VecSetRandom(pcis->vec1_D,PETSC_NULL);
1311:       VecDuplicate(pcis->vec1_D,&vec_aux);
1312:       VecCopy(pcis->vec1_D,vec_aux);
1313:       VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1314:       VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1315:       VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1316:       VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1317:       VecAXPY(vec_aux,m_one,pcis->vec1_D);
1318:       VecNorm(vec_aux,NORM_INFINITY,&value);
1319:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);
1320:       VecDestroy(&vec_aux);
1321:       PetscViewerFlush(viewer);

1323:     }
1324:   }

1326:   /* vertices in boundary numbering */
1327:   if(n_vertices) {
1328:     VecSet(pcis->vec1_N,m_one);
1329:     VecGetArray(pcis->vec1_N,&array);
1330:     for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; }
1331:     VecRestoreArray(pcis->vec1_N,&array);
1332:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1333:     VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1334:     PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);
1335:     VecGetArray(pcis->vec1_B,&array);
1336:     for (i=0; i<n_vertices; i++) {
1337:       s=0;
1338:       while (array[s] != i ) {s++;}
1339:       idx_V_B[i]=s;
1340:     }
1341:     VecRestoreArray(pcis->vec1_B,&array);
1342:   }
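  /* The search above recovers, for each vertex, its position in the boundary (B)
     numbering: vec1_N carries the value i at the node of vertex i (and -1 elsewhere),
     the N_to_B scatter moves these markers to the boundary numbering, and idx_V_B[i]
     is the boundary index where the marker i is found. */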


1345:   /* Creating PC contexts for local Dirichlet and Neumann problems */
1346:   {
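    /* Two local solvers are built here: ksp_D acts on A_II (the Dirichlet problem on
       interior dofs, already available from the PCIS setup) and ksp_R acts on A_RR
       (the Neumann-like problem on the "R" dofs, i.e. all local dofs except the primal
       vertices). Both default to a direct LU preconditioner with KSPPREONLY and can be
       overridden through KSPSetFromOptions; note that no options prefix has been
       attached yet (see the commented KSPSetOptionsPrefix calls below). */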
1347:     Mat  A_RR;
1348:     PC   pc_temp;
1349:     /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */
1350:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1351:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1352:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);
1353:     KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1354:     /* KSPSetOptionsPrefix(); */
1355:     /* default */
1356:     KSPGetPC(pcbddc->ksp_D,&pc_temp);
1357:     PCSetType(pc_temp,PCLU);
1358:     /* Allow user's customization */
1359:     KSPSetFromOptions(pcbddc->ksp_D);
1360:     /* Set Up KSP for Dirichlet problem of BDDC */
1361:     KSPSetUp(pcbddc->ksp_D);
1362:     /* Matrix for Neumann problem is A_RR -> we need to create it */
1363:     MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);
1364:     KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1365:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1366:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);
1367:     KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1368:     /* KSPSetOptionsPrefix(); */
1369:     /* default */
1370:     KSPGetPC(pcbddc->ksp_R,&pc_temp);
1371:     PCSetType(pc_temp,PCLU);
1372:     /* Allow user's customization */
1373:     KSPSetFromOptions(pcbddc->ksp_R);
1374:     /* Set Up KSP for Neumann problem of BDDC */
1375:     KSPSetUp(pcbddc->ksp_R);
1376:     /* check Dirichlet and Neumann solvers */
1377:     if(pcbddc->dbg_flag) {
1378:       Vec temp_vec;
1379:       PetscReal   value;

1381:       VecDuplicate(pcis->vec1_D,&temp_vec);
1382:       VecSetRandom(pcis->vec1_D,PETSC_NULL);
1383:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1384:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);
1385:       VecAXPY(temp_vec,m_one,pcis->vec1_D);
1386:       VecNorm(temp_vec,NORM_INFINITY,&value);
1387:       VecDestroy(&temp_vec);
1388:       PetscViewerFlush(viewer);
1389:       PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1390:       PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");
1391:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);
1392:       VecDuplicate(pcbddc->vec1_R,&temp_vec);
1393:       VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1394:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1395:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);
1396:       VecAXPY(temp_vec,m_one,pcbddc->vec1_R);
1397:       VecNorm(temp_vec,NORM_INFINITY,&value);
1398:       VecDestroy(&temp_vec);
1399:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);
1400:       PetscViewerFlush(viewer);
1401:     }
1402:     /* free Neumann problem's matrix */
1403:     MatDestroy(&A_RR);
1404:   }

1406:   /* Assemble all remaining stuff needed to apply BDDC  */
1407:   {
1408:     Mat          A_RV,A_VR,A_VV;
1409:     Mat          M1,M2;
1410:     Mat          C_CR;
1411:     Mat          AUXMAT;
1412:     Vec          vec1_C;
1413:     Vec          vec2_C;
1414:     Vec          vec1_V;
1415:     Vec          vec2_V;
1416:     PetscInt     *nnz;
1417:     PetscInt     *auxindices;
1418:     PetscInt     index;
1419:     PetscScalar* array2;
1420:     MatFactorInfo matinfo;

1422:     /* Allocating some extra storage just to be safe */
1423:     PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);
1424:     PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
1425:     for(i=0;i<pcis->n;i++) {auxindices[i]=i;}

1427:     /* some work vectors on vertices and/or constraints */
1428:     if(n_vertices) {
1429:       VecCreate(PETSC_COMM_SELF,&vec1_V);
1430:       VecSetSizes(vec1_V,n_vertices,n_vertices);
1431:       VecSetType(vec1_V,impVecType);
1432:       VecDuplicate(vec1_V,&vec2_V);
1433:     }
1434:     if(pcbddc->n_constraints) {
1435:       VecCreate(PETSC_COMM_SELF,&vec1_C);
1436:       VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);
1437:       VecSetType(vec1_C,impVecType);
1438:       VecDuplicate(vec1_C,&vec2_C);
1439:       VecDuplicate(vec1_C,&pcbddc->vec1_C);
1440:     }
1441:     /* Precompute quantities needed for the preprocessing and application of BDDC */
1442:     if(n_constraints) {
1443:       /* dense auxiliary matrix (n_R x n_constraints) used below and in the BDDC application */
1444:       MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
1445:       MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);
1446:       MatSetType(pcbddc->local_auxmat2,impMatType);
1447:       MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);

1449:       /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
1450:       for(i=0;i<n_constraints;i++) {
1451:         VecSet(pcis->vec1_N,zero);
1452:         VecSet(pcbddc->vec1_R,zero);
1453:         /* Get row of constraint matrix in R numbering */
1454:         VecGetArray(pcis->vec1_N,&array);
1455:         MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
1456:         for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; }
1457:         MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
1458:         VecGetArray(pcbddc->vec1_R,&array2);
1459:         for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; }
1460:         VecRestoreArray(pcis->vec1_N,&array);
1461:         VecRestoreArray(pcbddc->vec1_R,&array2);
1462:         /* Solve for row of constraint matrix in R numbering */
1463:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
1464:         /* Set values */
1465:         VecGetArray(pcbddc->vec2_R,&array);
1466:         MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
1467:         VecRestoreArray(pcbddc->vec2_R,&array);
1468:       }
1469:       MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
1470:       MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
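      /* At this point column i of local_auxmat2 holds -A_{RR}^{-1} times the
         (n_vertices+i)-th row of the constraint matrix restricted to the R dofs,
         i.e. local_auxmat2 = -A_{RR}^{-1} C^T_{CR}, built one column per KSP solve. */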

1472:       /* Create Constraint matrix on R nodes: C_{CR}  */
1473:       MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);
1474:       ISDestroy(&is_C_local);

1476:       /* Assemble AUXMAT = -C_{CR} A_{RR}^{-1} C^T_{CR} and LU-factor it in place */
1477:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);
1478:       MatFactorInfoInitialize(&matinfo);
1479:       ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);
1480:       MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);
1481:       ISDestroy(&is_aux1);

1483:       /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
1484:       MatCreate(PETSC_COMM_SELF,&M1);
1485:       MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
1486:       MatSetType(M1,impMatType);
1487:       MatSeqDenseSetPreallocation(M1,PETSC_NULL);
1488:       for(i=0;i<n_constraints;i++) {
1489:         VecSet(vec1_C,zero);
1490:         VecSetValue(vec1_C,i,one,INSERT_VALUES);
1491:         VecAssemblyBegin(vec1_C);
1492:         VecAssemblyEnd(vec1_C);
1493:         MatSolve(AUXMAT,vec1_C,vec2_C);
1494:         VecScale(vec2_C,m_one);
1495:         VecGetArray(vec2_C,&array);
1496:         MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);
1497:         VecRestoreArray(vec2_C,&array);
1498:       }
1499:       MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);
1500:       MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);
1501:       MatDestroy(&AUXMAT);
1502:       /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
1503:       MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
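      /* Summary of the dense operators built above:
           M1            = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  (columns obtained by solving
                           with the LU factors of AUXMAT and flipping the sign)
           local_auxmat1 = M1 * C_{CR}
         Roughly speaking, local_auxmat1 maps an R-space vector to the corresponding
         constraint multipliers, and local_auxmat2 maps multipliers back to a
         correction in the R space. */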

1505:     }

1507:     /* Get submatrices from subdomain matrix */
1508:     if(n_vertices){
1509:       MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);
1510:       MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);
1511:       MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);
1512:       /* Assemble M2 = A_RR^{-1}A_RV */
1513:       MatCreate(PETSC_COMM_SELF,&M2);
1514:       MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);
1515:       MatSetType(M2,impMatType);
1516:       MatSeqDenseSetPreallocation(M2,PETSC_NULL);
1517:       for(i=0;i<n_vertices;i++) {
1518:         VecSet(vec1_V,zero);
1519:         VecSetValue(vec1_V,i,one,INSERT_VALUES);
1520:         VecAssemblyBegin(vec1_V);
1521:         VecAssemblyEnd(vec1_V);
1522:         MatMult(A_RV,vec1_V,pcbddc->vec1_R);
1523:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
1524:         VecGetArray(pcbddc->vec2_R,&array);
1525:         MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);
1526:         VecRestoreArray(pcbddc->vec2_R,&array);
1527:       }
1528:       MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);
1529:       MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);
1530:     }
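    /* Column i of M2 is A_{RR}^{-1} A_{RV} e_i, so M2 = A_{RR}^{-1} A_{RV};
       it is reused for every vertex-based coarse basis function below. */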

1532:     /* Matrix of coarse basis functions (local) */
1533:     MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
1534:     MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
1535:     MatSetType(pcbddc->coarse_phi_B,impMatType);
1536:     MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);
1537:     if(pcbddc->prec_type || dbg_flag ) {
1538:       MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
1539:       MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
1540:       MatSetType(pcbddc->coarse_phi_D,impMatType);
1541:       MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);
1542:     }

1544:     if(dbg_flag) {
1545:       PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);
1546:       PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);
1547:     }
1548:     /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1549:     PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);

1551:     /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
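    /* For the vertex-based coarse dofs the loop below computes, up to the sign
       conventions used here, the solution (u_R, mu) of the local saddle point problem

           |  A_RR   C_CR^T | | u_R |   | -A_RV e_i |
           |  C_CR     0    | | mu  | = |     0     |

       u_R (stored in vec1_R) is the non-primal part of the i-th coarse basis function,
       while vec2_V = A_VR u_R + A_VV e_i and (-mu, stored in vec1_C) provide the i-th
       column of the subdomain contribution to the coarse matrix. */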
1552:     for(i=0;i<n_vertices;i++){
1553:       VecSet(vec1_V,zero);
1554:       VecSetValue(vec1_V,i,one,INSERT_VALUES);
1555:       VecAssemblyBegin(vec1_V);
1556:       VecAssemblyEnd(vec1_V);
1557:       /* solution of saddle point problem */
1558:       MatMult(M2,vec1_V,pcbddc->vec1_R);
1559:       VecScale(pcbddc->vec1_R,m_one);
1560:       if(n_constraints) {
1561:         MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
1562:         MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
1563:         VecScale(vec1_C,m_one);
1564:       }
1565:       MatMult(A_VR,pcbddc->vec1_R,vec2_V);
1566:       MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);

1568:       /* Set values in coarse basis function and subdomain part of coarse_mat */
1569:       /* coarse basis functions */
1570:       VecSet(pcis->vec1_B,zero);
1571:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1572:       VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1573:       VecGetArray(pcis->vec1_B,&array);
1574:       MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
1575:       VecRestoreArray(pcis->vec1_B,&array);
1576:       MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
1577:       if( pcbddc->prec_type || dbg_flag  ) {
1578:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1579:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1580:         VecGetArray(pcis->vec1_D,&array);
1581:         MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
1582:         VecRestoreArray(pcis->vec1_D,&array);
1583:       }
1584:       /* subdomain contribution to coarse matrix */
1585:       VecGetArray(vec2_V,&array);
1586:       for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING: column major ordering */
1587:       VecRestoreArray(vec2_V,&array);
1588:       if(n_constraints) {
1589:         VecGetArray(vec1_C,&array);
1590:         for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING: column major ordering */
1591:         VecRestoreArray(vec1_C,&array);
1592:       }
1593: 
1594:       if( dbg_flag ) {
1595:         /* assemble subdomain vector on nodes */
1596:         VecSet(pcis->vec1_N,zero);
1597:         VecGetArray(pcis->vec1_N,&array);
1598:         VecGetArray(pcbddc->vec1_R,&array2);
1599:         for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; }
1600:         array[ vertices[i] ] = one;
1601:         VecRestoreArray(pcbddc->vec1_R,&array2);
1602:         VecRestoreArray(pcis->vec1_N,&array);
1603:         /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
1604:         VecSet(pcbddc->vec1_P,zero);
1605:         VecGetArray(pcbddc->vec1_P,&array2);
1606:         VecGetArray(vec2_V,&array);
1607:         for(j=0;j<n_vertices;j++) { array2[j]=array[j]; }
1608:         VecRestoreArray(vec2_V,&array);
1609:         if(n_constraints) {
1610:           VecGetArray(vec1_C,&array);
1611:           for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; }
1612:           VecRestoreArray(vec1_C,&array);
1613:         }
1614:         VecRestoreArray(pcbddc->vec1_P,&array2);
1615:         VecScale(pcbddc->vec1_P,m_one);
1616:         /* check saddle point solution */
1617:         MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);
1618:         MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
1619:         VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
1620:         MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
1621:         VecGetArray(pcbddc->vec1_P,&array);
1622:         array[i]=array[i]+m_one;  /* shift by the identity matrix */
1623:         VecRestoreArray(pcbddc->vec1_P,&array);
1624:         VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
1625:       }
1626:     }
1627: 
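    /* Analogous loop for the coarse dofs associated with the constraints: here the
       right-hand side acts on the multiplier block (vec2_C = -e_i), u_R is recovered
       through local_auxmat2, and the vertex and constraint rows of coarse_submat_vals
       are filled as in the vertex loop above. */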
1628:     for(i=0;i<n_constraints;i++){
1629:       VecSet(vec2_C,zero);
1630:       VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
1631:       VecAssemblyBegin(vec2_C);
1632:       VecAssemblyEnd(vec2_C);
1633:       /* solution of saddle point problem */
1634:       MatMult(M1,vec2_C,vec1_C);
1635:       MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
1636:       VecScale(vec1_C,m_one);
1637:       if(n_vertices) { MatMult(A_VR,pcbddc->vec1_R,vec2_V); }
1638:       /* Set values in coarse basis function and subdomain part of coarse_mat */
1639:       /* coarse basis functions */
1640:       index=i+n_vertices;
1641:       VecSet(pcis->vec1_B,zero);
1642:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1643:       VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1644:       VecGetArray(pcis->vec1_B,&array);
1645:       MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);
1646:       VecRestoreArray(pcis->vec1_B,&array);
1647:       if( pcbddc->prec_type || dbg_flag ) {
1648:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1649:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1650:         VecGetArray(pcis->vec1_D,&array);
1651:         MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);
1652:         VecRestoreArray(pcis->vec1_D,&array);
1653:       }
1654:       /* subdomain contribution to coarse matrix */
1655:       if(n_vertices) {
1656:         VecGetArray(vec2_V,&array);
1657:         for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING: column major ordering */
1658:         VecRestoreArray(vec2_V,&array);
1659:       }
1660:       VecGetArray(vec1_C,&array);
1661:       for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING: column major ordering */
1662:       VecRestoreArray(vec1_C,&array);
1663: 
1664:       if( dbg_flag ) {
1665:         /* assemble subdomain vector on nodes */
1666:         VecSet(pcis->vec1_N,zero);
1667:         VecGetArray(pcis->vec1_N,&array);
1668:         VecGetArray(pcbddc->vec1_R,&array2);
1669:         for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; }
1670:         VecRestoreArray(pcbddc->vec1_R,&array2);
1671:         VecRestoreArray(pcis->vec1_N,&array);
1672:         /* assemble subdomain vector of lagrange multipliers */
1673:         VecSet(pcbddc->vec1_P,zero);
1674:         VecGetArray(pcbddc->vec1_P,&array2);
1675:         if( n_vertices) {
1676:           VecGetArray(vec2_V,&array);
1677:           for(j=0;j<n_vertices;j++) {array2[j]=-array[j];}
1678:           VecRestoreArray(vec2_V,&array);
1679:         }
1680:         VecGetArray(vec1_C,&array);
1681:         for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];}
1682:         VecRestoreArray(vec1_C,&array);
1683:         VecRestoreArray(pcbddc->vec1_P,&array2);
1684:         /* check saddle point solution */
1685:         MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);
1686:         MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
1687:         VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);
1688:         MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
1689:         VecGetArray(pcbddc->vec1_P,&array);
1690:         array[index]=array[index]+m_one; /* shift by the identity matrix */
1691:         VecRestoreArray(pcbddc->vec1_P,&array);
1692:         VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);
1693:       }
1694:     }
1695:     MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
1696:     MatAssemblyEnd  (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
1697:     if( pcbddc->prec_type || dbg_flag ) {
1698:       MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
1699:       MatAssemblyEnd  (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
1700:     }
1701:     /* Check coarse_sub_mat and the coarse basis functions */
1702:     /* It should hold that \Phi^{(j)T} A^{(j)} \Phi^{(j)} = coarse_sub_mat */
1703:     if(dbg_flag) {

1705:       Mat coarse_sub_mat;
1706:       Mat TM1,TM2,TM3,TM4;
1707:       Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI;
1708:       const MatType checkmattype=MATSEQAIJ;
1709:       PetscReal        value;

1711:       MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
1712:       MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
1713:       MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
1714:       MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
1715:       MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
1716:       MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
1717:       MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
1718:       MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);

1720:       PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1721:       PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");
1722:       PetscViewerFlush(viewer);
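      /* With Phi = [Phi_D; Phi_B] and A split into its I/B blocks, the four terms
         computed below add up to

             Phi^T A Phi = Phi_D^T A_II Phi_D + Phi_B^T A_BB Phi_B
                         + Phi_D^T A_IB Phi_B + Phi_B^T A_BI Phi_D

         which is then compared (in the infinity norm) against coarse_sub_mat. */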
1723:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
1724:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
1725:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1726:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1727:       MatDestroy(&AUXMAT);
1728:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1729:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1730:       MatDestroy(&AUXMAT);
1731:       MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
1732:       MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
1733:       MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
1734:       MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
1735:       MatNorm(TM1,NORM_INFINITY,&value);
1736:       PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");
1737:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);
1738:       PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);
1739:       PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");
1740:       for(i=0;i<pcbddc->local_primal_size;i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]); }
1741:       PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");
1742:       for(i=0;i<pcbddc->local_primal_size;i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]); }
1743:       PetscViewerFlush(viewer);
1744:       MatDestroy(&A_II);
1745:       MatDestroy(&A_BB);
1746:       MatDestroy(&A_IB);
1747:       MatDestroy(&A_BI);
1748:       MatDestroy(&TM1);
1749:       MatDestroy(&TM2);
1750:       MatDestroy(&TM3);
1751:       MatDestroy(&TM4);
1752:       MatDestroy(&coarse_phi_D);
1753:       MatDestroy(&coarse_sub_mat);
1754:       MatDestroy(&coarse_phi_B);
1755:       PetscFree(coarsefunctions_errors);
1756:       PetscFree(constraints_errors);
1757:     }

1759:     /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */
1760:     PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);
1761:     /* free memory */
1762:     PetscFree(coarse_submat_vals);
1763:     PetscFree(auxindices);
1764:     PetscFree(nnz);
1765:     if(n_vertices) {
1766:       VecDestroy(&vec1_V);
1767:       VecDestroy(&vec2_V);
1768:       MatDestroy(&M2);
1769:       MatDestroy(&A_RV);
1770:       MatDestroy(&A_VR);
1771:       MatDestroy(&A_VV);
1772:     }
1773:     if(pcbddc->n_constraints) {
1774:       VecDestroy(&vec1_C);
1775:       VecDestroy(&vec2_C);
1776:       MatDestroy(&M1);
1777:       MatDestroy(&C_CR);
1778:     }
1779:   }
1780:   /* free memory */
1781:   if(n_vertices) {
1782:     PetscFree(idx_V_B);
1783:     ISDestroy(&is_V_local);
1784:   }
1785:   PetscFree(idx_R_local);
1786:   ISDestroy(&is_R_local);
1787:   ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);

1789:   return(0);
1790: }

1792: /* -------------------------------------------------------------------------- */

1796: static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
1797: {

1799: 
1800:   Mat_IS    *matis    = (Mat_IS*)pc->pmat->data;
1801:   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
1802:   PC_IS     *pcis     = (PC_IS*)pc->data;
1803:   MPI_Comm  prec_comm = ((PetscObject)pc)->comm;
1804:   MPI_Comm  coarse_comm;

1806:   /* common to all choices */
1807:   PetscScalar *temp_coarse_mat_vals;
1808:   PetscScalar *ins_coarse_mat_vals;
1809:   PetscInt    *ins_local_primal_indices;
1810:   PetscMPIInt *localsizes2,*localdispl2;
1811:   PetscMPIInt size_prec_comm;
1812:   PetscMPIInt rank_prec_comm;
1813:   PetscMPIInt active_rank=MPI_PROC_NULL;
1814:   PetscMPIInt master_proc=0;
1815:   PetscInt    ins_local_primal_size;
1816:   /* specific to MULTILEVEL_BDDC */
1817:   PetscMPIInt *ranks_recv;
1818:   PetscMPIInt count_recv=0;
1819:   PetscMPIInt rank_coarse_proc_send_to;
1820:   PetscMPIInt coarse_color = MPI_UNDEFINED;
1821:   ISLocalToGlobalMapping coarse_ISLG;
1822:   /* some other variables */
1824:   const MatType coarse_mat_type;
1825:   const PCType  coarse_pc_type;
1826:   const KSPType  coarse_ksp_type;
1827:   PC pc_temp;
1828:   PetscInt i,j,k,bs;
1829:   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
1830:   /* verbose output viewer */
1831:   PetscViewer viewer=pcbddc->dbg_viewer;
1832:   PetscBool   dbg_flag=pcbddc->dbg_flag;
1833: 

1836:   ins_local_primal_indices = 0;
1837:   ins_coarse_mat_vals      = 0;
1838:   localsizes2              = 0;
1839:   localdispl2              = 0;
1840:   temp_coarse_mat_vals     = 0;
1841:   coarse_ISLG              = 0;

1843:   MPI_Comm_size(prec_comm,&size_prec_comm);
1844:   MPI_Comm_rank(prec_comm,&rank_prec_comm);
1845:   MatGetBlockSize(matis->A,&bs);
1846: 
1847:   /* Assign global numbering to coarse dofs */
1848:   {
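    /* Outline of this block: each process marks one representative local node per
       primal dof (all vertices plus one node per constraint), maps those nodes to the
       global node numbering, and gathers them on rank 0; there the list is sorted and
       deduplicated so that the first pcbddc->coarse_size entries enumerate the coarse
       dofs, and after a broadcast every process recovers the global coarse index of
       each of its local primal dofs (pcbddc->local_primal_indices). */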
1849:     PetscScalar    one=1.,zero=0.;
1850:     PetscScalar    *array;
1851:     PetscMPIInt    *auxlocal_primal;
1852:     PetscMPIInt    *auxglobal_primal;
1853:     PetscMPIInt    *all_auxglobal_primal;
1854:     PetscMPIInt    *all_auxglobal_primal_dummy;
1855:     PetscMPIInt    mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
1856:     PetscInt       *vertices,*row_cmat_indices;
1857:     PetscInt       size_of_constraint;

1859:     /* Construct needed data structures for message passing */
1860:     PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);
1861:     PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);
1862:     PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);
1863:     /* Gather local_primal_size information for all processes  */
1864:     MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);
1865:     pcbddc->replicated_primal_size = 0;
1866:     for (i=0; i<size_prec_comm; i++) {
1867:       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
1868:       pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
1869:     }
1870:     if(rank_prec_comm == 0) {
1871:       /* allocate some auxiliary space */
1872:       PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);
1873:       PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);
1874:     }
1875:     PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);
1876:     PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);

1878:     /* First count the coarse dofs: note that a constraint is allowed to exist on a subdomain without its counterpart on the neighbouring subdomain (if the user so chooses).
1879:        This code fragment assumes that the number of local constraints per connected component
1880:        is not greater than the number of nodes defined for that connected component
1881:        (otherwise there would surely be linear dependence between constraints and thus a singular coarse problem) */
1882:     /* auxlocal_primal : primal indices in local node numbering (internal and interface), one representative node per primal dof */
1883:     VecSet(pcis->vec1_N,zero);
1884:     ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1885:     VecGetArray(pcis->vec1_N,&array);
1886:     for(i=0;i<pcbddc->n_vertices;i++) {  /* note that  pcbddc->n_vertices can be different from size of ISForVertices */
1887:       array[ vertices[i] ] = one;
1888:       auxlocal_primal[i] = vertices[i];
1889:     }
1890:     ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1891:     for(i=0;i<pcbddc->n_constraints;i++) {
1892:       MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1893:       for (j=0; j<size_of_constraint; j++) {
1894:         k = row_cmat_indices[j];
1895:         if( array[k] == zero ) {
1896:           array[k] = one;
1897:           auxlocal_primal[i+pcbddc->n_vertices] = k;
1898:           break;
1899:         }
1900:       }
1901:       MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1902:     }
1903:     VecRestoreArray(pcis->vec1_N,&array);

1905:     /* Now assign them a global numbering */
1906:     /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */
1907:     ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);
1908:     /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */
1909:     MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);

1911:     /* After this block all_auxglobal_primal should contain one copy of each primal node's index in the global node numbering */
1912:     /* It implements a function similar to PetscSortRemoveDupsInt */
1913:     if(rank_prec_comm==0) {
1914:       /* dummy argument since PetscSortMPIInt doesn't exist! */
1915:       PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);
1916:       k=1;
1917:       j=all_auxglobal_primal[0];  /* first dof in global numbering */
1918:       for(i=1;i< pcbddc->replicated_primal_size ;i++) {
1919:         if(j != all_auxglobal_primal[i] ) {
1920:           all_auxglobal_primal[k]=all_auxglobal_primal[i];
1921:           k++;
1922:           j=all_auxglobal_primal[i];
1923:         }
1924:       }
1925:     } else {
1926:       PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);
1927:     }
1928:     /* We only need to broadcast the first pcbddc->coarse_size indices. The remaining elements of all_auxglobal_primal are garbage. */
1929:     MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);
1930: 
1931:     /* Now get global coarse numbering of local primal nodes */
1932:     for(i=0;i<pcbddc->local_primal_size;i++) {
1933:       k=0;
1934:       while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;}
1935:       pcbddc->local_primal_indices[i]=k;
1936:     }
1937:     if(dbg_flag) {
1938:       PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1939:       PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");
1940:       PetscViewerFlush(viewer);
1941:       PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);
1942:       for(i=0;i<pcbddc->local_primal_size;i++) {
1943:         PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
1944:       }
1945:       PetscViewerFlush(viewer);
1946:     }
1947:     /* free allocated memory */
1948:     PetscFree(auxlocal_primal);
1949:     PetscFree(auxglobal_primal);
1950:     PetscFree(all_auxglobal_primal);
1951:     if(rank_prec_comm == 0) {
1952:       PetscFree(all_auxglobal_primal_dummy);
1953:     }
1954:   }

1956:   /* adapt coarse problem type */
1957:   if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC )
1958:     pcbddc->coarse_problem_type = PARALLEL_BDDC;
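  /* The cases below only select the ingredients of the coarse solve:
       MULTILEVEL_BDDC -> coarse MATIS + PCBDDC (with Chebyshev), on a split communicator
       REPLICATED_BDDC -> sequential AIJ matrix + LU, replicated on every process
       PARALLEL_BDDC   -> parallel MPIAIJ matrix + PCREDUNDANT on the whole communicator
       SEQUENTIAL_BDDC -> sequential AIJ matrix + LU, assembled on the master process only */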

1960:   switch(pcbddc->coarse_problem_type){

1962:     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where the subdomains are the elements */
1963:     {
1964:       /* we need additional variables */
1965:       MetisInt   n_subdomains,n_parts,objval,ncon,faces_nvtxs;
1966:       MetisInt   *metis_coarse_subdivision;
1967:       MetisInt   options[METIS_NOPTIONS];
1968:       PetscMPIInt size_coarse_comm,rank_coarse_comm;
1969:       PetscMPIInt procs_jumps_coarse_comm;
1970:       PetscMPIInt *coarse_subdivision;
1971:       PetscMPIInt *total_count_recv;
1972:       PetscMPIInt *total_ranks_recv;
1973:       PetscMPIInt *displacements_recv;
1974:       PetscMPIInt *my_faces_connectivity;
1975:       PetscMPIInt *petsc_faces_adjncy;
1976:       MetisInt    *faces_adjncy;
1977:       MetisInt    *faces_xadj;
1978:       PetscMPIInt *number_of_faces;
1979:       PetscMPIInt *faces_displacements;
1980:       PetscInt    *array_int;
1981:       PetscMPIInt my_faces=0;
1982:       PetscMPIInt total_faces=0;
1983:       PetscInt    ranks_stretching_ratio;

1985:       /* define some quantities */
1986:       pcbddc->coarse_communications_type = SCATTERS_BDDC;
1987:       coarse_mat_type = MATIS;
1988:       coarse_pc_type  = PCBDDC;
1989:       coarse_ksp_type  = KSPCHEBYSHEV;

1991:       /* details of coarse decomposition */
1992:       n_subdomains = pcbddc->active_procs;
1993:       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
1994:       ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs;
1995:       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
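      /* For instance, with size_prec_comm = active_procs = 64 and coarsening_ratio = 8,
         these formulas give n_parts = 8, ranks_stretching_ratio = 1 and
         procs_jumps_coarse_comm = 8, so the coarse problem ends up on ranks
         0,8,...,56 of the original communicator (see the communicator split below). */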

1997:       printf("Coarse algorithm details: \n");
1998:       printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));

2000:       /* build CSR graph of subdomains' connectivity through faces */
2001:       PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);
2002:       PetscMemzero(array_int,pcis->n*sizeof(PetscInt));
2003:       for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2004:         for(j=0;j<pcis->n_shared[i];j++){
2005:           array_int[ pcis->shared[i][j] ]+=1;
2006:         }
2007:       }
2008:       for(i=1;i<pcis->n_neigh;i++){
2009:         for(j=0;j<pcis->n_shared[i];j++){
2010:           if(array_int[ pcis->shared[i][j] ] == 1 ){
2011:             my_faces++;
2012:             break;
2013:           }
2014:         }
2015:       }
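      /* array_int[node] counts how many neighbouring subdomains share that node
         (self excluded); interface nodes with count exactly 1 belong to a face, so a
         neighbour contributes a face whenever it shares at least one such node. */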
2016:       //printf("I found %d faces.\n",my_faces);

2018:       MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);
2019:       PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);
2020:       my_faces=0;
2021:       for(i=1;i<pcis->n_neigh;i++){
2022:         for(j=0;j<pcis->n_shared[i];j++){
2023:           if(array_int[ pcis->shared[i][j] ] == 1 ){
2024:             my_faces_connectivity[my_faces]=pcis->neigh[i];
2025:             my_faces++;
2026:             break;
2027:           }
2028:         }
2029:       }
2030:       if(rank_prec_comm == master_proc) {
2031:         //printf("I found %d total faces.\n",total_faces);
2032:         PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);
2033:         PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);
2034:         PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);
2035:         PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);
2036:         PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);
2037:       }
2038:       MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);
2039:       if(rank_prec_comm == master_proc) {
2040:         faces_xadj[0]=0;
2041:         faces_displacements[0]=0;
2042:         j=0;
2043:         for(i=1;i<size_prec_comm+1;i++) {
2044:           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2045:           if(number_of_faces[i-1]) {
2046:             j++;
2047:             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2048:           }
2049:         }
2050:         printf("Counted %d coarse subdomains, expected %d\n",j,n_subdomains);
2051:         printf("Counted %d total faces, expected %d\n",faces_xadj[j],total_faces);
2052:       }
2053:       MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);
2054:       PetscFree(my_faces_connectivity);
2055:       PetscFree(array_int);
2056:       if(rank_prec_comm == master_proc) {
2057:         for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2058:         printf("This is the face connectivity (actual ranks)\n");
2059:         for(i=0;i<n_subdomains;i++){
2060:           printf("proc %d is connected with \n",i);
2061:           for(j=faces_xadj[i];j<faces_xadj[i+1];j++)
2062:             printf("%d ",faces_adjncy[j]);
2063:           printf("\n");
2064:         }
2065:         PetscFree(faces_displacements);
2066:         PetscFree(number_of_faces);
2067:         PetscFree(petsc_faces_adjncy);
2068:       }

2070:       if( rank_prec_comm == master_proc ) {

2072:         PetscInt heuristic_for_metis=3;

2074:         ncon=1;
2075:         faces_nvtxs=n_subdomains;
2076:         /* partition graph induced by face connectivity */
2077:         PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);
2078:         METIS_SetDefaultOptions(options);
2079:         /* we need a contiguous partition of the coarse mesh */
2080:         options[METIS_OPTION_CONTIG]=1;
2081:         options[METIS_OPTION_DBGLVL]=1;
2082:         options[METIS_OPTION_NITER]=30;
2083:         //options[METIS_OPTION_NCUTS]=1;
2084:         printf("METIS PART GRAPH\n");
2085:         if(n_subdomains>n_parts*heuristic_for_metis) {
2086:           printf("Using Kway\n");
2087:           options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2088:           options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2089:           ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2090:         } else {
2091:           printf("Using Recursive\n");
2092:           ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2093:         }
2094:         if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
2095:         printf("Partition done!\n");
2096:         PetscFree(faces_xadj);
2097:         PetscFree(faces_adjncy);
2098:         coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */
2099:         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2100:         for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2101:         for(i=0;i<n_subdomains;i++)   coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2102:         PetscFree(metis_coarse_subdivision);
2103:       }

2105:       /* Create new communicator for coarse problem splitting the old one */
2106:       if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2107:         coarse_color=0;              /* for communicator splitting */
2108:         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2109:       }
2110:       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from the MPI standard) */
2111:       /* key = rank_prec_comm -> keep the same ordering of ranks from the old to the new communicator */
2112:       MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);

2114:       if( coarse_color == 0 ) {
2115:         MPI_Comm_size(coarse_comm,&size_coarse_comm);
2116:         MPI_Comm_rank(coarse_comm,&rank_coarse_comm);
2117:         printf("Details of coarse comm\n");
2118:         printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm);
2119:         printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);
2120:       } else {
2121:         rank_coarse_comm = MPI_PROC_NULL;
2122:       }

2124:       /* the master proc takes care of arranging and distributing the coarse information */
2125:       if(rank_coarse_comm == master_proc) {
2126:         PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);
2127:         //PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);
2128:         //PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);
2129:         total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
2130:         total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt));
2131:         /* some initializations */
2132:         displacements_recv[0]=0;
2133:         /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */
2134:         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2135:         for(j=0;j<size_coarse_comm;j++)
2136:           for(i=0;i<size_prec_comm;i++)
2137:             if(coarse_subdivision[i]==j)
2138:               total_count_recv[j]++;
2139:         /* displacements needed for scatterv of total_ranks_recv */
2140:         for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2141:         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2142:         PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));
2143:         for(j=0;j<size_coarse_comm;j++) {
2144:           for(i=0;i<size_prec_comm;i++) {
2145:             if(coarse_subdivision[i]==j) {
2146:               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2147:               total_count_recv[j]+=1;
2148:             }
2149:           }
2150:         }
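        /* Receive plan built so far: total_count_recv[j] is the number of fine
           processes assigned to coarse process j, displacements_recv[j] is the offset
           of its portion in total_ranks_recv, and total_ranks_recv lists the original
           (prec_comm) ranks it will receive matrix data from. These arrays are
           scattered to the members of the coarse communicator below. */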
2151:         for(j=0;j<size_coarse_comm;j++) {
2152:           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2153:           for(i=0;i<total_count_recv[j];i++) {
2154:             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2155:           }
2156:           printf("\n");
2157:         }

2159:         /* identify new decomposition in terms of ranks in the old communicator */
2160:         for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2161:         printf("coarse_subdivision in old and new ranks\n");
2162:         for(i=0;i<size_prec_comm;i++)
2163:           if(coarse_subdivision[i]!=MPI_PROC_NULL) {
2164:             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2165:           } else {
2166:             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2167:           }
2168:         printf("\n");
2169:       }

2171:       /* Scatter new decomposition for send details */
2172:       MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);
2173:       /* Scatter receiving details to members of coarse decomposition */
2174:       if( coarse_color == 0) {
2175:         MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);
2176:         PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);
2177:         MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);
2178:       }

2180:       //printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2181:       //if(coarse_color == 0) {
2182:       //  printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2183:       //  for(i=0;i<count_recv;i++)
2184:       //    printf("%d ",ranks_recv[i]);
2185:       //  printf("\n");
2186:       //}

2188:       if(rank_prec_comm == master_proc) {
2189:         //PetscFree(coarse_subdivision);
2190:         //PetscFree(total_count_recv);
2191:         //PetscFree(total_ranks_recv);
2192:         free(coarse_subdivision);
2193:         free(total_count_recv);
2194:         free(total_ranks_recv);
2195:         PetscFree(displacements_recv);
2196:       }
2197:       break;
2198:     }

2200:     case(REPLICATED_BDDC):

2202:       pcbddc->coarse_communications_type = GATHERS_BDDC;
2203:       coarse_mat_type = MATSEQAIJ;
2204:       coarse_pc_type  = PCLU;
2205:       coarse_ksp_type  = KSPPREONLY;
2206:       coarse_comm = PETSC_COMM_SELF;
2207:       active_rank = rank_prec_comm;
2208:       break;

2210:     case(PARALLEL_BDDC):

2212:       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2213:       coarse_mat_type = MATMPIAIJ;
2214:       coarse_pc_type  = PCREDUNDANT;
2215:       coarse_ksp_type  = KSPPREONLY;
2216:       coarse_comm = prec_comm;
2217:       active_rank = rank_prec_comm;
2218:       break;

2220:     case(SEQUENTIAL_BDDC):
2221:       pcbddc->coarse_communications_type = GATHERS_BDDC;
2222:       coarse_mat_type = MATSEQAIJ;
2223:       coarse_pc_type = PCLU;
2224:       coarse_ksp_type  = KSPPREONLY;
2225:       coarse_comm = PETSC_COMM_SELF;
2226:       active_rank = master_proc;
2227:       break;
2228:   }

2230:   switch(pcbddc->coarse_communications_type){

2232:     case(SCATTERS_BDDC):
2233:       {
2234:         if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

2236:           PetscMPIInt send_size;
2237:           PetscInt    *aux_ins_indices;
2238:           PetscInt    ii,jj;
2239:           MPI_Request *requests;

2241:           /* allocate auxiliary space */
2242:           PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
2243:           MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);
2244:           PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);
2245:           PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));
2246:           /* allocate buffers for message passing */
2247:           PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);
2248:           for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
2249:           PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);
2250:           PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);
2251:           /* fill up quantities */
2252:           j=0;
2253:           for(i=0;i<count_recv;i++){
2254:             ii = ranks_recv[i];
2255:             localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
2256:             localdispl2[i]=j;
2257:             j+=localsizes2[i];
2258:             jj = pcbddc->local_primal_displacements[ii];
2259:             for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1;  /* count the coarse subdomains sharing the coarse node */
2260:           }
2261:           //printf("aux_ins_indices 1\n");
2262:           //for(i=0;i<pcbddc->coarse_size;i++)
2263:           //  printf("%d ",aux_ins_indices[i]);
2264:           //printf("\n");
2265:           /* temp_coarse_mat_vals is used to store temporarily received matrix values */
2266:           PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);
2267:           /* evaluate how many values I will insert in coarse mat */
2268:           ins_local_primal_size=0;
2269:           for(i=0;i<pcbddc->coarse_size;i++)
2270:             if(aux_ins_indices[i])
2271:               ins_local_primal_size++;
2272:           /* evaluate indices I will insert in coarse mat */
2273:           PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
2274:           j=0;
2275:           for(i=0;i<pcbddc->coarse_size;i++)
2276:             if(aux_ins_indices[i])
2277:               ins_local_primal_indices[j++]=i;
2278:           /* use aux_ins_indices to realize a global to local mapping */
2279:           j=0;
2280:           for(i=0;i<pcbddc->coarse_size;i++){
2281:             if(aux_ins_indices[i]==0){
2282:               aux_ins_indices[i]=-1;
2283:             } else {
2284:               aux_ins_indices[i]=j;
2285:               j++;
2286:             }
2287:           }

2289:           //printf("New details localsizes2 localdispl2\n");
2290:           //for(i=0;i<count_recv;i++)
2291:           //  printf("(%d %d) ",localsizes2[i],localdispl2[i]);
2292:           //printf("\n");
2293:           //printf("aux_ins_indices 2\n");
2294:           //for(i=0;i<pcbddc->coarse_size;i++)
2295:           //  printf("%d ",aux_ins_indices[i]);
2296:           //printf("\n");
2297:           //printf("ins_local_primal_indices\n");
2298:           //for(i=0;i<ins_local_primal_size;i++)
2299:           //  printf("%d ",ins_local_primal_indices[i]);
2300:           //printf("\n");
2301:           //printf("coarse_submat_vals\n");
2302:           //for(i=0;i<pcbddc->local_primal_size;i++)
2303:           //  for(j=0;j<pcbddc->local_primal_size;j++)
2304:           //    printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]);
2305:           //printf("\n");
2306: 
2307:           /* processes participating in the coarse problem receive matrix data from their (sending) friend processes */
2308:           for(i=0;i<count_recv;i++) MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);
2309:           if(rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2310:             send_size=pcbddc->local_primal_size*pcbddc->local_primal_size;
2311:             MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);
2312:           }
2313:           MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);

2315:           //if(coarse_color == 0) {
2316:           //  printf("temp_coarse_mat_vals\n");
2317:           //  for(k=0;k<count_recv;k++){
2318:           //    printf("---- %d ----\n",ranks_recv[k]);
2319:           //    for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++)
2320:           //      for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++)
2321:           //        printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]);
2322:           //    printf("\n");
2323:           //  }
2324:           //}
2325:           /* calculate data to insert in coarse mat */
2326:           PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
2327:           PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar));

2329:           PetscMPIInt rr,kk,lps,lpd;
2330:           PetscInt row_ind,col_ind;
2331:           for(k=0;k<count_recv;k++){
2332:             rr = ranks_recv[k];
2333:             kk = localdispl2[k];
2334:             lps = pcbddc->local_primal_sizes[rr];
2335:             lpd = pcbddc->local_primal_displacements[rr];
2336:             //printf("Inserting the following indices (received from %d)\n",rr);
2337:             for(j=0;j<lps;j++){
2338:               col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]];
2339:               for(i=0;i<lps;i++){
2340:                 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]];
2341:                 //printf("%d %d\n",row_ind,col_ind);
2342:                 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i];
2343:               }
2344:             }
2345:           }
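          /* Each received block is a dense lps x lps matrix in column major order;
             its global coarse indices (via replicated_local_primal_indices) are mapped
             through aux_ins_indices to local positions, and the entries are summed so
             that ins_coarse_mat_vals holds the subassembled contribution of all fine
             subdomains assigned to this coarse process. */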
2346:           PetscFree(requests);
2347:           PetscFree(aux_ins_indices);
2348:           PetscFree(temp_coarse_mat_vals);
2349:           if(coarse_color == 0) { PetscFree(ranks_recv); }

2351:           /* create local to global mapping needed by coarse MATIS */
2352:           {
2353:             IS coarse_IS;
2354:             if(coarse_comm != MPI_COMM_NULL ) MPI_Comm_free(&coarse_comm);
2355:             coarse_comm = prec_comm;
2356:             active_rank=rank_prec_comm;
2357:             ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);
2358:             ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);
2359:             ISDestroy(&coarse_IS);
2360:           }
2361:         }
2362:         if(pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2363:           /* arrays for values insertion */
2364:           ins_local_primal_size = pcbddc->local_primal_size;
2365:           PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);
2366:           PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
2367:           for(j=0;j<ins_local_primal_size;j++){
2368:             ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2369:             for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2370:           }
2371:         }
2372:         break;
2373: 
2374:     }

2376:     case(GATHERS_BDDC):
2377:       {

2379:         PetscMPIInt mysize,mysize2;

2381:         if(rank_prec_comm==active_rank) {
2382:           PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
2383:           pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar));
2384:           PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);
2385:           PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);
2386:           /* arrays for values insertion */
2387:           ins_local_primal_size = pcbddc->coarse_size;
2388:           PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);
2389:           PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
2390:           for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2391:           localdispl2[0]=0;
2392:           for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2393:           j=0;
2394:           for(i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2395:           PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);
2396:         }

2398:         mysize=pcbddc->local_primal_size;
2399:         mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2400:         if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2401:           MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);
2402:           MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);
2403:         } else {
2404:           MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);
2405:           MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);
2406:         }

2408:         /* on the active rank(s), sum the gathered local contributions into the insertion arrays used below to assemble the coarse matrix */
2409:         if(rank_prec_comm==active_rank) {
2410:           PetscInt offset,offset2,row_ind,col_ind;
2411:           for(j=0;j<ins_local_primal_size;j++){
2412:             ins_local_primal_indices[j]=j;
2413:             for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0;
2414:           }
2415:           for(k=0;k<size_prec_comm;k++){
2416:             offset=pcbddc->local_primal_displacements[k];
2417:             offset2=localdispl2[k];
2418:             for(j=0;j<pcbddc->local_primal_sizes[k];j++){
2419:               col_ind=pcbddc->replicated_local_primal_indices[offset+j];
2420:               for(i=0;i<pcbddc->local_primal_sizes[k];i++){
2421:                 row_ind=pcbddc->replicated_local_primal_indices[offset+i];
2422:                 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i];
2423:               }
2424:             }
2425:           }
2426:         }
2427:         break;
2428:       } /* switch on coarse problem type and associated communications finished */
2429:   }

2431:   /* Now create and fill up coarse matrix */
2432:   if( rank_prec_comm == active_rank ) {
2433:     if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2434:       MatCreate(coarse_comm,&pcbddc->coarse_mat);
2435:       MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);
2436:       MatSetType(pcbddc->coarse_mat,coarse_mat_type);
2437:       MatSetUp(pcbddc->coarse_mat);
2438:       MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE); /* local values are stored in column-major order */
2439:       MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
2440:     } else {
2441:       Mat matis_coarse_local_mat;
2442:       /* note: bs is the block size passed to the coarse MATIS */
2443:       MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);
2444:       MatSetUp(pcbddc->coarse_mat);
2445:       MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);
2446:       MatSetUp(matis_coarse_local_mat);
2447:       MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE); /* local values are stored in column-major order */
2448:       MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
2449:     }
2450:     MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2451:     MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
2452:     MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
2453:     MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);

2455:     MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);
2456:     /* Preconditioner for coarse problem */
2457:     KSPCreate(coarse_comm,&pcbddc->coarse_ksp);
2458:     PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
2459:     KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);
2460:     KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);
2461:     KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
2462:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
2463:     PCSetType(pc_temp,coarse_pc_type);
2464:     /* Allow user's customization */
2465:     KSPSetFromOptions(pcbddc->coarse_ksp);
2466:     /* Set Up PC for coarse problem BDDC */
2467:     if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2468:       if(dbg_flag) {
2469:         PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");
2470:         PetscViewerFlush(viewer);
2471:       }
2472:       PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);
2473:     }
2474:     KSPSetUp(pcbddc->coarse_ksp);
2475:     if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2476:       if(dbg_flag) {
2477:         PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");
2478:         PetscViewerFlush(viewer);
2479:       }
2480:     }
2481:   }
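  /* For the SCATTERS strategy, create the scatter that maps the local primal vector vec1_P
     (local numbering 0..local_primal_size-1) into the distributed coarse vector through the
     global primal indices, so that primal values can be moved to and from the coarse problem */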
2482:   if(pcbddc->coarse_communications_type == SCATTERS_BDDC) {
2483:      IS local_IS,global_IS;
2484:      ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);
2485:      ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);
2486:      VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);
2487:      ISDestroy(&local_IS);
2488:      ISDestroy(&global_IS);
2489:   }


2492:   /* Estimate the condition number of the coarse problem for Chebyshev (and produce verbose output if requested) */
2493:   if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) {
2494:     PetscScalar m_one=-1.0;
2495:     PetscReal   infty_error,lambda_min,lambda_max,kappa_2;
2496:     const KSPType check_ksp_type=KSPGMRES;

2498:     /* switch the coarse ksp object to an iterative method suitable for estimating the extreme eigenvalues */
2499:     KSPSetType(pcbddc->coarse_ksp,check_ksp_type);
2500:     KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);
2501:     KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);
2502:     KSPSetUp(pcbddc->coarse_ksp);
2503:     VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);
2504:     MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);
2505:     MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
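    /* the right-hand side is obtained by applying the coarse operator twice to a random vector,
       presumably so that it lies in the range of a possibly singular coarse operator */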
2506:     KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);
2507:     KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);
2508:     if(dbg_flag) {
2509:       kappa_2=lambda_max/lambda_min;
2510:       KSPGetIterationNumber(pcbddc->coarse_ksp,&k);
2511:       VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);
2512:       VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);
2513:       PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);
2514:       PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);
2515:       PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);
2516:     }
2517:     /* restore coarse ksp to default values */
2518:     KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);
2519:     KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
2520:     KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
2521:     KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);
2522:     KSPSetFromOptions(pcbddc->coarse_ksp);
2523:     KSPSetUp(pcbddc->coarse_ksp);
2524:   }

2526:   /* free data structures no longer needed */
2527:   if(coarse_ISLG)                { ISLocalToGlobalMappingDestroy(&coarse_ISLG); }
2528:   if(ins_local_primal_indices)   { PetscFree(ins_local_primal_indices);  }
2529:   if(ins_coarse_mat_vals)        { PetscFree(ins_coarse_mat_vals);}
2530:   if(localsizes2)                { PetscFree(localsizes2);}
2531:   if(localdispl2)                { PetscFree(localdispl2);}
2532:   if(temp_coarse_mat_vals)       { PetscFree(temp_coarse_mat_vals);}

2534:   return(0);
2535: }

2539: static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
2540: {
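  /* Build the local interface graph from the adjacency of the local (Neumann) matrix, classify
     interface nodes into faces, edges and vertices consistently with the neighbouring subdomains,
     and store the resulting index sets (ISForFaces, ISForEdges, ISForVertices) in the PC_BDDC context.
     A minimal sketch of how the boundary information consumed here is typically provided, assuming
     setter counterparts of the PCBDDCGetNeumannBoundaries()/PCBDDCGetDirichletBoundaries() calls used below:
       IS neumann,dirichlet;
       ... build index sets of local boundary nodes ...
       PCBDDCSetNeumannBoundaries(pc,neumann);
       PCBDDCSetDirichletBoundaries(pc,dirichlet);
  */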

2542:   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2543:   PC_IS         *pcis = (PC_IS*)pc->data;
2544:   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2545:   PCBDDCGraph mat_graph;
2546:   Mat         mat_adj;
2547:   PetscInt    **neighbours_set;
2548:   PetscInt    *queue_in_global_numbering;
2549:   PetscInt    bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
2550:   PetscInt    total_counts,nodes_touched=0,where_values=1,vertex_size;
2551:   PetscMPIInt adapt_interface=0,adapt_interface_reduced=0;
2552:   PetscBool   same_set,flg_row;
2553:   PetscBool   symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE;
2554:   MPI_Comm    interface_comm=((PetscObject)pc)->comm;
2555:   PetscBool   use_faces=PETSC_FALSE,use_edges=PETSC_FALSE;
2556:   const PetscInt *neumann_nodes;
2557:   const PetscInt *dirichlet_nodes;
2558:   IS          used_IS;

2561:   /* allocate and initialize needed graph structure */
2562:   PetscMalloc(sizeof(*mat_graph),&mat_graph);
2563:   MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2564:   /* MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj); */
2565:   MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);
2566:   if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n");
2567:   i = mat_graph->nvtxs;
2568:   PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);
2569:   PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);
2570:   PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));
2571:   PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
2572:   PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));
2573:   PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
2574:   PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
2575:   for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;}
2576: 
2577:   /* Setting dofs splitting in mat_graph->which_dof */
2578:   if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */
2579:     PetscInt *is_indices;
2580:     PetscInt is_size;
2581:     for(i=0;i<pcbddc->n_ISForDofs;i++) {
2582:       ISGetSize(pcbddc->ISForDofs[i],&is_size);
2583:       ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
2584:       for(j=0;j<is_size;j++) {
2585:         mat_graph->which_dof[is_indices[j]]=i;
2586:       }
2587:       ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
2588:     }
2589:     /* use mat block size as vertex size */
2590:     MatGetBlockSize(matis->A,&vertex_size);
2591:   } else { /* otherwise it assumes a constant block size */
2592:     MatGetBlockSize(matis->A,&bs);
2593:     for(i=0;i<mat_graph->nvtxs/bs;i++) {
2594:       for(s=0;s<bs;s++) {
2595:         mat_graph->which_dof[i*bs+s]=s;
2596:       }
2597:     }
2598:     vertex_size=1;
2599:   }
2600:   /* count number of neigh per node */
2601:   total_counts=0;
2602:   for(i=1;i<pcis->n_neigh;i++){
2603:     s=pcis->n_shared[i];
2604:     total_counts+=s;
2605:     for(j=0;j<s;j++){
2606:       mat_graph->count[pcis->shared[i][j]] += 1;
2607:     }
2608:   }
2609:   /* Take Neumann data into account: increment the number of sharing subdomains for all interface nodes except face nodes */
2610:   PCBDDCGetNeumannBoundaries(pc,&used_IS);
2611:   if(used_IS) {
2612:     ISGetSize(used_IS,&neumann_bsize);
2613:     ISGetIndices(used_IS,&neumann_nodes);
2614:     for(i=0;i<neumann_bsize;i++){
2615:       iindex = neumann_nodes[i];
2616:       if(mat_graph->count[iindex] > 1){
2617:         mat_graph->count[iindex]+=1;
2618:         total_counts++;
2619:       }
2620:     }
2621:   }
2622:   /* allocate space for storing the set of neighbours of each node */
2623:   PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);
2624:   if(mat_graph->nvtxs) { PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]); }
2625:   for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1];
2626:   PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
2627:   for(i=1;i<pcis->n_neigh;i++){
2628:     s=pcis->n_shared[i];
2629:     for(j=0;j<s;j++) {
2630:       k=pcis->shared[i][j];
2631:       neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];
2632:       mat_graph->count[k]+=1;
2633:     }
2634:   }
2635:   /* set -1 fake neighbour to mimic Neumann boundary */
2636:   if(used_IS) {
2637:     for(i=0;i<neumann_bsize;i++){
2638:       iindex = neumann_nodes[i];
2639:       if(mat_graph->count[iindex] > 1){
2640:         neighbours_set[iindex][mat_graph->count[iindex]] = -1;
2641:         mat_graph->count[iindex]+=1;
2642:       }
2643:     }
2644:     ISRestoreIndices(used_IS,&neumann_nodes);
2645:   }
2646:   /* sort set of sharing subdomains (needed for comparison below) */
2647:   for(i=0;i<mat_graph->nvtxs;i++) { PetscSortInt(mat_graph->count[i],neighbours_set[i]); }
2648:   /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */
2649:   PCBDDCGetDirichletBoundaries(pc,&used_IS);
2650:   if(used_IS) {
2651:     ISGetSize(used_IS,&dirichlet_bsize);
2652:     ISGetIndices(used_IS,&dirichlet_nodes);
2653:     for(i=0;i<dirichlet_bsize;i++){
2654:       mat_graph->count[dirichlet_nodes[i]]=0;
2655:     }
2656:     ISRestoreIndices(used_IS,&dirichlet_nodes);
2657:   }
2658:   for(i=0;i<mat_graph->nvtxs;i++){
2659:     if(!mat_graph->count[i]){  /* interior nodes */
2660:       mat_graph->touched[i]=PETSC_TRUE;
2661:       mat_graph->where[i]=0;
2662:       nodes_touched++;
2663:     }
2664:   }
2665:   mat_graph->ncmps = 0;
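  /* Group the untouched (interface) nodes: nodes with the same dof number and the same sorted set
     of sharing subdomains get the same where[] value; each group is later split into connected components */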
2666:   while(nodes_touched<mat_graph->nvtxs) {
2667:     /*  find first untouched node in local ordering */
2668:     i=0;
2669:     while(mat_graph->touched[i]) i++;
2670:     mat_graph->touched[i]=PETSC_TRUE;
2671:     mat_graph->where[i]=where_values;
2672:     nodes_touched++;
2673:     /* now find all other nodes having the same set of sharing subdomains */
2674:     for(j=i+1;j<mat_graph->nvtxs;j++){
2675:       /* check for same number of sharing subdomains and dof number */
2676:       if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){
2677:         /* check for same set of sharing subdomains */
2678:         same_set=PETSC_TRUE;
2679:         for(k=0;k<mat_graph->count[j];k++){
2680:           if(neighbours_set[i][k]!=neighbours_set[j][k]) {
2681:             same_set=PETSC_FALSE;
2682:           }
2683:         }
2684:         /* I found a friend of mine */
2685:         if(same_set) {
2686:           mat_graph->where[j]=where_values;
2687:           mat_graph->touched[j]=PETSC_TRUE;
2688:           nodes_touched++;
2689:         }
2690:       }
2691:     }
2692:     where_values++;
2693:   }
2694:   where_values--; if(where_values<0) where_values=0;
2695:   PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
2696:   /* Find connected components defined on the shared interface */
2697:   if(where_values) {
2698:     PCBDDCFindConnectedComponents(mat_graph, where_values);
2699:     /* For consistency among neighbouring procs, sort each connected component by global ordering */
2700:     for(i=0;i<mat_graph->ncmps;i++) {
2701:       ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
2702:       PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
2703:     }
2704:   }
2705:   /* check consistency of connected components among neighbouring subdomains and adapt them if needed */
2706:   for(i=0;i<where_values;i++) {
2707:     /* connected components are not guaranteed to match among subdomains sharing a subset of the local interface */
2708:     if(mat_graph->where_ncmps[i]>1) {
2709:       adapt_interface=1;
2710:       break;
2711:     }
2712:   }
2713:   MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPI_INT,MPI_LOR,interface_comm);
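  /* Adapt the interface: exchange with the neighbouring subdomains the number of local connected
     components and then the components themselves (in global numbering), intersect the received
     partitions node by node, and rebuild a consistent where[]/connected-component structure */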
2714:   if(where_values && adapt_interface_reduced) {


2718:     PetscInt sum_requests=0,my_rank;
2719:     PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send;
2720:     PetscInt temp_buffer_size,ins_val,global_where_counter;
2721:     PetscInt *cum_recv_counts;
2722:     PetscInt *where_to_nodes_indices;
2723:     PetscInt *petsc_buffer;
2724:     PetscMPIInt *recv_buffer;
2725:     PetscMPIInt *recv_buffer_where;
2726:     PetscMPIInt *send_buffer;
2727:     PetscMPIInt size_of_send;
2728:     PetscInt *sizes_of_sends;
2729:     MPI_Request *send_requests;
2730:     MPI_Request *recv_requests;
2731:     PetscInt *where_cc_adapt;
2732:     PetscInt **temp_buffer;
2733:     PetscInt *nodes_to_temp_buffer_indices;
2734:     PetscInt *add_to_where;

2736:     MPI_Comm_rank(interface_comm,&my_rank);
2737:     PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);
2738:     PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));
2739:     PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);
2740:     /* first count how many neighbours per connected component I will receive from */
2741:     cum_recv_counts[0]=0;
2742:     for(i=1;i<where_values+1;i++){
2743:       j=0;
2744:       while(mat_graph->where[j] != i) j++;
2745:       where_to_nodes_indices[i-1]=j;
2746:       if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* we don't want sends/recvs to/from self, so this rank is not counted here */
2747:       else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; }
2748:     }
2749:     buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs;
2750:     PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);
2751:     PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);
2752:     PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);
2753:     PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);
2754:     for(i=0;i<cum_recv_counts[where_values];i++) {
2755:       send_requests[i]=MPI_REQUEST_NULL;
2756:       recv_requests[i]=MPI_REQUEST_NULL;
2757:     }
2758:     /* exchange with my neighbours the number of my connected components on the shared interface */
2759:     for(i=0;i<where_values;i++){
2760:       j=where_to_nodes_indices[i];
2761:       k = (neighbours_set[j][0] == -1 ?  1 : 0);
2762:       for(;k<mat_graph->count[j];k++){
2763:         MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
2764:         MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
2765:         sum_requests++;
2766:       }
2767:     }
2768:     MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
2769:     MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
2770:     /* determine the connected component I need to adapt */
2771:     PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);
2772:     PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));
2773:     for(i=0;i<where_values;i++){
2774:       for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){
2775:         /* The first condition is natural (i.e. a neighbour has a different number of ccs than this rank), the second one is just to be safe */
2776:         if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) {
2777:           where_cc_adapt[i]=PETSC_TRUE;
2778:           break;
2779:         }
2780:       }
2781:     }
2782:     /* now get the ccs (in global numbering) from the neighbours and adapt them if needed */
2783:     /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */
2784:     PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);
2785:     PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));
2786:     sum_requests=0;
2787:     start_of_send=0;
2788:     start_of_recv=cum_recv_counts[where_values];
2789:     for(i=0;i<where_values;i++) {
2790:       if(where_cc_adapt[i]) {
2791:         size_of_send=0;
2792:         for(j=i;j<mat_graph->ncmps;j++) {
2793:           if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* note: where[] values range from 1 to where_values inclusive */
2794:             send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j];
2795:             size_of_send+=1;
2796:             for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) {
2797:               send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k];
2798:             }
2799:             size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
2800:           }
2801:         }
2802:         j = where_to_nodes_indices[i];
2803:         k = (neighbours_set[j][0] == -1 ?  1 : 0);
2804:         for(;k<mat_graph->count[j];k++){
2805:           MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
2806:           MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
2807:           sum_requests++;
2808:         }
2809:         sizes_of_sends[i]=size_of_send;
2810:         start_of_send+=size_of_send;
2811:       }
2812:     }
2813:     MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
2814:     MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
2815:     buffer_size=0;
2816:     for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; }
2817:     PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);
2818:     /* now exchange the data */
2819:     start_of_recv=0;
2820:     start_of_send=0;
2821:     sum_requests=0;
2822:     for(i=0;i<where_values;i++) {
2823:       if(where_cc_adapt[i]) {
2824:         size_of_send = sizes_of_sends[i];
2825:         j = where_to_nodes_indices[i];
2826:         k = (neighbours_set[j][0] == -1 ?  1 : 0);
2827:         for(;k<mat_graph->count[j];k++){
2828:           MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
2829:           size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
2830:           MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
2831:           start_of_recv+=size_of_recv;
2832:           sum_requests++;
2833:         }
2834:         start_of_send+=size_of_send;
2835:       }
2836:     }
2837:     MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
2838:     MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
2839:     PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);
2840:     for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; }
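    /* received data is a sequence of [component size, global node indices]: convert the indices of
       each component to local numbering in place (IS_GTOLM_MASK keeps the count unchanged and maps
       nodes not belonging to the local subdomain to -1) */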
2841:     for(j=0;j<buffer_size;) {
2842:        ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);
2843:        k=petsc_buffer[j]+1;
2844:        j+=k;
2845:     }
2846:     sum_requests=cum_recv_counts[where_values];
2847:     start_of_recv=0;
2848:     PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);
2849:     global_where_counter=0;
2850:     for(i=0;i<where_values;i++){
2851:       if(where_cc_adapt[i]){
2852:         temp_buffer_size=0;
2853:         /* find nodes on the shared interface we need to adapt */
2854:         for(j=0;j<mat_graph->nvtxs;j++){
2855:           if(mat_graph->where[j]==i+1) {
2856:             nodes_to_temp_buffer_indices[j]=temp_buffer_size;
2857:             temp_buffer_size++;
2858:           } else {
2859:             nodes_to_temp_buffer_indices[j]=-1;
2860:           }
2861:         }
2862:         /* allocate some temporary space */
2863:         PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);
2864:         PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);
2865:         PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));
2866:         for(j=1;j<temp_buffer_size;j++){
2867:           temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
2868:         }
2869:         /* analyze contributions from neighbouring subdomains for the i-th connected component.
2870:            temp_buffer structure:
2871:            suppose a part of the interface has dimension 5 (global nodes 0,1,2,3,4) and
2872:            3 neighbouring procs with the following connected components:
2873:              neigh 0: [0 1 4], [2 3];    (2 connected components)
2874:              neigh 1: [0 1], [2 3 4];    (2 connected components)
2875:              neigh 2: [0 4], [1], [2 3]; (3 connected components)
2876:            temp_buffer (row-oriented) is then filled as:
2877:              [ 0, 0, 0;
2878:                0, 0, 1;
2879:                1, 1, 2;
2880:                1, 1, 2;
2881:                0, 1, 0; ];
2882:            This way the resulting structure can be recovered simply, accounting for possible intersections of ccs among neighbours.
2883:            The mat_graph->where array will be modified to reproduce the following 4 connected components: [0], [1], [2 3], [4]. */
2885:         for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
2886:           ins_val=0;
2887:           size_of_recv=recv_buffer_where[sum_requests];  /* total size of recv from neighs */
2888:           for(buffer_size=0;buffer_size<size_of_recv;) {  /* loop until all data from neighs has been taken into account */
2889:             for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* properly fill temp_buffer using data from a single recv */
2890:               temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val;
2891:             }
2892:             buffer_size+=k;
2893:             ins_val++;
2894:           }
2895:           start_of_recv+=size_of_recv;
2896:           sum_requests++;
2897:         }
2898:         PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);
2899:         PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));
2900:         for(j=0;j<temp_buffer_size;j++){
2901:           if(!add_to_where[j]){ /* found a new cc  */
2902:             global_where_counter++;
2903:             add_to_where[j]=global_where_counter;
2904:             for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */
2905:               same_set=PETSC_TRUE;
2906:               for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){
2907:                 if(temp_buffer[j][s]!=temp_buffer[k][s]) {
2908:                   same_set=PETSC_FALSE;
2909:                   break;
2910:                 }
2911:               }
2912:               if(same_set) add_to_where[k]=global_where_counter;
2913:             }
2914:           }
2915:         }
2916:         /* insert new data in where array */
2917:         temp_buffer_size=0;
2918:         for(j=0;j<mat_graph->nvtxs;j++){
2919:           if(mat_graph->where[j]==i+1) {
2920:             mat_graph->where[j]=where_values+add_to_where[temp_buffer_size];
2921:             temp_buffer_size++;
2922:           }
2923:         }
2924:         PetscFree(temp_buffer[0]);
2925:         PetscFree(temp_buffer);
2926:         PetscFree(add_to_where);
2927:       }
2928:     }
2929:     PetscFree(nodes_to_temp_buffer_indices);
2930:     PetscFree(sizes_of_sends);
2931:     PetscFree(send_requests);
2932:     PetscFree(recv_requests);
2933:     PetscFree(petsc_buffer);
2934:     PetscFree(recv_buffer);
2935:     PetscFree(recv_buffer_where);
2936:     PetscFree(send_buffer);
2937:     PetscFree(cum_recv_counts);
2938:     PetscFree(where_to_nodes_indices);
2939:     /* We are ready to evaluate consistent connected components on each part of the shared interface */
2940:     if(global_where_counter) {
2941:       for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; }
2942:       global_where_counter=0;
2943:       for(i=0;i<mat_graph->nvtxs;i++){
2944:         if(mat_graph->where[i] && !mat_graph->touched[i]) {
2945:           global_where_counter++;
2946:           for(j=i+1;j<mat_graph->nvtxs;j++){
2947:             if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
2948:               mat_graph->where[j]=global_where_counter;
2949:               mat_graph->touched[j]=PETSC_TRUE;
2950:             }
2951:           }
2952:           mat_graph->where[i]=global_where_counter;
2953:           mat_graph->touched[i]=PETSC_TRUE;
2954:         }
2955:       }
2956:       where_values=global_where_counter;
2957:     }
2958:     if(global_where_counter) {
2959:       PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
2960:       PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
2961:       PetscFree(mat_graph->where_ncmps);
2962:       PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
2963:       PCBDDCFindConnectedComponents(mat_graph, where_values);
2964:       for(i=0;i<mat_graph->ncmps;i++) {
2965:         ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
2966:         PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
2967:       }
2968:     }
2969:   } /* Finished adapting interface */
2970:   PetscInt nfc=0;
2971:   PetscInt nec=0;
2972:   PetscInt nvc=0;
2973:   PetscBool twodim_flag=PETSC_FALSE;
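  /* Classify connected components: components larger than vertex_size shared by exactly one other
     subdomain (count==1) are faces, those shared by more subdomains are edges (nec stays zero in 2D
     and is fixed up below); smaller components contribute their nodes to the vertex set */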
2974:   for (i=0; i<mat_graph->ncmps; i++) {
2975:     if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
2976:       if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */
2977:         nfc++;
2978:       } else { /* note that nec will be zero in 2d */
2979:         nec++;
2980:       }
2981:     } else {
2982:       nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i];
2983:     }
2984:   }

2986:   if(!nec) { /* we are in a 2d case -> no faces, only edges */
2987:     nec = nfc;
2988:     nfc = 0;
2989:     twodim_flag = PETSC_TRUE;
2990:   }
2991:   /* allocate IS arrays for faces, edges. Vertices need a single index set. 
2992:      Reusing space allocated in mat_graph->where for creating IS objects */
2993:   if(!pcbddc->vertices_flag && !pcbddc->edges_flag) {
2994:     PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);
2995:     use_faces=PETSC_TRUE;
2996:   }
2997:   if(!pcbddc->vertices_flag && !pcbddc->faces_flag) {
2998:     PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);
2999:     use_edges=PETSC_TRUE;
3000:   }
3001:   nfc=0;
3002:   nec=0;
3003:   for (i=0; i<mat_graph->ncmps; i++) {
3004:     if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
3005:       for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) {
3006:         mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j];
3007:       }
3008:       if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){
3009:         if(twodim_flag) {
3010:           if(use_edges) {
3011:             ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
3012:             nec++;
3013:           }
3014:         } else {
3015:           if(use_faces) {
3016:             ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);
3017:             nfc++;
3018:           }
3019:         }
3020:       } else {
3021:         if(use_edges) {
3022:           ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
3023:           nec++;
3024:         }
3025:       }
3026:     }
3027:   }
3028:   pcbddc->n_ISForFaces=nfc;
3029:   pcbddc->n_ISForEdges=nec;
3030:   nvc=0;
3031:   if( !pcbddc->constraints_flag ) {
3032:     for (i=0; i<mat_graph->ncmps; i++) {
3033:       if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){
3034:         for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) {
3035:           mat_graph->where[nvc]=mat_graph->queue[j];
3036:           nvc++;
3037:         }
3038:       }
3039:     }
3040:   }
3041:   /* sort vertex set (by local ordering) */
3042:   PetscSortInt(nvc,mat_graph->where);
3043:   ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);

3045:   if(pcbddc->dbg_flag) {
3046:     PetscViewer viewer=pcbddc->dbg_viewer;

3048:     PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
3049:     PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);
3050:     PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
3051: /*    PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");
3052:     PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
3053:     for(i=0;i<mat_graph->nvtxs;i++) {
3054:       PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);
3055:       for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){
3056:         PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);
3057:       }
3058:       PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");
3059:     }
3060:     PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);
3061:     for(i=0;i<mat_graph->ncmps;i++) {
3062:       PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n",
3063:              i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);
3064:       for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){
3065:         PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);
3066:       }
3067:     }
3068:     PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");*/
3069:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);
3070:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);
3071:     PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);
3072:     PetscViewerFlush(viewer);
3073:   }

3075:   /* Restore the CSR structure of the sequential adjacency matrix and free memory no longer needed */
3076:   MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);
3077:   if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n");
3078:   MatDestroy(&mat_adj);
3079:   /* Free graph structure */
3080:   if(mat_graph->nvtxs){
3081:     PetscFree(neighbours_set[0]);
3082:     PetscFree(neighbours_set);
3083:     PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);
3084:     PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);
3085:     PetscFree(mat_graph->where_ncmps);
3086:   }
3087:   PetscFree(mat_graph);

3089:   return(0);

3091: }

3093: /* -------------------------------------------------------------------------- */

3095: /* The following code has been adapted from function IsConnectedSubdomain contained 
3096:    in source file contig.c of METIS library (version 5.0.1)                           */
3097: 
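/* For each value pid=1..n_dist of graph->where, run a breadth-first search restricted to the nodes
   with where[]==pid, filling graph->queue and graph->cptr with the connected components found and
   storing in graph->where_ncmps[pid-1] the number of components of that class; graph->ncmps is the
   total number of components found */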
3100: static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
3101: {
3102:   PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
3103:   PetscInt *xadj, *adjncy, *where, *queue;
3104:   PetscInt *cptr;
3105:   PetscBool *touched;
3106: 

3109:   nvtxs   = graph->nvtxs;
3110:   xadj    = graph->xadj;
3111:   adjncy  = graph->adjncy;
3112:   where   = graph->where;
3113:   touched = graph->touched;
3114:   queue   = graph->queue;
3115:   cptr    = graph->cptr;

3117:   for (i=0; i<nvtxs; i++)
3118:     touched[i] = PETSC_FALSE;

3120:   cum_queue=0;
3121:   ncmps=0;

3123:   for(n=0; n<n_dist; n++) {
3124:     pid = n+1;
3125:     nleft = 0;
3126:     for (i=0; i<nvtxs; i++) {
3127:       if (where[i] == pid)
3128:         nleft++;
3129:     }
3130:     for (i=0; i<nvtxs; i++) {
3131:       if (where[i] == pid)
3132:         break;
3133:     }
3134:     touched[i] = PETSC_TRUE;
3135:     queue[cum_queue] = i;
3136:     first = 0; last = 1;
3137:     cptr[ncmps] = cum_queue;  /* This actually points to queue */
3138:     ncmps_pid = 0;
3139:     while (first != nleft) {
3140:       if (first == last) { /* Find another starting vertex */
3141:         cptr[++ncmps] = first+cum_queue;
3142:         ncmps_pid++;
3143:         for (i=0; i<nvtxs; i++) {
3144:           if (where[i] == pid && !touched[i])
3145:             break;
3146:         }
3147:         queue[cum_queue+last] = i;
3148:         last++;
3149:         touched[i] = PETSC_TRUE;
3150:       }
3151:       i = queue[cum_queue+first];
3152:       first++;
3153:       for (j=xadj[i]; j<xadj[i+1]; j++) {
3154:         k = adjncy[j];
3155:         if (where[k] == pid && !touched[k]) {
3156:           queue[cum_queue+last] = k;
3157:           last++;
3158:           touched[k] = PETSC_TRUE;
3159:         }
3160:       }
3161:     }
3162:     cptr[++ncmps] = first+cum_queue;
3163:     ncmps_pid++;
3164:     cum_queue=cptr[ncmps];
3165:     graph->where_ncmps[n] = ncmps_pid;
3166:   }
3167:   graph->ncmps = ncmps;

3169:   return(0);
3170: }