Actual source code: bddc.c
petsc-3.3-p7 2013-05-11
1: /* TODOLIST
2: DofSplitting and DM attached to pc.
3: Exact solvers: Solve local saddle point directly for very hard problems
4: - change prec_type to switch_inexact_prec_type
5: - add bool solve_exact_saddle_point slot to pcbddc data
6: Inexact solvers: global preconditioner application is ready, ask developers (Jed?) how best to implement Dohrmann's approach (PCSHELL?)
7: change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment):
8: - mind the problem with coarsening_factor
9: - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
10: - remove coarse enums and allow use of PCBDDCGetCoarseKSP
11: - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries?
12: - Add levels' slot to bddc data structure and associated Set/Get functions
13: code refactoring:
14: - pick better names for static functions
15: check log_summary for leaks (currently: 1 Vec per level)
16: change options structure:
17: - insert BDDC into MG framework?
18: provide other ops? Ask developers
19: remove all unused printf
20: remove // comments and adhere to PETSc code requirements
21: man pages
22: */
24: /* ----------------------------------------------------------------------------------------------------------------------------------------------
25: Implementation of BDDC preconditioner based on:
26: C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007
27: ---------------------------------------------------------------------------------------------------------------------------------------------- */
29: #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */
30: #include <petscblaslapack.h>
32: /* -------------------------------------------------------------------------- */
35: PetscErrorCode PCSetFromOptions_BDDC(PC pc)
36: {
37: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
41: PetscOptionsHead("BDDC options");
42: /* Verbose debugging of main data structures */
43: PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);
44: /* Some customization for default primal space */
45: PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);
46: PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);
47: PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);
48: PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);
49: /* Coarse solver context */
50: static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /* order of choices depends on the enum defined in bddc.h */
51: PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);
52: /* Two different applications of BDDC to the whole set of dofs: internal and interface */
53: PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);
54: PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);
55: PetscOptionsTail();
56: return(0);
57: }
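   /*
      Illustrative note (not part of the original source): the options registered above can be
      given on the command line or set programmatically before the options are processed. The
      option names are taken from this routine; the values below are arbitrary examples.

        PetscOptionsSetValue("-pc_bddc_coarse_problem_type","multilevel");
        PetscOptionsSetValue("-pc_bddc_coarsening_ratio","4");
        PetscOptionsSetValue("-pc_bddc_check_all",PETSC_NULL);
   */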
59: /* -------------------------------------------------------------------------- */
60: EXTERN_C_BEGIN
63: static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
64: {
65: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
68: pcbddc->coarse_problem_type = CPT;
69: return(0);
70: }
71: EXTERN_C_END
73: /* -------------------------------------------------------------------------- */
76: /*@
77: PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC.
79: Not collective
81: Input Parameters:
82: + pc - the preconditioning context
83: - CPT - the coarse problem type (TODO: pick a better name and explain what this is)
85: Level: intermediate
87: Notes:
88: Not collective, but all processes must call this routine.
90: .seealso: PCBDDC
91: @*/
92: PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
93: {
98: PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));
99: return(0);
100: }
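   /*
      Minimal usage sketch (not part of the original source; it assumes ksp is an existing KSP
      whose PC has already been set to PCBDDC, and uses the enum values handled below in
      PCBDDCScatterCoarseDataEnd):

        PC pc;
        KSPGetPC(ksp,&pc);
        PCBDDCSetCoarseProblemType(pc,MULTILEVEL_BDDC);
   */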
102: /* -------------------------------------------------------------------------- */
103: EXTERN_C_BEGIN
106: static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries)
107: {
108: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
112: ISDestroy(&pcbddc->DirichletBoundaries);
113: PetscObjectReference((PetscObject)DirichletBoundaries);
114: pcbddc->DirichletBoundaries=DirichletBoundaries;
115: return(0);
116: }
117: EXTERN_C_END
119: /* -------------------------------------------------------------------------- */
122: /*@
123: PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of
124: Dirichlet boundaries for the global problem.
126: Not collective
128: Input Parameters:
129: + pc - the preconditioning context
130: - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL)
132: Level: intermediate
134: Notes:
136: .seealso: PCBDDC
137: @*/
138: PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries)
139: {
144: PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));
145: return(0);
146: }
147: /* -------------------------------------------------------------------------- */
148: EXTERN_C_BEGIN
151: static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries)
152: {
153: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
157: ISDestroy(&pcbddc->NeumannBoundaries);
158: PetscObjectReference((PetscObject)NeumannBoundaries);
159: pcbddc->NeumannBoundaries=NeumannBoundaries;
160: return(0);
161: }
162: EXTERN_C_END
164: /* -------------------------------------------------------------------------- */
167: /*@
168: PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of
169: Neumann boundaries for the global problem.
171: Not collective
173: Input Parameters:
174: + pc - the preconditioning context
175: - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL)
177: Level: intermediate
179: Notes:
181: .seealso: PCBDDC
182: @*/
183: PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
184: {
189: PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));
190: return(0);
191: }
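   /*
      Usage sketch (not part of the original source): build sequential index sets containing the
      local dof indices lying on the Dirichlet and Neumann parts of the boundary and pass them to
      the preconditioner. Here n_dir, dir_dofs, n_neu and neu_dofs are application-provided
      placeholders; the setters take their own reference, so the user may destroy the ISs afterwards.

        IS dirIS,neuIS;
        ISCreateGeneral(PETSC_COMM_SELF,n_dir,dir_dofs,PETSC_COPY_VALUES,&dirIS);
        ISCreateGeneral(PETSC_COMM_SELF,n_neu,neu_dofs,PETSC_COPY_VALUES,&neuIS);
        PCBDDCSetDirichletBoundaries(pc,dirIS);
        PCBDDCSetNeumannBoundaries(pc,neuIS);
        ISDestroy(&dirIS);
        ISDestroy(&neuIS);
   */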
192: /* -------------------------------------------------------------------------- */
193: EXTERN_C_BEGIN
196: static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
197: {
198: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
201: *NeumannBoundaries = pcbddc->NeumannBoundaries;
202: return(0);
203: }
204: EXTERN_C_END
206: /* -------------------------------------------------------------------------- */
209: /*@
210: PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of
211: Neumann boundaries for the global problem.
213: Not collective
215: Input Parameters:
216: + pc - the preconditioning context
218: Output Parameters:
219: + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries
221: Level: intermediate
223: Notes:
225: .seealso: PCBDDC
226: @*/
227: PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries)
228: {
233: PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));
234: return(0);
235: }
236: /* -------------------------------------------------------------------------- */
237: EXTERN_C_BEGIN
240: static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries)
241: {
242: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
245: *DirichletBoundaries = pcbddc->DirichletBoundaries;
246: return(0);
247: }
248: EXTERN_C_END
250: /* -------------------------------------------------------------------------- */
253: /*@
254: PCBDDCGetDirichletBoundaries - Get index set defining subdomain part of
255: Dirichlet boundaries for the global problem.
257: Not collective
259: Input Parameters:
260: + pc - the preconditioning context
262: Output Parameters:
263: + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries
265: Level: intermediate
267: Notes:
269: .seealso: PCBDDC
270: @*/
271: PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries)
272: {
277: PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));
278: return(0);
279: }
281: /* -------------------------------------------------------------------------- */
282: EXTERN_C_BEGIN
285: static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[])
286: {
287: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
288: PetscInt i;
292: /* Destroy ISs if they were already set */
293: for(i=0;i<pcbddc->n_ISForDofs;i++) {
294: ISDestroy(&pcbddc->ISForDofs[i]);
295: }
296: PetscFree(pcbddc->ISForDofs);
298: /* allocate space then copy ISs */
299: PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);
300: for(i=0;i<n_is;i++) {
301: ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);
302: ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);
303: }
304: pcbddc->n_ISForDofs=n_is;
306: return(0);
307: }
308: EXTERN_C_END
310: /* -------------------------------------------------------------------------- */
313: /*@
314: PCBDDCSetDofsSplitting - Set index sets defining how the dofs are split.
316: Not collective
318: Input Parameters:
319: + pc - the preconditioning context
320: - n_is - number of index sets defining the dofs splitting
321: - ISForDofs - array of IS describing the dofs splitting
323: Level: intermediate
325: Notes:
326: Sequential ISs are copied; the user must destroy the array of IS passed in.
328: .seealso: PCBDDC
329: @*/
330: PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[])
331: {
336: PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));
337: return(0);
338: }
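   /*
      Usage sketch (not part of the original source): describe a splitting of the local dofs into
      two contiguous fields of sizes n0 and n1 (application-provided placeholders). The routine
      copies the index sets, so they can be destroyed right after the call.

        IS fields[2];
        ISCreateStride(PETSC_COMM_SELF,n0,0,1,&fields[0]);
        ISCreateStride(PETSC_COMM_SELF,n1,n0,1,&fields[1]);
        PCBDDCSetDofsSplitting(pc,2,fields);
        ISDestroy(&fields[0]);
        ISDestroy(&fields[1]);
   */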
342: /* -------------------------------------------------------------------------- */
343: /*
344: PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
345: by setting data structures and options.
347: Input Parameter:
348: + pc - the preconditioner context
350: Application Interface Routine: PCSetUp()
352: Notes:
353: The interface routine PCSetUp() is not usually called directly by
354: the user, but instead is called by PCApply() if necessary.
355: */
356: PetscErrorCode PCSetUp_BDDC(PC pc)
357: {
359: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
360: PC_IS *pcis = (PC_IS*)(pc->data);
363: if (!pc->setupcalled) {
364: /* For BDDC we need to define a local "Neumann" problem different from the one defined in PCISSetUp,
365: so we set the PC type of the PCIS Neumann problem to none to avoid unneeded computation.
366: We also build the (same) Dirichlet problem directly. */
367: PetscOptionsSetValue("-is_localN_pc_type","none");
368: PetscOptionsSetValue("-is_localD_pc_type","none");
369: /* Set up all the "iterative substructuring" common block */
370: PCISSetUp(pc);
371: /* Get stdout for dbg */
372: if(pcbddc->dbg_flag) {
373: PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);
374: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
375: }
376: /* TODO MOVE CODE FRAGMENT */
377: PetscInt im_active=0;
378: if(pcis->n) im_active = 1;
379: MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);
380: /* Analyze local interface */
381: PCBDDCManageLocalBoundaries(pc);
382: /* Set up local constraint matrix */
383: PCBDDCCreateConstraintMatrix(pc);
384: /* Create coarse and local stuffs used for evaluating action of preconditioner */
385: PCBDDCCoarseSetUp(pc);
386: /* Processes only formally involved in the multilevel scheme should not call ISLocalToGlobalMappingRestoreInfo */
387: if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE;
388: }
389: return(0);
390: }
392: /* -------------------------------------------------------------------------- */
393: /*
394: PCApply_BDDC - Applies the BDDC preconditioner to a vector.
396: Input Parameters:
397: . pc - the preconditioner context
398: . r - input vector (global)
400: Output Parameter:
401: . z - output vector (global)
403: Application Interface Routine: PCApply()
404: */
407: PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
408: {
409: PC_IS *pcis = (PC_IS*)(pc->data);
410: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
411: PetscErrorCode ierr;
412: const PetscScalar one = 1.0;
413: const PetscScalar m_one = -1.0;
415: /* This code is similar to that provided in nn.c for PCNN,
416: with the NN interface preconditioner changed to BDDC.
417: Support for the M_3 preconditioner of the reference article has been added (that code is active if pcbddc->prec_type = PETSC_TRUE) */
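   /* Outline of the algebra performed below (descriptive note, not part of the original source):
        1) interior (Dirichlet) solve:          vec2_D = A_II^{-1} r_I
        2) static condensation of the rhs:      z_B    = r_B - A_BI vec2_D
           (with prec_type also the interior residual is accumulated in vec1_D)
        3) PCBDDCApplyInterfacePreconditioner applies the coarse and local BDDC corrections to z
        4) a second interior solve extends the interface correction back to the interior dofs,
           which are then scattered into z together with the first interior solution            */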
420: /* First Dirichlet solve */
421: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
422: VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
423: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
424: /*
425: Assembling right hand side for BDDC operator
426: - vec1_D for the Dirichlet part (if needed, i.e. prec_type=PETSC_TRUE)
427: - the interface part of the global vector z
428: */
429: VecScale(pcis->vec2_D,m_one);
430: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
431: if(pcbddc->prec_type) { MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D); }
432: VecScale(pcis->vec2_D,m_one);
433: VecCopy(r,z);
434: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
435: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
437: /*
438: Apply interface preconditioner
439: Results are stored in:
440: - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE)
441: - the interface part of the global vector z
442: */
443: PCBDDCApplyInterfacePreconditioner(pc,z);
445: /* Second Dirichlet solve and assembling of output */
446: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
447: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
448: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);
449: if(pcbddc->prec_type) { MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D); }
450: KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);
451: VecScale(pcbddc->vec4_D,m_one);
452: if(pcbddc->prec_type) { VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D); }
453: VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);
454: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
455: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
457: return(0);
459: }
461: /* -------------------------------------------------------------------------- */
462: /*
463: PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface.
464:
465: */
468: static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z)
469: {
471: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
472: PC_IS* pcis = (PC_IS*) (pc->data);
473: const PetscScalar zero = 0.0;
476: /* Get Local boundary and apply partition of unity */
477: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
478: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
479: VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
481: /* Application of PHI^T */
482: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
483: if(pcbddc->prec_type) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
485: /* Scatter data of coarse_rhs */
486: if(pcbddc->coarse_rhs) VecSet(pcbddc->coarse_rhs,zero);
487: PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
489: /* Local solution on R nodes */
490: VecSet(pcbddc->vec1_R,zero);
491: VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
492: VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
493: if(pcbddc->prec_type) {
494: VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
495: VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
496: }
497: PCBDDCSolveSaddlePoint(pc);
498: VecSet(pcis->vec1_B,zero);
499: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
500: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
501: if(pcbddc->prec_type) {
502: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
503: VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
504: }
506: /* Coarse solution */
507: PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
508: if(pcbddc->coarse_rhs) KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
509: PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
510: PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
512: /* Sum contributions from two levels */
513: /* Apply partition of unity and sum boundary values */
514: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
515: VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
516: if(pcbddc->prec_type) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
517: VecSet(z,zero);
518: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
519: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
521: return(0);
522: }
525: /* -------------------------------------------------------------------------- */
526: /*
527: PCBDDCSolveSaddlePoint
528:
529: */
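/*
   Descriptive note (not part of the original source): with the matrices assembled in
   PCBDDCCoarseSetUp, namely local_auxmat2 = -A_RR^{-1} C_CR^T and
   local_auxmat1 = ( C_CR A_RR^{-1} C_CR^T )^{-1} C_CR, the routine below returns in vec2_R
   the u-component of the constrained local problem

       [ A_RR   C_CR^T ] [ u      ]   [ vec1_R ]
       [ C_CR     0    ] [ lambda ] = [   0    ]

   computed as u = A_RR^{-1} vec1_R + local_auxmat2 * ( local_auxmat1 * ( A_RR^{-1} vec1_R ) ).
*/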
532: static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
533: {
535: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
539: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
540: if(pcbddc->n_constraints) {
541: MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);
542: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);
543: }
545: return(0);
546: }
547: /* -------------------------------------------------------------------------- */
548: /*
549: PCBDDCScatterCoarseDataBegin
550:
551: */
554: static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
555: {
557: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
561: switch(pcbddc->coarse_communications_type){
562: case SCATTERS_BDDC:
563: VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
564: break;
565: case GATHERS_BDDC:
566: break;
567: }
568: return(0);
570: }
571: /* -------------------------------------------------------------------------- */
572: /*
573: PCBDDCScatterCoarseDataEnd
574:
575: */
578: static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
579: {
581: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
582: PetscScalar* array_to;
583: PetscScalar* array_from;
584: MPI_Comm comm=((PetscObject)pc)->comm;
585: PetscInt i;
589: switch(pcbddc->coarse_communications_type){
590: case SCATTERS_BDDC:
591: VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
592: break;
593: case GATHERS_BDDC:
594: if(vec_from) VecGetArray(vec_from,&array_from);
595: if(vec_to) VecGetArray(vec_to,&array_to);
596: switch(pcbddc->coarse_problem_type){
597: case SEQUENTIAL_BDDC:
598: if(smode == SCATTER_FORWARD) {
599: MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);
600: if(vec_to) {
601: for(i=0;i<pcbddc->replicated_primal_size;i++)
602: array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
603: }
604: } else {
605: if(vec_from)
606: for(i=0;i<pcbddc->replicated_primal_size;i++)
607: pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
608: MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);
609: }
610: break;
611: case REPLICATED_BDDC:
612: if(smode == SCATTER_FORWARD) {
613: MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);
614: for(i=0;i<pcbddc->replicated_primal_size;i++)
615: array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
616: } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
617: for(i=0;i<pcbddc->local_primal_size;i++)
618: array_to[i]=array_from[pcbddc->local_primal_indices[i]];
619: }
620: break;
621: case MULTILEVEL_BDDC:
622: break;
623: case PARALLEL_BDDC:
624: break;
625: }
626: if(vec_from) VecRestoreArray(vec_from,&array_from);
627: if(vec_to) VecRestoreArray(vec_to,&array_to);
628: break;
629: }
630: return(0);
632: }
634: /* -------------------------------------------------------------------------- */
635: /*
636: PCDestroy_BDDC - Destroys the private context for the BDDC preconditioner
637: that was created with PCCreate_BDDC().
639: Input Parameter:
640: . pc - the preconditioner context
642: Application Interface Routine: PCDestroy()
643: */
646: PetscErrorCode PCDestroy_BDDC(PC pc)
647: {
648: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
652: /* free data created by PCIS */
653: PCISDestroy(pc);
654: /* free BDDC data */
655: VecDestroy(&pcbddc->coarse_vec);
656: VecDestroy(&pcbddc->coarse_rhs);
657: KSPDestroy(&pcbddc->coarse_ksp);
658: MatDestroy(&pcbddc->coarse_mat);
659: MatDestroy(&pcbddc->coarse_phi_B);
660: MatDestroy(&pcbddc->coarse_phi_D);
661: VecDestroy(&pcbddc->vec1_P);
662: VecDestroy(&pcbddc->vec1_C);
663: MatDestroy(&pcbddc->local_auxmat1);
664: MatDestroy(&pcbddc->local_auxmat2);
665: VecDestroy(&pcbddc->vec1_R);
666: VecDestroy(&pcbddc->vec2_R);
667: VecDestroy(&pcbddc->vec4_D);
668: VecScatterDestroy(&pcbddc->R_to_B);
669: VecScatterDestroy(&pcbddc->R_to_D);
670: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
671: KSPDestroy(&pcbddc->ksp_D);
672: KSPDestroy(&pcbddc->ksp_R);
673: ISDestroy(&pcbddc->NeumannBoundaries);
674: ISDestroy(&pcbddc->DirichletBoundaries);
675: MatDestroy(&pcbddc->ConstraintMatrix);
676: PetscFree(pcbddc->local_primal_indices);
677: PetscFree(pcbddc->replicated_local_primal_indices);
678: if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); }
679: PetscFree(pcbddc->local_primal_displacements);
680: PetscFree(pcbddc->local_primal_sizes);
681: PetscInt i;
682: for(i=0;i<pcbddc->n_ISForDofs;i++) { ISDestroy(&pcbddc->ISForDofs[i]); }
683: PetscFree(pcbddc->ISForDofs);
684: for(i=0;i<pcbddc->n_ISForFaces;i++) { ISDestroy(&pcbddc->ISForFaces[i]); }
685: PetscFree(pcbddc->ISForFaces);
686: for(i=0;i<pcbddc->n_ISForEdges;i++) { ISDestroy(&pcbddc->ISForEdges[i]); }
687: PetscFree(pcbddc->ISForEdges);
688: ISDestroy(&pcbddc->ISForVertices);
689: /* Free the private data structure that was hanging off the PC */
690: PetscFree(pcbddc);
691: return(0);
692: }
694: /* -------------------------------------------------------------------------- */
695: /*MC
696: PCBDDC - Balancing Domain Decomposition by Constraints.
698: Options Database Keys:
699: . -pcbddc ??? -
701: Level: intermediate
703: Notes: The matrix used with this preconditioner must be of type MATIS
705: Unlike more 'conventional' interface preconditioners, this iterates over ALL the
706: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
707: on the subdomains).
709: Options for the coarse grid preconditioner can be set with -
710: Options for the Dirichlet subproblem can be set with -
711: Options for the Neumann subproblem can be set with -
713: Contributed by Stefano Zampini
715: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
716: M*/
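/*
   Minimal usage sketch (not part of the original source; it assumes A is an already assembled
   matrix of type MATIS, that b and x are compatible global vectors, and that the PCBDDC type
   string is available from petscpc.h; error checking is omitted for brevity):

     KSP ksp;
     PC  pc;

     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCBDDC);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
*/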
717: EXTERN_C_BEGIN
720: PetscErrorCode PCCreate_BDDC(PC pc)
721: {
723: PC_BDDC *pcbddc;
726: /* Creates the private data structure for this preconditioner and attach it to the PC object. */
727: PetscNewLog(pc,PC_BDDC,&pcbddc);
728: pc->data = (void*)pcbddc;
729: /* create PCIS data structure */
730: PCISCreate(pc);
731: /* BDDC specific */
732: pcbddc->coarse_vec = 0;
733: pcbddc->coarse_rhs = 0;
734: pcbddc->coarse_ksp = 0;
735: pcbddc->coarse_phi_B = 0;
736: pcbddc->coarse_phi_D = 0;
737: pcbddc->vec1_P = 0;
738: pcbddc->vec1_R = 0;
739: pcbddc->vec2_R = 0;
740: pcbddc->local_auxmat1 = 0;
741: pcbddc->local_auxmat2 = 0;
742: pcbddc->R_to_B = 0;
743: pcbddc->R_to_D = 0;
744: pcbddc->ksp_D = 0;
745: pcbddc->ksp_R = 0;
746: pcbddc->local_primal_indices = 0;
747: pcbddc->prec_type = PETSC_FALSE;
748: pcbddc->NeumannBoundaries = 0;
749: pcbddc->ISForDofs = 0;
750: pcbddc->ISForVertices = 0;
751: pcbddc->n_ISForFaces = 0;
752: pcbddc->n_ISForEdges = 0;
753: pcbddc->ConstraintMatrix = 0;
754: pcbddc->use_nnsp_true = PETSC_FALSE;
755: pcbddc->local_primal_sizes = 0;
756: pcbddc->local_primal_displacements = 0;
757: pcbddc->replicated_local_primal_indices = 0;
758: pcbddc->replicated_local_primal_values = 0;
759: pcbddc->coarse_loc_to_glob = 0;
760: pcbddc->dbg_flag = PETSC_FALSE;
761: pcbddc->coarsening_ratio = 8;
762: /* function pointers */
763: pc->ops->apply = PCApply_BDDC;
764: pc->ops->applytranspose = 0;
765: pc->ops->setup = PCSetUp_BDDC;
766: pc->ops->destroy = PCDestroy_BDDC;
767: pc->ops->setfromoptions = PCSetFromOptions_BDDC;
768: pc->ops->view = 0;
769: pc->ops->applyrichardson = 0;
770: pc->ops->applysymmetricleft = 0;
771: pc->ops->applysymmetricright = 0;
772: /* composing function */
773: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
774: PCBDDCSetDirichletBoundaries_BDDC);
775: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
776: PCBDDCSetNeumannBoundaries_BDDC);
777: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",
778: PCBDDCGetDirichletBoundaries_BDDC);
779: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
780: PCBDDCGetNeumannBoundaries_BDDC);
781: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
782: PCBDDCSetCoarseProblemType_BDDC);
783: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
784: PCBDDCSetDofsSplitting_BDDC);
785: return(0);
786: }
787: EXTERN_C_END
789: /* -------------------------------------------------------------------------- */
790: /*
791: Create C matrix [I 0; 0 const]
792: */
793: #ifdef BDDC_USE_POD
794: #if !defined(PETSC_MISSING_LAPACK_GESVD)
795: #define PETSC_MISSING_LAPACK_GESVD 1
796: #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1
797: #endif
798: #endif
802: static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
803: {
805: PC_IS* pcis = (PC_IS*)(pc->data);
806: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
807: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
808: PetscInt *nnz,*vertices,*is_indices;
809: PetscScalar *temp_quadrature_constraint;
810: PetscInt *temp_indices,*temp_indices_to_constraint;
811: PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint;
812: PetscInt n_constraints,n_vertices,size_of_constraint;
813: PetscReal quad_value;
814: PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
815: PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
816: IS *used_IS;
817: const MatType impMatType=MATSEQAIJ;
818: PetscBLASInt Bs,Bt,lwork,lierr;
819: PetscReal tol=1.0e-8;
820: MatNullSpace nearnullsp;
821: const Vec *nearnullvecs;
822: Vec *localnearnullsp;
823: PetscScalar *work,*temp_basis,*array_vector,*correlation_mat;
824: PetscReal *rwork,*singular_vals;
825: PetscBLASInt Bone=1;
826: /* some ugly conditional declarations */
827: #if defined(PETSC_MISSING_LAPACK_GESVD)
828: PetscScalar dot_result;
829: PetscScalar one=1.0,zero=0.0;
830: PetscInt ii;
831: #if defined(PETSC_USE_COMPLEX)
832: PetscScalar val1,val2;
833: #endif
834: #else
835: PetscBLASInt dummy_int;
836: PetscScalar dummy_scalar;
837: #endif
840: /* check if near null space is attached to global mat */
841: MatGetNearNullSpace(pc->pmat,&nearnullsp);
842: if (nearnullsp) {
843: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
844: } else { /* if a near null space is not provided, use constants */
845: nnsp_has_cnst = PETSC_TRUE;
846: use_nnsp_true = PETSC_TRUE;
847: }
848: if(nnsp_has_cnst) {
849: nnsp_addone = 1;
850: }
851: /*
852: Evaluate maximum storage size needed by the procedure
853: - temp_indices will contain the start index of each constraint, stored as follows
854: - temp_indices_to_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
855: - temp_quadrature_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
856: */
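  /* Illustrative layout (hypothetical numbers, not part of the original source): with two
     constraints acting on 3 and 2 local dofs respectively
       temp_indices               = { 0, 3, 5 }
       temp_indices_to_constraint = { i0,i1,i2, j0,j1 }
       temp_quadrature_constraint = { c0,c1,c2, d0,d1 }
     i.e. constraint k occupies positions temp_indices[k],...,temp_indices[k+1]-1 */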
857: total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
858: total_counts *= (nnsp_addone+nnsp_size);
859: PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);
860: total_counts = 0;
861: max_size_of_constraint = 0;
862: for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
863: if(i<pcbddc->n_ISForEdges){
864: used_IS = &pcbddc->ISForEdges[i];
865: } else {
866: used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
867: }
868: ISGetSize(*used_IS,&j);
869: total_counts += j;
870: if(j>max_size_of_constraint) max_size_of_constraint=j;
871: }
872: total_counts *= (nnsp_addone+nnsp_size);
873: PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);
874: PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);
875: /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
876: rwork = 0;
877: work = 0;
878: singular_vals = 0;
879: temp_basis = 0;
880: correlation_mat = 0;
881: if(!pcbddc->use_nnsp_true) {
882: PetscScalar temp_work;
883: #if defined(PETSC_MISSING_LAPACK_GESVD)
884: /* POD */
885: PetscInt max_n;
886: max_n = nnsp_addone+nnsp_size;
887: /* using some techniques borrowed from Proper Orthogonal Decomposition */
888: PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);
889: PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);
890: PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);
891: #if defined(PETSC_USE_COMPLEX)
892: PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);
893: #endif
894: /* now we evaluate the optimal workspace using query with lwork=-1 */
895: Bt = PetscBLASIntCast(max_n);
896: lwork=-1;
897: #if !defined(PETSC_USE_COMPLEX)
898: LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
899: #else
900: LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
901: #endif
902: #else /* on missing GESVD */
903: /* SVD */
904: PetscInt max_n,min_n;
905: max_n = max_size_of_constraint;
906: min_n = nnsp_addone+nnsp_size;
907: if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
908: min_n = max_size_of_constraint;
909: max_n = nnsp_addone+nnsp_size;
910: }
911: PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);
912: #if defined(PETSC_USE_COMPLEX)
913: PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);
914: #endif
915: /* now we evaluate the optimal workspace using query with lwork=-1 */
916: lwork=-1;
917: Bs = PetscBLASIntCast(max_n);
918: Bt = PetscBLASIntCast(min_n);
919: dummy_int = Bs;
920: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
921: #if !defined(PETSC_USE_COMPLEX)
922: LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
923: &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
924: #else
925: LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
926: &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
927: #endif
928: if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
929: PetscFPTrapPop();
930: #endif
931: /* Allocate optimal workspace */
932: lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
933: total_counts = (PetscInt)lwork;
934: PetscMalloc(total_counts*sizeof(PetscScalar),&work);
935: }
936: /* get local part of global near null space vectors */
937: PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);
938: for(k=0;k<nnsp_size;k++) {
939: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
940: VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
941: VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
942: }
943: /* Now we can loop on constraining sets */
944: total_counts=0;
945: temp_indices[0]=0;
946: for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
947: if(i<pcbddc->n_ISForEdges){
948: used_IS = &pcbddc->ISForEdges[i];
949: } else {
950: used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
951: }
952: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
953: temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
954: ISGetSize(*used_IS,&size_of_constraint);
955: ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
956: if(nnsp_has_cnst) {
957: temp_constraints++;
958: quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
959: for(j=0;j<size_of_constraint;j++) {
960: temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
961: temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
962: }
963: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
964: total_counts++;
965: }
966: for(k=0;k<nnsp_size;k++) {
967: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
968: for(j=0;j<size_of_constraint;j++) {
969: temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
970: temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
971: }
972: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
973: quad_value = 1.0;
974: if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
975: Bs = PetscBLASIntCast(size_of_constraint);
976: quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone);
977: }
978: if ( quad_value > 0.0 ) { /* keep indices and values */
979: temp_constraints++;
980: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
981: total_counts++;
982: }
983: }
984: ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
985: /* perform SVD on the constraints if use_nnsp_true has not been requested by the user */
986: if(!use_nnsp_true) {
988: Bs = PetscBLASIntCast(size_of_constraint);
989: Bt = PetscBLASIntCast(temp_constraints);
991: #if defined(PETSC_MISSING_LAPACK_GESVD)
992: PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));
993: /* Store upper triangular part of correlation matrix */
994: for(j=0;j<temp_constraints;j++) {
995: for(k=0;k<j+1;k++) {
996: #if defined(PETSC_USE_COMPLEX)
997: /* hand made complex dot product */
998: dot_result = 0.0;
999: for (ii=0; ii<size_of_constraint; ii++) {
1000: val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
1001: val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
1002: dot_result += val1*PetscConj(val2);
1003: }
1004: #else
1005: dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
1006: &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
1007: #endif
1008: correlation_mat[j*temp_constraints+k]=dot_result;
1009: }
1010: }
1011: #if !defined(PETSC_USE_COMPLEX)
1012: LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
1013: #else
1014: LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
1015: #endif
1016: if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
1017: /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
1018: j=0;
1019: while( j < Bt && singular_vals[j] < tol) j++;
1020: total_counts=total_counts-j;
1021: if(j<temp_constraints) {
1022: for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
1023: BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
1024: /* copy POD basis into used quadrature memory */
1025: for(k=0;k<Bt-j;k++) {
1026: for(ii=0;ii<size_of_constraint;ii++) {
1027: temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
1028: }
1029: }
1030: }
1032: #else /* on missing GESVD */
1034: PetscInt min_n = temp_constraints;
1035: if(min_n > size_of_constraint) min_n = size_of_constraint;
1036: dummy_int = Bs;
1037: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1038: #if !defined(PETSC_USE_COMPLEX)
1039: LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
1040: &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
1041: #else
1042: LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
1043: &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
1044: #endif
1045: if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
1046: PetscFPTrapPop();
1047: /* retain singular values greater than tol: note that LAPACK SVD gives them in descending order */
1048: j=0;
1049: while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
1050: total_counts = total_counts-(PetscInt)Bt+(min_n-j);
1052: #endif
1053: }
1054: }
1055: n_constraints=total_counts;
1056: ISGetSize(pcbddc->ISForVertices,&n_vertices);
1057: local_primal_size = n_vertices+n_constraints;
1058: PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);
1059: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
1060: MatSetType(pcbddc->ConstraintMatrix,impMatType);
1061: MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);
1062: for(i=0;i<n_vertices;i++) { nnz[i]= 1; }
1063: for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; }
1064: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
1065: ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1066: for(i=0;i<n_vertices;i++) { MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES); }
1067: ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1068: for(i=0;i<n_constraints;i++) {
1069: j=i+n_vertices;
1070: size_of_constraint=temp_indices[i+1]-temp_indices[i];
1071: MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
1072: }
1073: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1074: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
1075: /* set quantities in pcbddc data structure */
1076: pcbddc->n_vertices = n_vertices;
1077: pcbddc->n_constraints = n_constraints;
1078: pcbddc->local_primal_size = n_vertices+n_constraints;
1079: /* free workspace no longer needed */
1080: PetscFree(rwork);
1081: PetscFree(work);
1082: PetscFree(temp_basis);
1083: PetscFree(singular_vals);
1084: PetscFree(correlation_mat);
1085: PetscFree(temp_indices);
1086: PetscFree(temp_indices_to_constraint);
1087: PetscFree(temp_quadrature_constraint);
1088: PetscFree(nnz);
1089: for(k=0;k<nnsp_size;k++) { VecDestroy(&localnearnullsp[k]); }
1090: PetscFree(localnearnullsp);
1091: return(0);
1092: }
1093: #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD
1094: #undef PETSC_MISSING_LAPACK_GESVD
1095: #endif
1097: /* -------------------------------------------------------------------------- */
1098: /*
1099: PCBDDCCoarseSetUp - Sets up the local solvers, scatters, coarse basis functions and coarse problem needed to apply BDDC.
1100: */
1103: static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
1104: {
1105: PetscErrorCode ierr;
1107: PC_IS* pcis = (PC_IS*)(pc->data);
1108: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
1109: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1110: IS is_R_local;
1111: IS is_V_local;
1112: IS is_C_local;
1113: IS is_aux1;
1114: IS is_aux2;
1115: const VecType impVecType;
1116: const MatType impMatType;
1117: PetscInt n_R=0;
1118: PetscInt n_D=0;
1119: PetscInt n_B=0;
1120: PetscScalar zero=0.0;
1121: PetscScalar one=1.0;
1122: PetscScalar m_one=-1.0;
1123: PetscScalar* array;
1124: PetscScalar *coarse_submat_vals;
1125: PetscInt *idx_R_local;
1126: PetscInt *idx_V_B;
1127: PetscScalar *coarsefunctions_errors;
1128: PetscScalar *constraints_errors;
1129: /* auxiliary indices */
1130: PetscInt s,i,j,k;
1131: /* for verbose output of bddc */
1132: PetscViewer viewer=pcbddc->dbg_viewer;
1133: PetscBool dbg_flag=pcbddc->dbg_flag;
1134: /* for counting coarse dofs */
1135: PetscScalar coarsesum;
1136: PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints;
1137: PetscInt size_of_constraint;
1138: PetscInt *row_cmat_indices;
1139: PetscScalar *row_cmat_values;
1140: const PetscInt *vertices;
1141:
1143: /* Set Non-overlapping dimensions */
1144: n_B = pcis->n_B; n_D = pcis->n - n_B;
1145: ISGetIndices(pcbddc->ISForVertices,&vertices);
1146: /* First let's count coarse dofs: note that we allow a constraint to exist on a subdomain without its counterpart on the neighbouring subdomain (if the user wants) */
1147: VecSet(pcis->vec1_N,zero);
1148: VecGetArray(pcis->vec1_N,&array);
1149: for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; }
1151: for(i=0;i<n_constraints;i++) {
1152: MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1153: for (j=0; j<size_of_constraint; j++) {
1154: k = row_cmat_indices[j];
1155: if( array[k] == zero ) {
1156: array[k] = one;
1157: break;
1158: }
1159: }
1160: MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1161: }
1162: VecRestoreArray(pcis->vec1_N,&array);
1163: VecSet(pcis->vec1_global,zero);
1164: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1165: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1166: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1167: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1168: VecGetArray(pcis->vec1_N,&array);
1169: for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; }
1170: VecRestoreArray(pcis->vec1_N,&array);
1171: VecSet(pcis->vec1_global,zero);
1172: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1173: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1174: VecSum(pcis->vec1_global,&coarsesum);
1175: pcbddc->coarse_size = (PetscInt) coarsesum;
1177: /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
1178: VecSet(pcis->vec1_N,one);
1179: VecGetArray(pcis->vec1_N,&array);
1180: for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; }
1181: PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);
1182: for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } }
1183: VecRestoreArray(pcis->vec1_N,&array);
1184: if(dbg_flag) {
1185: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1186: PetscViewerFlush(viewer);
1187: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
1188: PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
1189: PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);
1190: PetscViewerFlush(viewer);
1191: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1192: PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);
1193: PetscViewerFlush(viewer);
1194: }
1195: /* Allocate needed vectors */
1196: /* Set Mat type for local matrices needed by the BDDC preconditioner */
1197: impMatType = MATSEQDENSE;
1198: impVecType = VECSEQ;
1199: VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);
1200: VecDuplicate(pcis->vec1_N,&pcis->vec2_N);
1201: VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);
1202: VecSetSizes(pcbddc->vec1_R,n_R,n_R);
1203: VecSetType(pcbddc->vec1_R,impVecType);
1204: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
1205: VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);
1206: VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);
1207: VecSetType(pcbddc->vec1_P,impVecType);
1209: /* Creating some index sets needed */
1210: /* For submatrices */
1211: ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);
1212: if(n_vertices) {
1213: ISDuplicate(pcbddc->ISForVertices,&is_V_local);
1214: ISCopy(pcbddc->ISForVertices,is_V_local);
1215: }
1216: if(n_constraints) { ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local); }
1217: /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1218: {
1219: PetscInt *aux_array1;
1220: PetscInt *aux_array2;
1221: PetscScalar value;
1223: PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);
1224: PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);
1226: VecSet(pcis->vec1_global,zero);
1227: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1228: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1229: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1230: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1231: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1232: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1233: VecGetArray(pcis->vec1_N,&array);
1234: for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } }
1235: VecRestoreArray(pcis->vec1_N,&array);
1236: ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);
1237: VecGetArray(pcis->vec1_B,&array);
1238: for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } }
1239: VecRestoreArray(pcis->vec1_B,&array);
1240: ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);
1241: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
1242: PetscFree(aux_array1);
1243: PetscFree(aux_array2);
1244: ISDestroy(&is_aux1);
1245: ISDestroy(&is_aux2);
1247: if(pcbddc->prec_type || dbg_flag ) {
1248: PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);
1249: VecGetArray(pcis->vec1_N,&array);
1250: for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } }
1251: VecRestoreArray(pcis->vec1_N,&array);
1252: ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);
1253: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1254: PetscFree(aux_array1);
1255: ISDestroy(&is_aux1);
1256: }
1258: /* Check scatters */
1259: if(dbg_flag) {
1260:
1261: Vec vec_aux;
1263: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1264: PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");
1265: PetscViewerFlush(viewer);
1266: VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1267: VecSetRandom(pcis->vec1_B,PETSC_NULL);
1268: VecDuplicate(pcbddc->vec1_R,&vec_aux);
1269: VecCopy(pcbddc->vec1_R,vec_aux);
1270: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1271: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1272: VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1273: VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1274: VecAXPY(vec_aux,m_one,pcbddc->vec1_R);
1275: VecNorm(vec_aux,NORM_INFINITY,&value);
1276: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);
1277: VecDestroy(&vec_aux);
1279: VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1280: VecSetRandom(pcis->vec1_B,PETSC_NULL);
1281: VecDuplicate(pcis->vec1_B,&vec_aux);
1282: VecCopy(pcis->vec1_B,vec_aux);
1283: VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1284: VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1285: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1286: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1287: VecAXPY(vec_aux,m_one,pcis->vec1_B);
1288: VecNorm(vec_aux,NORM_INFINITY,&value);
1289: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);
1290: VecDestroy(&vec_aux);
1292: PetscViewerFlush(viewer);
1293: PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");
1294: PetscViewerFlush(viewer);
1296: VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1297: VecSetRandom(pcis->vec1_D,PETSC_NULL);
1298: VecDuplicate(pcbddc->vec1_R,&vec_aux);
1299: VecCopy(pcbddc->vec1_R,vec_aux);
1300: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1301: VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1302: VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1303: VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);
1304: VecAXPY(vec_aux,m_one,pcbddc->vec1_R);
1305: VecNorm(vec_aux,NORM_INFINITY,&value);
1306: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);
1307: VecDestroy(&vec_aux);
1309: VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1310: VecSetRandom(pcis->vec1_D,PETSC_NULL);
1311: VecDuplicate(pcis->vec1_D,&vec_aux);
1312: VecCopy(pcis->vec1_D,vec_aux);
1313: VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1314: VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1315: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1316: VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);
1317: VecAXPY(vec_aux,m_one,pcis->vec1_D);
1318: VecNorm(vec_aux,NORM_INFINITY,&value);
1319: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);
1320: VecDestroy(&vec_aux);
1321: PetscViewerFlush(viewer);
1323: }
1324: }
1326: /* vertices in boundary numbering */
1327: if(n_vertices) {
1328: VecSet(pcis->vec1_N,m_one);
1329: VecGetArray(pcis->vec1_N,&array);
1330: for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; }
1331: VecRestoreArray(pcis->vec1_N,&array);
1332: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1333: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1334: PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);
1335: VecGetArray(pcis->vec1_B,&array);
1336: for (i=0; i<n_vertices; i++) {
1337: s=0;
1338: while (array[s] != i ) {s++;}
1339: idx_V_B[i]=s;
1340: }
1341: VecRestoreArray(pcis->vec1_B,&array);
1342: }
1345: /* Create KSP and PC contexts for the local Dirichlet and Neumann problems */
1346: {
1347: Mat A_RR;
1348: PC pc_temp;
1349: /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */
1350: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1351: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1352: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);
1353: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1354: /* TODO: KSPSetOptionsPrefix() */
1355: /* default */
1356: KSPGetPC(pcbddc->ksp_D,&pc_temp);
1357: PCSetType(pc_temp,PCLU);
1358: /* Allow user's customization */
1359: KSPSetFromOptions(pcbddc->ksp_D);
1360: /* Set Up KSP for Dirichlet problem of BDDC */
1361: KSPSetUp(pcbddc->ksp_D);
1362: /* Matrix for Neumann problem is A_RR -> we need to create it */
1363: MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);
1364: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1365: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1366: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);
1367: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1368: /* TODO: KSPSetOptionsPrefix() */
1369: /* default */
1370: KSPGetPC(pcbddc->ksp_R,&pc_temp);
1371: PCSetType(pc_temp,PCLU);
1372: /* Allow user's customization */
1373: KSPSetFromOptions(pcbddc->ksp_R);
1374: /* Set Up KSP for Neumann problem of BDDC */
1375: KSPSetUp(pcbddc->ksp_R);
1376: /* check Dirichlet and Neumann solvers */
1377: if(pcbddc->dbg_flag) {
1378: Vec temp_vec;
1379: PetscScalar value;
1381: VecDuplicate(pcis->vec1_D,&temp_vec);
1382: VecSetRandom(pcis->vec1_D,PETSC_NULL);
1383: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1384: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);
1385: VecAXPY(temp_vec,m_one,pcis->vec1_D);
1386: VecNorm(temp_vec,NORM_INFINITY,&value);
1387: VecDestroy(&temp_vec);
1388: PetscViewerFlush(viewer);
1389: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1390: PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");
1391: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);
1392: VecDuplicate(pcbddc->vec1_R,&temp_vec);
1393: VecSetRandom(pcbddc->vec1_R,PETSC_NULL);
1394: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1395: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);
1396: VecAXPY(temp_vec,m_one,pcbddc->vec1_R);
1397: VecNorm(temp_vec,NORM_INFINITY,&value);
1398: VecDestroy(&temp_vec);
1399: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);
1400: PetscViewerFlush(viewer);
1401: }
1402: /* free Neumann problem's matrix */
1403: MatDestroy(&A_RR);
1404: }
1406: /* Assemble all remaining stuff needed to apply BDDC */
1407: {
1408: Mat A_RV,A_VR,A_VV;
1409: Mat M1,M2;
1410: Mat C_CR;
1411: Mat AUXMAT;
1412: Vec vec1_C;
1413: Vec vec2_C;
1414: Vec vec1_V;
1415: Vec vec2_V;
1416: PetscInt *nnz;
1417: PetscInt *auxindices;
1418: PetscInt index;
1419: PetscScalar* array2;
1420: MatFactorInfo matinfo;
1422: /* Allocating some extra storage just to be safe */
1423: PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);
1424: PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
1425: for(i=0;i<pcis->n;i++) {auxindices[i]=i;}
1427: /* some work vectors on vertices and/or constraints */
1428: if(n_vertices) {
1429: VecCreate(PETSC_COMM_SELF,&vec1_V);
1430: VecSetSizes(vec1_V,n_vertices,n_vertices);
1431: VecSetType(vec1_V,impVecType);
1432: VecDuplicate(vec1_V,&vec2_V);
1433: }
1434: if(pcbddc->n_constraints) {
1435: VecCreate(PETSC_COMM_SELF,&vec1_C);
1436: VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);
1437: VecSetType(vec1_C,impVecType);
1438: VecDuplicate(vec1_C,&vec2_C);
1439: VecDuplicate(vec1_C,&pcbddc->vec1_C);
1440: }
1441: /* Precompute stuff needed for preprocessing and application of BDDC */
1442: if(n_constraints) {
1443: /* some work vectors */
1444: MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
1445: MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);
1446: MatSetType(pcbddc->local_auxmat2,impMatType);
1447: MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);
1449: /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
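/* Each column i is obtained by restricting the i-th constraint row to the R dofs,
   negating it and solving with ksp_R, i.e. column i = -A_{RR}^{-1} C^T_{CR} e_i */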
1450: for(i=0;i<n_constraints;i++) {
1451: VecSet(pcis->vec1_N,zero);
1452: VecSet(pcbddc->vec1_R,zero);
1453: /* Get row of constraint matrix in R numbering */
1454: VecGetArray(pcis->vec1_N,&array);
1455: MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
1456: for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; }
1457: MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
1458: VecGetArray(pcbddc->vec1_R,&array2);
1459: for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; }
1460: VecRestoreArray(pcis->vec1_N,&array);
1461: VecRestoreArray(pcbddc->vec1_R,&array2);
1462: /* Solve for row of constraint matrix in R numbering */
1463: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
1464: /* Set values */
1465: VecGetArray(pcbddc->vec2_R,&array);
1466: MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
1467: VecRestoreArray(pcbddc->vec2_R,&array);
1468: }
1469: MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
1470: MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
1472: /* Create Constraint matrix on R nodes: C_{CR} */
1473: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);
1474: ISDestroy(&is_C_local);
1476: /* Assemble AUXMAT = LU factorization of ( -C_{CR} A_{RR}^{-1} C^T_{CR} ), used below to apply its inverse */
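/* Note: -C_{CR} A_{RR}^{-1} C^T_{CR} is the Schur complement of A_{RR} in the local saddle
   point matrix [ A_RR C_CR^T ; C_CR 0 ]; its LU factors are used below (via MatSolve) to
   build M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */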
1477: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);
1478: MatFactorInfoInitialize(&matinfo);
1479: ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);
1480: MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);
1481: ISDestroy(&is_aux1);
1483: /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */
1484: MatCreate(PETSC_COMM_SELF,&M1);
1485: MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
1486: MatSetType(M1,impMatType);
1487: MatSeqDenseSetPreallocation(M1,PETSC_NULL);
1488: for(i=0;i<n_constraints;i++) {
1489: VecSet(vec1_C,zero);
1490: VecSetValue(vec1_C,i,one,INSERT_VALUES);
1491: VecAssemblyBegin(vec1_C);
1492: VecAssemblyEnd(vec1_C);
1493: MatSolve(AUXMAT,vec1_C,vec2_C);
1494: VecScale(vec2_C,m_one);
1495: VecGetArray(vec2_C,&array);
1496: MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);
1497: VecRestoreArray(vec2_C,&array);
1498: }
1499: MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);
1500: MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);
1501: MatDestroy(&AUXMAT);
1502: /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
1503: MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
1505: }
1507: /* Get submatrices from subdomain matrix */
1508: if(n_vertices){
1509: MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);
1510: MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);
1511: MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);
1512: /* Assemble M2 = A_RR^{-1}A_RV */
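/* Columns of M2 are computed with one ksp_R solve per vertex: M2 e_i = A_RR^{-1} A_RV e_i.
   In the basis function loop below, -M2 e_i is (for SPD problems) the energy-minimizing
   extension of the i-th vertex unit value to the R dofs, before the constraint correction */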
1513: MatCreate(PETSC_COMM_SELF,&M2);
1514: MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);
1515: MatSetType(M2,impMatType);
1516: MatSeqDenseSetPreallocation(M2,PETSC_NULL);
1517: for(i=0;i<n_vertices;i++) {
1518: VecSet(vec1_V,zero);
1519: VecSetValue(vec1_V,i,one,INSERT_VALUES);
1520: VecAssemblyBegin(vec1_V);
1521: VecAssemblyEnd(vec1_V);
1522: MatMult(A_RV,vec1_V,pcbddc->vec1_R);
1523: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
1524: VecGetArray(pcbddc->vec2_R,&array);
1525: MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);
1526: VecRestoreArray(pcbddc->vec2_R,&array);
1527: }
1528: MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);
1529: MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);
1530: }
1532: /* Matrix of coarse basis functions (local) */
1533: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
1534: MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
1535: MatSetType(pcbddc->coarse_phi_B,impMatType);
1536: MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);
1537: if(pcbddc->prec_type || dbg_flag ) {
1538: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
1539: MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
1540: MatSetType(pcbddc->coarse_phi_D,impMatType);
1541: MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);
1542: }
1544: if(dbg_flag) {
1545: PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);
1546: PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);
1547: }
1548: /* Subdomain contribution (Non-overlapping) to coarse matrix */
1549: PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);
1551: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
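/* For each primal dof p, the coarse basis function Phi_p minimizes the subdomain energy
   subject to the constraints C Phi_p = e_p, i.e. it solves the local saddle point problem
       [ A   C^T ] [ Phi_p    ]   [  0  ]
       [ C    0  ] [ lambda_p ] = [ e_p ]
   with A the local subdomain matrix and C the ConstraintMatrix (this is what the debug
   checks below verify). The subdomain contribution to the coarse matrix is Phi^T A Phi,
   stored column by column (column major) in coarse_submat_vals */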
1552: for(i=0;i<n_vertices;i++){
1553: VecSet(vec1_V,zero);
1554: VecSetValue(vec1_V,i,one,INSERT_VALUES);
1555: VecAssemblyBegin(vec1_V);
1556: VecAssemblyEnd(vec1_V);
1557: /* solution of saddle point problem */
1558: MatMult(M2,vec1_V,pcbddc->vec1_R);
1559: VecScale(pcbddc->vec1_R,m_one);
1560: if(n_constraints) {
1561: MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
1562: MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
1563: VecScale(vec1_C,m_one);
1564: }
1565: MatMult(A_VR,pcbddc->vec1_R,vec2_V);
1566: MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);
1568: /* Set values in coarse basis function and subdomain part of coarse_mat */
1569: /* coarse basis functions */
1570: VecSet(pcis->vec1_B,zero);
1571: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1572: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1573: VecGetArray(pcis->vec1_B,&array);
1574: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
1575: VecRestoreArray(pcis->vec1_B,&array);
1576: MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
1577: if( pcbddc->prec_type || dbg_flag ) {
1578: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1579: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1580: VecGetArray(pcis->vec1_D,&array);
1581: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
1582: VecRestoreArray(pcis->vec1_D,&array);
1583: }
1584: /* subdomain contribution to coarse matrix */
1585: VecGetArray(vec2_V,&array);
1586: for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering
1587: VecRestoreArray(vec2_V,&array);
1588: if(n_constraints) {
1589: VecGetArray(vec1_C,&array);
1590: for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering
1591: VecRestoreArray(vec1_C,&array);
1592: }
1593:
1594: if( dbg_flag ) {
1595: /* assemble subdomain vector on nodes */
1596: VecSet(pcis->vec1_N,zero);
1597: VecGetArray(pcis->vec1_N,&array);
1598: VecGetArray(pcbddc->vec1_R,&array2);
1599: for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; }
1600: array[ vertices[i] ] = one;
1601: VecRestoreArray(pcbddc->vec1_R,&array2);
1602: VecRestoreArray(pcis->vec1_N,&array);
1603: /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
1604: VecSet(pcbddc->vec1_P,zero);
1605: VecGetArray(pcbddc->vec1_P,&array2);
1606: VecGetArray(vec2_V,&array);
1607: for(j=0;j<n_vertices;j++) { array2[j]=array[j]; }
1608: VecRestoreArray(vec2_V,&array);
1609: if(n_constraints) {
1610: VecGetArray(vec1_C,&array);
1611: for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; }
1612: VecRestoreArray(vec1_C,&array);
1613: }
1614: VecRestoreArray(pcbddc->vec1_P,&array2);
1615: VecScale(pcbddc->vec1_P,m_one);
1616: /* check saddle point solution */
1617: MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);
1618: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
1619: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
1620: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
1621: VecGetArray(pcbddc->vec1_P,&array);
1622: array[i]=array[i]+m_one; /* subtract the identity: C Phi should equal the identity on primal dofs */
1623: VecRestoreArray(pcbddc->vec1_P,&array);
1624: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
1625: }
1626: }
1627:
1628: for(i=0;i<n_constraints;i++){
1629: VecSet(vec2_C,zero);
1630: VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
1631: VecAssemblyBegin(vec2_C);
1632: VecAssemblyEnd(vec2_C);
1633: /* solution of saddle point problem */
1634: MatMult(M1,vec2_C,vec1_C);
1635: MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
1636: VecScale(vec1_C,m_one);
1637: if(n_vertices) { MatMult(A_VR,pcbddc->vec1_R,vec2_V); }
1638: /* Set values in coarse basis function and subdomain part of coarse_mat */
1639: /* coarse basis functions */
1640: index=i+n_vertices;
1641: VecSet(pcis->vec1_B,zero);
1642: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1643: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1644: VecGetArray(pcis->vec1_B,&array);
1645: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);
1646: VecRestoreArray(pcis->vec1_B,&array);
1647: if( pcbddc->prec_type || dbg_flag ) {
1648: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1649: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1650: VecGetArray(pcis->vec1_D,&array);
1651: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);
1652: VecRestoreArray(pcis->vec1_D,&array);
1653: }
1654: /* subdomain contribution to coarse matrix */
1655: if(n_vertices) {
1656: VecGetArray(vec2_V,&array);
1657: for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering
1658: VecRestoreArray(vec2_V,&array);
1659: }
1660: VecGetArray(vec1_C,&array);
1661: for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering
1662: VecRestoreArray(vec1_C,&array);
1663:
1664: if( dbg_flag ) {
1665: /* assemble subdomain vector on nodes */
1666: VecSet(pcis->vec1_N,zero);
1667: VecGetArray(pcis->vec1_N,&array);
1668: VecGetArray(pcbddc->vec1_R,&array2);
1669: for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; }
1670: VecRestoreArray(pcbddc->vec1_R,&array2);
1671: VecRestoreArray(pcis->vec1_N,&array);
1672: /* assemble subdomain vector of lagrange multipliers */
1673: VecSet(pcbddc->vec1_P,zero);
1674: VecGetArray(pcbddc->vec1_P,&array2);
1675: if( n_vertices) {
1676: VecGetArray(vec2_V,&array);
1677: for(j=0;j<n_vertices;j++) {array2[j]=-array[j];}
1678: VecRestoreArray(vec2_V,&array);
1679: }
1680: VecGetArray(vec1_C,&array);
1681: for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];}
1682: VecRestoreArray(vec1_C,&array);
1683: VecRestoreArray(pcbddc->vec1_P,&array2);
1684: /* check saddle point solution */
1685: MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);
1686: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
1687: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);
1688: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
1689: VecGetArray(pcbddc->vec1_P,&array);
1690: array[index]=array[index]+m_one; /* subtract the identity: C Phi should equal the identity on primal dofs */
1691: VecRestoreArray(pcbddc->vec1_P,&array);
1692: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);
1693: }
1694: }
1695: MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
1696: MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
1697: if( pcbddc->prec_type || dbg_flag ) {
1698: MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
1699: MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
1700: }
1701: /* Check coarse_sub_mat and coarse basis functions */
1702: /* It should be \Phi^{(j)T} A^{(j)} \Phi^{(j)} = coarse_sub_mat */
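/* Phi is split into its interior (D) and interface (B) rows, so the product expands as
   Phi_D^T A_II Phi_D + Phi_B^T A_BB Phi_B + Phi_D^T A_IB Phi_B + Phi_B^T A_BI Phi_D,
   computed below as TM1 + TM2 + TM3 + TM4 and compared against coarse_submat_vals */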
1703: if(dbg_flag) {
1705: Mat coarse_sub_mat;
1706: Mat TM1,TM2,TM3,TM4;
1707: Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI;
1708: const MatType checkmattype=MATSEQAIJ;
1709: PetscScalar value;
1711: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
1712: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
1713: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
1714: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
1715: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
1716: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
1717: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
1718: MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);
1720: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1721: PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");
1722: PetscViewerFlush(viewer);
1723: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
1724: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
1725: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1726: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1727: MatDestroy(&AUXMAT);
1728: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1729: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1730: MatDestroy(&AUXMAT);
1731: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
1732: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
1733: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
1734: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
1735: MatNorm(TM1,NORM_INFINITY,&value);
1736: PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");
1737: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);
1738: PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);
1739: PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");
1740: for(i=0;i<pcbddc->local_primal_size;i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]); }
1741: PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");
1742: for(i=0;i<pcbddc->local_primal_size;i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]); }
1743: PetscViewerFlush(viewer);
1744: MatDestroy(&A_II);
1745: MatDestroy(&A_BB);
1746: MatDestroy(&A_IB);
1747: MatDestroy(&A_BI);
1748: MatDestroy(&TM1);
1749: MatDestroy(&TM2);
1750: MatDestroy(&TM3);
1751: MatDestroy(&TM4);
1752: MatDestroy(&coarse_phi_D);
1753: MatDestroy(&coarse_sub_mat);
1754: MatDestroy(&coarse_phi_B);
1755: PetscFree(coarsefunctions_errors);
1756: PetscFree(constraints_errors);
1757: }
1759: /* create coarse matrix and data structures for message passing associated with the chosen coarse problem type */
1760: PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);
1761: /* free memory */
1762: PetscFree(coarse_submat_vals);
1763: PetscFree(auxindices);
1764: PetscFree(nnz);
1765: if(n_vertices) {
1766: VecDestroy(&vec1_V);
1767: VecDestroy(&vec2_V);
1768: MatDestroy(&M2);
1769: MatDestroy(&A_RV);
1770: MatDestroy(&A_VR);
1771: MatDestroy(&A_VV);
1772: }
1773: if(pcbddc->n_constraints) {
1774: VecDestroy(&vec1_C);
1775: VecDestroy(&vec2_C);
1776: MatDestroy(&M1);
1777: MatDestroy(&C_CR);
1778: }
1779: }
1780: /* free memory */
1781: if(n_vertices) {
1782: PetscFree(idx_V_B);
1783: ISDestroy(&is_V_local);
1784: }
1785: PetscFree(idx_R_local);
1786: ISDestroy(&is_R_local);
1787: ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1789: return(0);
1790: }
1792: /* -------------------------------------------------------------------------- */
1796: static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
1797: {
1799:
1800: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1801: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1802: PC_IS *pcis = (PC_IS*)pc->data;
1803: MPI_Comm prec_comm = ((PetscObject)pc)->comm;
1804: MPI_Comm coarse_comm;
1806: /* common to all choices */
1807: PetscScalar *temp_coarse_mat_vals;
1808: PetscScalar *ins_coarse_mat_vals;
1809: PetscInt *ins_local_primal_indices;
1810: PetscMPIInt *localsizes2,*localdispl2;
1811: PetscMPIInt size_prec_comm;
1812: PetscMPIInt rank_prec_comm;
1813: PetscMPIInt active_rank=MPI_PROC_NULL;
1814: PetscMPIInt master_proc=0;
1815: PetscInt ins_local_primal_size;
1816: /* specific to MULTILEVEL_BDDC */
1817: PetscMPIInt *ranks_recv;
1818: PetscMPIInt count_recv=0;
1819: PetscMPIInt rank_coarse_proc_send_to;
1820: PetscMPIInt coarse_color = MPI_UNDEFINED;
1821: ISLocalToGlobalMapping coarse_ISLG;
1822: /* some other variables */
1824: const MatType coarse_mat_type;
1825: const PCType coarse_pc_type;
1826: const KSPType coarse_ksp_type;
1827: PC pc_temp;
1828: PetscInt i,j,k,bs;
1829: PetscInt max_it_coarse_ksp=1; /* don't increase this value */
1830: /* verbose output viewer */
1831: PetscViewer viewer=pcbddc->dbg_viewer;
1832: PetscBool dbg_flag=pcbddc->dbg_flag;
1833:
1836: ins_local_primal_indices = 0;
1837: ins_coarse_mat_vals = 0;
1838: localsizes2 = 0;
1839: localdispl2 = 0;
1840: temp_coarse_mat_vals = 0;
1841: coarse_ISLG = 0;
1843: MPI_Comm_size(prec_comm,&size_prec_comm);
1844: MPI_Comm_rank(prec_comm,&rank_prec_comm);
1845: MatGetBlockSize(matis->A,&bs);
1846:
1847: /* Assign global numbering to coarse dofs */
1848: {
1849: PetscScalar one=1.,zero=0.;
1850: PetscScalar *array;
1851: PetscMPIInt *auxlocal_primal;
1852: PetscMPIInt *auxglobal_primal;
1853: PetscMPIInt *all_auxglobal_primal;
1854: PetscMPIInt *all_auxglobal_primal_dummy;
1855: PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
1856: PetscInt *vertices,*row_cmat_indices;
1857: PetscInt size_of_constraint;
1859: /* Construct needed data structures for message passing */
1860: PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);
1861: PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);
1862: PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);
1863: /* Gather local_primal_size information for all processes */
1864: MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);
1865: pcbddc->replicated_primal_size = 0;
1866: for (i=0; i<size_prec_comm; i++) {
1867: pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
1868: pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
1869: }
1870: if(rank_prec_comm == 0) {
1871: /* allocate some auxiliary space */
1872: PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);
1873: PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);
1874: }
1875: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);
1876: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);
1878: /* First count coarse dofs: note that a constraint may exist on a subdomain without its counterpart on the neighbouring subdomain (if the user wants so).
1879: This code fragment assumes that the number of local constraints per connected component
1880: is not greater than the number of nodes defined for that connected component
1881: (otherwise the constraints would surely be linearly dependent and the coarse problem singular) */
1882: /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */
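/* vertices are used directly as representatives; for each additional constraint the first
   node of its support not already taken is chosen, giving one distinct local node per primal dof */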
1883: VecSet(pcis->vec1_N,zero);
1884: ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1885: VecGetArray(pcis->vec1_N,&array);
1886: for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */
1887: array[ vertices[i] ] = one;
1888: auxlocal_primal[i] = vertices[i];
1889: }
1890: ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);
1891: for(i=0;i<pcbddc->n_constraints;i++) {
1892: MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1893: for (j=0; j<size_of_constraint; j++) {
1894: k = row_cmat_indices[j];
1895: if( array[k] == zero ) {
1896: array[k] = one;
1897: auxlocal_primal[i+pcbddc->n_vertices] = k;
1898: break;
1899: }
1900: }
1901: MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);
1902: }
1903: VecRestoreArray(pcis->vec1_N,&array);
1905: /* Now assign them a global numbering */
1906: /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */
1907: ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);
1908: /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */
1909: MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);
1911: /* After this block all_auxglobal_primal should contain one copy of each primal node's index in global nodes numbering */
1912: /* It implements a function similar to PetscSortRemoveDupsInt */
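/* after sorting, consecutive duplicates are compacted in place; the number of distinct
   entries should match pcbddc->coarse_size */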
1913: if(rank_prec_comm==0) {
1914: /* dummy argument since PetscSortMPIInt doesn't exist! */
1915: PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);
1916: k=1;
1917: j=all_auxglobal_primal[0]; /* first dof in global numbering */
1918: for(i=1;i< pcbddc->replicated_primal_size ;i++) {
1919: if(j != all_auxglobal_primal[i] ) {
1920: all_auxglobal_primal[k]=all_auxglobal_primal[i];
1921: k++;
1922: j=all_auxglobal_primal[i];
1923: }
1924: }
1925: } else {
1926: PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);
1927: }
1928: /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaining elements of the array all_auxglobal_primal are garbage. */
1929: MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);
1930:
1931: /* Now get global coarse numbering of local primal nodes */
1932: for(i=0;i<pcbddc->local_primal_size;i++) {
1933: k=0;
1934: while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;}
1935: pcbddc->local_primal_indices[i]=k;
1936: }
1937: if(dbg_flag) {
1938: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
1939: PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");
1940: PetscViewerFlush(viewer);
1941: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);
1942: for(i=0;i<pcbddc->local_primal_size;i++) {
1943: PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
1944: }
1945: PetscViewerFlush(viewer);
1946: }
1947: /* free allocated memory */
1948: PetscFree(auxlocal_primal);
1949: PetscFree(auxglobal_primal);
1950: PetscFree(all_auxglobal_primal);
1951: if(rank_prec_comm == 0) {
1952: PetscFree(all_auxglobal_primal_dummy);
1953: }
1954: }
1956: /* adapt coarse problem type */
1957: if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC )
1958: pcbddc->coarse_problem_type = PARALLEL_BDDC;
1960: switch(pcbddc->coarse_problem_type){
1962: case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements
1963: {
1964: /* we need additional variables */
1965: MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs;
1966: MetisInt *metis_coarse_subdivision;
1967: MetisInt options[METIS_NOPTIONS];
1968: PetscMPIInt size_coarse_comm,rank_coarse_comm;
1969: PetscMPIInt procs_jumps_coarse_comm;
1970: PetscMPIInt *coarse_subdivision;
1971: PetscMPIInt *total_count_recv;
1972: PetscMPIInt *total_ranks_recv;
1973: PetscMPIInt *displacements_recv;
1974: PetscMPIInt *my_faces_connectivity;
1975: PetscMPIInt *petsc_faces_adjncy;
1976: MetisInt *faces_adjncy;
1977: MetisInt *faces_xadj;
1978: PetscMPIInt *number_of_faces;
1979: PetscMPIInt *faces_displacements;
1980: PetscInt *array_int;
1981: PetscMPIInt my_faces=0;
1982: PetscMPIInt total_faces=0;
1983: PetscInt ranks_stretching_ratio;
1985: /* define some quantities */
1986: pcbddc->coarse_communications_type = SCATTERS_BDDC;
1987: coarse_mat_type = MATIS;
1988: coarse_pc_type = PCBDDC;
1989: coarse_ksp_type = KSPCHEBYSHEV;
1991: /* details of coarse decomposition */
1992: n_subdomains = pcbddc->active_procs;
1993: n_parts = n_subdomains/pcbddc->coarsening_ratio;
1994: ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs;
1995: procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
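/* ranks_stretching_ratio is the spacing between active ranks in prec_comm (the code assumes
   active processes sit at ranks 0, ranks_stretching_ratio, 2*ranks_stretching_ratio, ...);
   procs_jumps_coarse_comm is the spacing between the ranks that will host coarse subdomains */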
1997: printf("Coarse algorithm details: \n");
1998: printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));
2000: /* build CSR graph of subdomains' connectivity through faces */
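/* a local node shared with exactly one neighbour (array_int[node]==1 after the counting loop
   below) belongs to a face; every neighbour sharing at least one such node contributes one
   edge of the subdomain connectivity graph */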
2001: PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);
2002: PetscMemzero(array_int,pcis->n*sizeof(PetscInt));
2003: for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2004: for(j=0;j<pcis->n_shared[i];j++){
2005: array_int[ pcis->shared[i][j] ]+=1;
2006: }
2007: }
2008: for(i=1;i<pcis->n_neigh;i++){
2009: for(j=0;j<pcis->n_shared[i];j++){
2010: if(array_int[ pcis->shared[i][j] ] == 1 ){
2011: my_faces++;
2012: break;
2013: }
2014: }
2015: }
2016: //printf("I found %d faces.\n",my_faces);
2018: MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);
2019: PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);
2020: my_faces=0;
2021: for(i=1;i<pcis->n_neigh;i++){
2022: for(j=0;j<pcis->n_shared[i];j++){
2023: if(array_int[ pcis->shared[i][j] ] == 1 ){
2024: my_faces_connectivity[my_faces]=pcis->neigh[i];
2025: my_faces++;
2026: break;
2027: }
2028: }
2029: }
2030: if(rank_prec_comm == master_proc) {
2031: //printf("I found %d total faces.\n",total_faces);
2032: PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);
2033: PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);
2034: PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);
2035: PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);
2036: PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);
2037: }
2038: MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);
2039: if(rank_prec_comm == master_proc) {
2040: faces_xadj[0]=0;
2041: faces_displacements[0]=0;
2042: j=0;
2043: for(i=1;i<size_prec_comm+1;i++) {
2044: faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2045: if(number_of_faces[i-1]) {
2046: j++;
2047: faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2048: }
2049: }
2050: printf("The J I count is %d and should be %d\n",j,n_subdomains);
2051: printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);
2052: }
2053: MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);
2054: PetscFree(my_faces_connectivity);
2055: PetscFree(array_int);
2056: if(rank_prec_comm == master_proc) {
2057: for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2058: printf("This is the face connectivity (actual ranks)\n");
2059: for(i=0;i<n_subdomains;i++){
2060: printf("proc %d is connected with \n",i);
2061: for(j=faces_xadj[i];j<faces_xadj[i+1];j++)
2062: printf("%d ",faces_adjncy[j]);
2063: printf("\n");
2064: }
2065: PetscFree(faces_displacements);
2066: PetscFree(number_of_faces);
2067: PetscFree(petsc_faces_adjncy);
2068: }
2070: if( rank_prec_comm == master_proc ) {
2072: PetscInt heuristic_for_metis=3;
2074: ncon=1;
2075: faces_nvtxs=n_subdomains;
2076: /* partition the graph induced by face connectivity */
2077: PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);
2078: METIS_SetDefaultOptions(options);
2079: /* we need a contiguous partition of the coarse mesh */
2080: options[METIS_OPTION_CONTIG]=1;
2081: options[METIS_OPTION_DBGLVL]=1;
2082: options[METIS_OPTION_NITER]=30;
2083: //options[METIS_OPTION_NCUTS]=1;
2084: printf("METIS PART GRAPH\n");
2085: if(n_subdomains>n_parts*heuristic_for_metis) {
2086: printf("Using Kway\n");
2087: options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2088: options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2089: METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2090: } else {
2091: printf("Using Recursive\n");
2092: METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2093: }
2094: if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway/Recursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
2095: printf("Partition done!\n");
2096: PetscFree(faces_xadj);
2097: PetscFree(faces_adjncy);
2098: coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */
2099: /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2100: for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2101: for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2102: PetscFree(metis_coarse_subdivision);
2103: }
2105: /* Create a new communicator for the coarse problem by splitting the old one */
2106: if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2107: coarse_color=0; //for communicator splitting
2108: active_rank=rank_prec_comm; //for insertion of matrix values
2109: }
2110: // procs with coarse_color = MPI_UNDEFINED will get coarse_comm = MPI_COMM_NULL (per the MPI standard)
2111: // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator
2112: MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);
2114: if( coarse_color == 0 ) {
2115: MPI_Comm_size(coarse_comm,&size_coarse_comm);
2116: MPI_Comm_rank(coarse_comm,&rank_coarse_comm);
2117: printf("Details of coarse comm\n");
2118: printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm);
2119: printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);
2120: } else {
2121: rank_coarse_comm = MPI_PROC_NULL;
2122: }
2124: /* master proc takes care of arranging and distributing coarse information */
2125: if(rank_coarse_comm == master_proc) {
2126: PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);
2127: //PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);
2128: //PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);
2129: total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
2130: total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt));
2131: /* some initializations */
2132: displacements_recv[0]=0;
2133: //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero
2134: /* count from how many processes the j-th process of the coarse decomposition will receive data */
2135: for(j=0;j<size_coarse_comm;j++)
2136: for(i=0;i<size_prec_comm;i++)
2137: if(coarse_subdivision[i]==j)
2138: total_count_recv[j]++;
2139: /* displacements needed for scatterv of total_ranks_recv */
2140: for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2141: /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2142: PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));
2143: for(j=0;j<size_coarse_comm;j++) {
2144: for(i=0;i<size_prec_comm;i++) {
2145: if(coarse_subdivision[i]==j) {
2146: total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2147: total_count_recv[j]+=1;
2148: }
2149: }
2150: }
2151: for(j=0;j<size_coarse_comm;j++) {
2152: printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2153: for(i=0;i<total_count_recv[j];i++) {
2154: printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2155: }
2156: printf("\n");
2157: }
2159: /* identify new decomposition in terms of ranks in the old communicator */
2160: for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2161: printf("coarse_subdivision in old end new ranks\n");
2162: for(i=0;i<size_prec_comm;i++)
2163: if(coarse_subdivision[i]!=MPI_PROC_NULL) {
2164: printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2165: } else {
2166: printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2167: }
2168: printf("\n");
2169: }
2171: /* Scatter new decomposition for send details */
2172: MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);
2173: /* Scatter receiving details to members of coarse decomposition */
2174: if( coarse_color == 0) {
2175: MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);
2176: PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);
2177: MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);
2178: }
2180: //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to);
2181: //if(coarse_color == 0) {
2182: // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2183: // for(i=0;i<count_recv;i++)
2184: // printf("%d ",ranks_recv[i]);
2185: // printf("\n");
2186: //}
2188: if(rank_prec_comm == master_proc) {
2189: //PetscFree(coarse_subdivision);
2190: //PetscFree(total_count_recv);
2191: //PetscFree(total_ranks_recv);
2192: free(coarse_subdivision);
2193: free(total_count_recv);
2194: free(total_ranks_recv);
2195: PetscFree(displacements_recv);
2196: }
2197: break;
2198: }
2200: case(REPLICATED_BDDC):
2202: pcbddc->coarse_communications_type = GATHERS_BDDC;
2203: coarse_mat_type = MATSEQAIJ;
2204: coarse_pc_type = PCLU;
2205: coarse_ksp_type = KSPPREONLY;
2206: coarse_comm = PETSC_COMM_SELF;
2207: active_rank = rank_prec_comm;
2208: break;
2210: case(PARALLEL_BDDC):
2212: pcbddc->coarse_communications_type = SCATTERS_BDDC;
2213: coarse_mat_type = MATMPIAIJ;
2214: coarse_pc_type = PCREDUNDANT;
2215: coarse_ksp_type = KSPPREONLY;
2216: coarse_comm = prec_comm;
2217: active_rank = rank_prec_comm;
2218: break;
2220: case(SEQUENTIAL_BDDC):
2221: pcbddc->coarse_communications_type = GATHERS_BDDC;
2222: coarse_mat_type = MATSEQAIJ;
2223: coarse_pc_type = PCLU;
2224: coarse_ksp_type = KSPPREONLY;
2225: coarse_comm = PETSC_COMM_SELF;
2226: active_rank = master_proc;
2227: break;
2228: }
2230: switch(pcbddc->coarse_communications_type){
2232: case(SCATTERS_BDDC):
2233: {
2234: if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2236: PetscMPIInt send_size;
2237: PetscInt *aux_ins_indices;
2238: PetscInt ii,jj;
2239: MPI_Request *requests;
2241: /* allocate auxiliary space */
2242: PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
2243: MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);
2244: PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);
2245: PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));
2246: /* allocate buffers for message passing */
2247: PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);
2248: for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
2249: PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);
2250: PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);
2251: /* fill up quantities */
2252: j=0;
2253: for(i=0;i<count_recv;i++){
2254: ii = ranks_recv[i];
2255: localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
2256: localdispl2[i]=j;
2257: j+=localsizes2[i];
2258: jj = pcbddc->local_primal_displacements[ii];
2259: for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node
2260: }
2261: //printf("aux_ins_indices 1\n");
2262: //for(i=0;i<pcbddc->coarse_size;i++)
2263: // printf("%d ",aux_ins_indices[i]);
2264: //printf("\n");
2265: /* temp_coarse_mat_vals used to temporarily store received matrix values */
2266: PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);
2267: /* evaluate how many values I will insert in coarse mat */
2268: ins_local_primal_size=0;
2269: for(i=0;i<pcbddc->coarse_size;i++)
2270: if(aux_ins_indices[i])
2271: ins_local_primal_size++;
2272: /* evaluate indices I will insert in coarse mat */
2273: PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
2274: j=0;
2275: for(i=0;i<pcbddc->coarse_size;i++)
2276: if(aux_ins_indices[i])
2277: ins_local_primal_indices[j++]=i;
2278: /* use aux_ins_indices to realize a global to local mapping */
2279: j=0;
2280: for(i=0;i<pcbddc->coarse_size;i++){
2281: if(aux_ins_indices[i]==0){
2282: aux_ins_indices[i]=-1;
2283: } else {
2284: aux_ins_indices[i]=j;
2285: j++;
2286: }
2287: }
2289: //printf("New details localsizes2 localdispl2\n");
2290: //for(i=0;i<count_recv;i++)
2291: // printf("(%d %d) ",localsizes2[i],localdispl2[i]);
2292: //printf("\n");
2293: //printf("aux_ins_indices 2\n");
2294: //for(i=0;i<pcbddc->coarse_size;i++)
2295: // printf("%d ",aux_ins_indices[i]);
2296: //printf("\n");
2297: //printf("ins_local_primal_indices\n");
2298: //for(i=0;i<ins_local_primal_size;i++)
2299: // printf("%d ",ins_local_primal_indices[i]);
2300: //printf("\n");
2301: //printf("coarse_submat_vals\n");
2302: //for(i=0;i<pcbddc->local_primal_size;i++)
2303: // for(j=0;j<pcbddc->local_primal_size;j++)
2304: // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]);
2305: //printf("\n");
2306:
2307: /* processes participating in the coarse problem receive matrix data from their friends */
2308: for(i=0;i<count_recv;i++) MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);
2309: if(rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2310: send_size=pcbddc->local_primal_size*pcbddc->local_primal_size;
2311: MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);
2312: }
2313: MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);
2315: //if(coarse_color == 0) {
2316: // printf("temp_coarse_mat_vals\n");
2317: // for(k=0;k<count_recv;k++){
2318: // printf("---- %d ----\n",ranks_recv[k]);
2319: // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++)
2320: // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++)
2321: // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]);
2322: // printf("\n");
2323: // }
2324: //}
2325: /* calculate data to insert in coarse mat */
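/* each received block is a dense local_primal_sizes[rr] x local_primal_sizes[rr] matrix in
   column major order; its entries are accumulated into ins_coarse_mat_vals using
   aux_ins_indices as a global-to-local map for the coarse dofs */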
2326: PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
2327: PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar));
2329: PetscMPIInt rr,kk,lps,lpd;
2330: PetscInt row_ind,col_ind;
2331: for(k=0;k<count_recv;k++){
2332: rr = ranks_recv[k];
2333: kk = localdispl2[k];
2334: lps = pcbddc->local_primal_sizes[rr];
2335: lpd = pcbddc->local_primal_displacements[rr];
2336: //printf("Inserting the following indices (received from %d)\n",rr);
2337: for(j=0;j<lps;j++){
2338: col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]];
2339: for(i=0;i<lps;i++){
2340: row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]];
2341: //printf("%d %d\n",row_ind,col_ind);
2342: ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i];
2343: }
2344: }
2345: }
2346: PetscFree(requests);
2347: PetscFree(aux_ins_indices);
2348: PetscFree(temp_coarse_mat_vals);
2349: if(coarse_color == 0) { PetscFree(ranks_recv); }
2351: /* create local to global mapping needed by coarse MATIS */
2352: {
2353: IS coarse_IS;
2354: if(coarse_comm != MPI_COMM_NULL ) MPI_Comm_free(&coarse_comm);
2355: coarse_comm = prec_comm;
2356: active_rank=rank_prec_comm;
2357: ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);
2358: ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);
2359: ISDestroy(&coarse_IS);
2360: }
2361: }
2362: if(pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2363: /* arrays for values insertion */
2364: ins_local_primal_size = pcbddc->local_primal_size;
2365: PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);
2366: PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
2367: for(j=0;j<ins_local_primal_size;j++){
2368: ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2369: for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2370: }
2371: }
2372: break;
2373:
2374: }
2376: case(GATHERS_BDDC):
2377: {
2379: PetscMPIInt mysize,mysize2;
2381: if(rank_prec_comm==active_rank) {
2382: PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
2383: pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar));
2384: PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);
2385: PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);
2386: /* arrays for values insertion */
2387: ins_local_primal_size = pcbddc->coarse_size;
2388: PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);
2389: PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
2390: for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2391: localdispl2[0]=0;
2392: for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2393: j=0;
2394: for(i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2395: PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);
2396: }
2398: mysize=pcbddc->local_primal_size;
2399: mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2400: if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2401: MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);
2402: MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);
2403: } else {
2404: MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);
2405: MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);
2406: }
2408: /* free data structures no longer needed and allocate some space which will be needed in BDDC application */
2409: if(rank_prec_comm==active_rank) {
2410: PetscInt offset,offset2,row_ind,col_ind;
2411: for(j=0;j<ins_local_primal_size;j++){
2412: ins_local_primal_indices[j]=j;
2413: for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0;
2414: }
2415: for(k=0;k<size_prec_comm;k++){
2416: offset=pcbddc->local_primal_displacements[k];
2417: offset2=localdispl2[k];
2418: for(j=0;j<pcbddc->local_primal_sizes[k];j++){
2419: col_ind=pcbddc->replicated_local_primal_indices[offset+j];
2420: for(i=0;i<pcbddc->local_primal_sizes[k];i++){
2421: row_ind=pcbddc->replicated_local_primal_indices[offset+i];
2422: ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i];
2423: }
2424: }
2425: }
2426: }
2427: break;
2428: }//end of switch on coarse problem type and associated communications
2429: }
2431: /* Now create and fill up coarse matrix */
2432: if( rank_prec_comm == active_rank ) {
2433: if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2434: MatCreate(coarse_comm,&pcbddc->coarse_mat);
2435: MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);
2436: MatSetType(pcbddc->coarse_mat,coarse_mat_type);
2437: MatSetUp(pcbddc->coarse_mat);
2438: MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE); //local values stored in column major
2439: MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
2440: } else {
2441: Mat matis_coarse_local_mat;
2442: /* note: reuse the block size bs of the fine-level local matrix */
2443: MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);
2444: MatSetUp(pcbddc->coarse_mat);
2445: MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);
2446: MatSetUp(matis_coarse_local_mat);
2447: MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE); //local values stored in column major
2448: MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
2449: }
2450: MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
2451: MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
2452: MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
2453: MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
2455: MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);
2456: /* Preconditioner for coarse problem */
2457: KSPCreate(coarse_comm,&pcbddc->coarse_ksp);
2458: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
2459: KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);
2460: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);
2461: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
2462: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
2463: PCSetType(pc_temp,coarse_pc_type);
2464: /* Allow user's customization */
2465: KSPSetFromOptions(pcbddc->coarse_ksp);
2466: /* Set Up PC for coarse problem BDDC */
2467: if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2468: if(dbg_flag) {
2469: PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");
2470: PetscViewerFlush(viewer);
2471: }
2472: PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);
2473: }
2474: KSPSetUp(pcbddc->coarse_ksp);
2475: if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2476: if(dbg_flag) {
2477: PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");
2478: PetscViewerFlush(viewer);
2479: }
2480: }
2481: }
2482: if(pcbddc->coarse_communications_type == SCATTERS_BDDC) {
2483: IS local_IS,global_IS;
2484: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);
2485: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);
2486: VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);
2487: ISDestroy(&local_IS);
2488: ISDestroy(&global_IS);
2489: }
2492: /* Estimate the condition number of the coarse problem for the Chebyshev solver (and verbose output if requested) */
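/* The multilevel coarse solver is Chebyshev, which needs bounds on the spectrum: run a few
   iterations of GMRES with singular value computation on a random right-hand side, then pass
   the estimated extreme eigenvalues to KSPChebyshevSetEigenvalues below */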
2493: if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) {
2494: PetscScalar m_one=-1.0;
2495: PetscReal infty_error,lambda_min,lambda_max,kappa_2;
2496: const KSPType check_ksp_type=KSPGMRES;
2498: /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */
2499: KSPSetType(pcbddc->coarse_ksp,check_ksp_type);
2500: KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);
2501: KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);
2502: KSPSetUp(pcbddc->coarse_ksp);
2503: VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);
2504: MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);
2505: MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
2506: KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);
2507: KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);
2508: if(dbg_flag) {
2509: kappa_2=lambda_max/lambda_min;
2510: KSPGetIterationNumber(pcbddc->coarse_ksp,&k);
2511: VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);
2512: VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);
2513: PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);
2514: PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);
2515: PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);
2516: }
2517: /* restore coarse ksp to default values */
2518: KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);
2519: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
2520: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
2521: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);
2522: KSPSetFromOptions(pcbddc->coarse_ksp);
2523: KSPSetUp(pcbddc->coarse_ksp);
2524: }
2526: /* free data structures no longer needed */
2527: if(coarse_ISLG) { ISLocalToGlobalMappingDestroy(&coarse_ISLG); }
2528: if(ins_local_primal_indices) { PetscFree(ins_local_primal_indices); }
2529: if(ins_coarse_mat_vals) { PetscFree(ins_coarse_mat_vals);}
2530: if(localsizes2) { PetscFree(localsizes2);}
2531: if(localdispl2) { PetscFree(localdispl2);}
2532: if(temp_coarse_mat_vals) { PetscFree(temp_coarse_mat_vals);}
2534: return(0);
2535: }
2539: static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
2540: {
2542: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2543: PC_IS *pcis = (PC_IS*)pc->data;
2544: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2545: PCBDDCGraph mat_graph;
2546: Mat mat_adj;
2547: PetscInt **neighbours_set;
2548: PetscInt *queue_in_global_numbering;
2549: PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
2550: PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size;
2551: PetscMPIInt adapt_interface=0,adapt_interface_reduced=0;
2552: PetscBool same_set,flg_row;
2553: PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE;
2554: MPI_Comm interface_comm=((PetscObject)pc)->comm;
2555: PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE;
2556: const PetscInt *neumann_nodes;
2557: const PetscInt *dirichlet_nodes;
2558: IS used_IS;
2561: /* allocate and initialize needed graph structure */
2562: PetscMalloc(sizeof(*mat_graph),&mat_graph);
2563: MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2564: /* MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj); */
2565: MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);
2566: if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n");
2567: i = mat_graph->nvtxs;
2568: PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);
2569: PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);
2570: PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));
2571: PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
2572: PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));
2573: PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
2574: PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
2575: for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;}
2576:
2577: /* Setting dofs splitting in mat_graph->which_dof */
2578: if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */
2579: PetscInt *is_indices;
2580: PetscInt is_size;
2581: for(i=0;i<pcbddc->n_ISForDofs;i++) {
2582: ISGetSize(pcbddc->ISForDofs[i],&is_size);
2583: ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
2584: for(j=0;j<is_size;j++) {
2585: mat_graph->which_dof[is_indices[j]]=i;
2586: }
2587: ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
2588: }
2589: /* use mat block size as vertex size */
2590: MatGetBlockSize(matis->A,&vertex_size);
2591: } else { /* otherwise it assumes a constant block size */
2592: MatGetBlockSize(matis->A,&bs);
2593: for(i=0;i<mat_graph->nvtxs/bs;i++) {
2594: for(s=0;s<bs;s++) {
2595: mat_graph->which_dof[i*bs+s]=s;
2596: }
2597: }
2598: vertex_size=1;
2599: }
2600: /* count number of neigh per node */
2601: total_counts=0;
2602: for(i=1;i<pcis->n_neigh;i++){
2603: s=pcis->n_shared[i];
2604: total_counts+=s;
2605: for(j=0;j<s;j++){
2606: mat_graph->count[pcis->shared[i][j]] += 1;
2607: }
2608: }
2609: /* Take into account Neumann data -> it increments the number of sharing subdomains for all interface nodes except those belonging to faces */
2610: PCBDDCGetNeumannBoundaries(pc,&used_IS);
2611: if(used_IS) {
2612: ISGetSize(used_IS,&neumann_bsize);
2613: ISGetIndices(used_IS,&neumann_nodes);
2614: for(i=0;i<neumann_bsize;i++){
2615: iindex = neumann_nodes[i];
2616: if(mat_graph->count[iindex] > 1){
2617: mat_graph->count[iindex]+=1;
2618: total_counts++;
2619: }
2620: }
2621: }
2622: /* allocate space for storing the set of neighbours of each node */
2623: PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);
2624: if(mat_graph->nvtxs) { PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]); }
2625: for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1];
2626: PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
2627: for(i=1;i<pcis->n_neigh;i++){
2628: s=pcis->n_shared[i];
2629: for(j=0;j<s;j++) {
2630: k=pcis->shared[i][j];
2631: neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];
2632: mat_graph->count[k]+=1;
2633: }
2634: }
2635: /* set a fake neighbour (-1) to mimic the Neumann boundary */
2636: if(used_IS) {
2637: for(i=0;i<neumann_bsize;i++){
2638: iindex = neumann_nodes[i];
2639: if(mat_graph->count[iindex] > 1){
2640: neighbours_set[iindex][mat_graph->count[iindex]] = -1;
2641: mat_graph->count[iindex]+=1;
2642: }
2643: }
2644: ISRestoreIndices(used_IS,&neumann_nodes);
2645: }
2646: /* sort set of sharing subdomains (needed for comparison below) */
2647: for(i=0;i<mat_graph->nvtxs;i++) { PetscSortInt(mat_graph->count[i],neighbours_set[i]); }
2648: /* remove interior nodes and Dirichlet boundary nodes from the subsequent graph search */
2649: PCBDDCGetDirichletBoundaries(pc,&used_IS);
2650: if(used_IS) {
2651: ISGetSize(used_IS,&dirichlet_bsize);
2652: ISGetIndices(used_IS,&dirichlet_nodes);
2653: for(i=0;i<dirichlet_bsize;i++){
2654: mat_graph->count[dirichlet_nodes[i]]=0;
2655: }
2656: ISRestoreIndices(used_IS,&dirichlet_nodes);
2657: }
2658: for(i=0;i<mat_graph->nvtxs;i++){
2659: if(!mat_graph->count[i]){ /* interior nodes */
2660: mat_graph->touched[i]=PETSC_TRUE;
2661: mat_graph->where[i]=0;
2662: nodes_touched++;
2663: }
2664: }
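  /* nodes with an empty sharing set (interior nodes and Dirichlet boundary nodes) are assigned to
     class 0 and excluded from the interface analysis that follows */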
2665: mat_graph->ncmps = 0;
2666: while(nodes_touched<mat_graph->nvtxs) {
2667: /* find first untouched node in local ordering */
2668: i=0;
2669: while(mat_graph->touched[i]) i++;
2670: mat_graph->touched[i]=PETSC_TRUE;
2671: mat_graph->where[i]=where_values;
2672: nodes_touched++;
2673: /* now find all other nodes having the same set of sharing subdomains */
2674: for(j=i+1;j<mat_graph->nvtxs;j++){
2675: /* check for same number of sharing subdomains and dof number */
2676: if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){
2677: /* check for same set of sharing subdomains */
2678: same_set=PETSC_TRUE;
2679: for(k=0;k<mat_graph->count[j];k++){
2680: if(neighbours_set[i][k]!=neighbours_set[j][k]) {
2681: same_set=PETSC_FALSE;
2682: }
2683: }
2684:         /* found another node with the same set of sharing subdomains */
2685: if(same_set) {
2686: mat_graph->where[j]=where_values;
2687: mat_graph->touched[j]=PETSC_TRUE;
2688: nodes_touched++;
2689: }
2690: }
2691: }
2692: where_values++;
2693: }
2694: where_values--; if(where_values<0) where_values=0;
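  /* where[v] is now 0 for excluded nodes, while each nonzero value labels an equivalence class of
     interface nodes having the same sorted set of sharing subdomains and the same dof field;
     classes are numbered 1..where_values. */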
2695: PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
2696: /* Find connected components defined on the shared interface */
2697: if(where_values) {
2698: PCBDDCFindConnectedComponents(mat_graph, where_values);
2699:     /* For consistency among neighbouring procs, sort each connected component by global ordering */
2700: for(i=0;i<mat_graph->ncmps;i++) {
2701: ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
2702: PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
2703: }
2704: }
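  /* queue and cptr now store the connected components in CSR-like fashion: component i occupies
     queue[cptr[i]] .. queue[cptr[i+1]-1], sorted by global numbering, and queue_in_global_numbering
     holds the corresponding global indices. */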
2705:   /* check consistency of connected components among neighbouring subdomains -> adapt them if needed */
2706: for(i=0;i<where_values;i++) {
2707:     /* Two subdomains sharing a subset of the local interface are not guaranteed to detect the same connected components */
2708: if(mat_graph->where_ncmps[i]>1) {
2709: adapt_interface=1;
2710: break;
2711: }
2712: }
2713: MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);
2714: if(where_values && adapt_interface_reduced) {
2716:     if(pcbddc->dbg_flag) { PetscPrintf(PETSC_COMM_SELF,"Adapting interface on subdomain %04d\n",PetscGlobalRank); }
2718: PetscInt sum_requests=0,my_rank;
2719: PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send;
2720: PetscInt temp_buffer_size,ins_val,global_where_counter;
2721: PetscInt *cum_recv_counts;
2722: PetscInt *where_to_nodes_indices;
2723: PetscInt *petsc_buffer;
2724: PetscMPIInt *recv_buffer;
2725: PetscMPIInt *recv_buffer_where;
2726: PetscMPIInt *send_buffer;
2727: PetscMPIInt size_of_send;
2728: PetscInt *sizes_of_sends;
2729: MPI_Request *send_requests;
2730: MPI_Request *recv_requests;
2731: PetscInt *where_cc_adapt;
2732: PetscInt **temp_buffer;
2733: PetscInt *nodes_to_temp_buffer_indices;
2734: PetscInt *add_to_where;
2736: MPI_Comm_rank(interface_comm,&my_rank);
2737: PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);
2738: PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));
2739: PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);
2740: /* first count how many neighbours per connected component I will receive from */
2741: cum_recv_counts[0]=0;
2742: for(i=1;i<where_values+1;i++){
2743: j=0;
2744: while(mat_graph->where[j] != i) j++;
2745: where_to_nodes_indices[i-1]=j;
2746:       if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; }
2747:       else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } /* skip the fake -1 (Neumann) neighbour: no sends/recvs are posted to/from it */
2748: }
2749: buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs;
2750: PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);
2751: PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);
2752: PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);
2753: PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);
2754: for(i=0;i<cum_recv_counts[where_values];i++) {
2755: send_requests[i]=MPI_REQUEST_NULL;
2756: recv_requests[i]=MPI_REQUEST_NULL;
2757: }
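    /* The adaptation below exchanges data with the subdomains sharing each equivalence class in three
       point-to-point rounds:
         1) the number of local connected components detected on the class,
         2) the size of the cc data to be sent for the classes that need adapting,
         3) the cc data itself, packed per component as [size, global node indices].
       Send tags are (my_rank+1)*count[j] and receive tags (neighbour+1)*count[j], i.e. the tag encodes
       the sending rank and the number of subdomains sharing the class. */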
2758: /* exchange with my neighbours the number of my connected components on the shared interface */
2759: for(i=0;i<where_values;i++){
2760: j=where_to_nodes_indices[i];
2761: k = (neighbours_set[j][0] == -1 ? 1 : 0);
2762: for(;k<mat_graph->count[j];k++){
2763: MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
2764: MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
2765: sum_requests++;
2766: }
2767: }
2768: MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
2769: MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
2770:     /* determine which connected components need adapting */
2771: PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);
2772: PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));
2773: for(i=0;i<where_values;i++){
2774: for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){
2775:         /* The first condition is the natural one (i.e. some neighbour sees a different number of ccs than we do); the second is just to be safe */
2776: if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) {
2777: where_cc_adapt[i]=PETSC_TRUE;
2778: break;
2779: }
2780: }
2781: }
2782:     /* now get the neighbours' ccs (in global numbering) and adapt them if needed */
2783: /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */
2784: PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);
2785: PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));
2786: sum_requests=0;
2787: start_of_send=0;
2788: start_of_recv=cum_recv_counts[where_values];
2789: for(i=0;i<where_values;i++) {
2790: if(where_cc_adapt[i]) {
2791: size_of_send=0;
2792: for(j=i;j<mat_graph->ncmps;j++) {
2793:           if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values range from 1 to where_values inclusive */
2794: send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j];
2795: size_of_send+=1;
2796: for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) {
2797: send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k];
2798: }
2799: size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
2800: }
2801: }
2802: j = where_to_nodes_indices[i];
2803: k = (neighbours_set[j][0] == -1 ? 1 : 0);
2804: for(;k<mat_graph->count[j];k++){
2805: MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
2806: MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
2807: sum_requests++;
2808: }
2809: sizes_of_sends[i]=size_of_send;
2810: start_of_send+=size_of_send;
2811: }
2812: }
2813: MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
2814: MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
2815: buffer_size=0;
2816: for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; }
2817: PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);
2818: /* now exchange the data */
2819: start_of_recv=0;
2820: start_of_send=0;
2821: sum_requests=0;
2822: for(i=0;i<where_values;i++) {
2823: if(where_cc_adapt[i]) {
2824: size_of_send = sizes_of_sends[i];
2825: j = where_to_nodes_indices[i];
2826: k = (neighbours_set[j][0] == -1 ? 1 : 0);
2827: for(;k<mat_graph->count[j];k++){
2828: MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
2829: size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
2830: MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
2831: start_of_recv+=size_of_recv;
2832: sum_requests++;
2833: }
2834: start_of_send+=size_of_send;
2835: }
2836: }
2837: MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
2838: MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
2839: PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);
2840: for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; }
2841: for(j=0;j<buffer_size;) {
2842: ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);
2843: k=petsc_buffer[j]+1;
2844: j+=k;
2845: }
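    /* each received block in petsc_buffer is packed as [nnodes, node_0, ..., node_{nnodes-1}]; the loop
       above converts the node indices in place from global to local numbering (with IS_GTOLM_MASK the
       output count equals the input count, so the size entries are left untouched). */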
2846: sum_requests=cum_recv_counts[where_values];
2847: start_of_recv=0;
2848: PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);
2849: global_where_counter=0;
2850: for(i=0;i<where_values;i++){
2851: if(where_cc_adapt[i]){
2852: temp_buffer_size=0;
2853: /* find nodes on the shared interface we need to adapt */
2854: for(j=0;j<mat_graph->nvtxs;j++){
2855: if(mat_graph->where[j]==i+1) {
2856: nodes_to_temp_buffer_indices[j]=temp_buffer_size;
2857: temp_buffer_size++;
2858: } else {
2859: nodes_to_temp_buffer_indices[j]=-1;
2860: }
2861: }
2862: /* allocate some temporary space */
2863: PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);
2864: PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);
2865: PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));
2866: for(j=1;j<temp_buffer_size;j++){
2867: temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
2868: }
2869:         /* analyze contributions from neighbouring subdomains for the i-th connected component.
2870:            temp_buffer structure:
2871:            suppose this part of the interface has dimension 5 (global nodes 0,1,2,3,4) and that
2872:            3 neighbouring procs detected the following connected components:
2873:              neigh 0: [0 1 4], [2 3]; (2 connected components)
2874:              neigh 1: [0 1], [2 3 4]; (2 connected components)
2875:              neigh 2: [0 4], [1], [2 3]; (3 connected components)
2876:            temp_buffer (row-oriented, one column per neighbour) is then filled as:
2877:              [ 0, 0, 0;
2878:                0, 0, 1;
2879:                1, 1, 2;
2880:                1, 1, 2;
2881:                0, 1, 0; ];
2882:            This way the resulting structure can be recovered simply, accounting for possible intersections of the ccs among neighbours.
2883:            The mat_graph->where array will be modified to reproduce the following 4 connected components: [0], [1], [2 3], [4].
2884:         */
2885: for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
2886: ins_val=0;
2887: size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */
2888:           for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from this neighbour has been taken into account */
2889:             for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* fill temp_buffer with one connected component of this neighbour */
2890: temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val;
2891: }
2892: buffer_size+=k;
2893: ins_val++;
2894: }
2895: start_of_recv+=size_of_recv;
2896: sum_requests++;
2897: }
2898: PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);
2899: PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));
2900: for(j=0;j<temp_buffer_size;j++){
2901: if(!add_to_where[j]){ /* found a new cc */
2902: global_where_counter++;
2903: add_to_where[j]=global_where_counter;
2904: for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */
2905: same_set=PETSC_TRUE;
2906: for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){
2907: if(temp_buffer[j][s]!=temp_buffer[k][s]) {
2908: same_set=PETSC_FALSE;
2909: break;
2910: }
2911: }
2912: if(same_set) add_to_where[k]=global_where_counter;
2913: }
2914: }
2915: }
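        /* add_to_where[t] labels the refined connected component of the t-th node of this class: two
           nodes stay together only if every contributing subdomain placed them in the same cc. The new
           labels are offset by where_values below so they cannot clash with classes not yet processed. */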
2916: /* insert new data in where array */
2917: temp_buffer_size=0;
2918: for(j=0;j<mat_graph->nvtxs;j++){
2919: if(mat_graph->where[j]==i+1) {
2920: mat_graph->where[j]=where_values+add_to_where[temp_buffer_size];
2921: temp_buffer_size++;
2922: }
2923: }
2924: PetscFree(temp_buffer[0]);
2925: PetscFree(temp_buffer);
2926: PetscFree(add_to_where);
2927: }
2928: }
2929: PetscFree(nodes_to_temp_buffer_indices);
2930: PetscFree(sizes_of_sends);
2931: PetscFree(send_requests);
2932: PetscFree(recv_requests);
2933: PetscFree(petsc_buffer);
2934: PetscFree(recv_buffer);
2935: PetscFree(recv_buffer_where);
2936: PetscFree(send_buffer);
2937: PetscFree(cum_recv_counts);
2938: PetscFree(where_to_nodes_indices);
2939: /* We are ready to evaluate consistent connected components on each part of the shared interface */
2940: if(global_where_counter) {
2941: for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; }
2942: global_where_counter=0;
2943: for(i=0;i<mat_graph->nvtxs;i++){
2944: if(mat_graph->where[i] && !mat_graph->touched[i]) {
2945: global_where_counter++;
2946: for(j=i+1;j<mat_graph->nvtxs;j++){
2947: if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
2948: mat_graph->where[j]=global_where_counter;
2949: mat_graph->touched[j]=PETSC_TRUE;
2950: }
2951: }
2952: mat_graph->where[i]=global_where_counter;
2953: mat_graph->touched[i]=PETSC_TRUE;
2954: }
2955: }
2956: where_values=global_where_counter;
2957: }
2958: if(global_where_counter) {
2959: PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
2960: PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
2961: PetscFree(mat_graph->where_ncmps);
2962: PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
2963: PCBDDCFindConnectedComponents(mat_graph, where_values);
2964: for(i=0;i<mat_graph->ncmps;i++) {
2965: ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
2966: PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
2967: }
2968: }
2969: } /* Finished adapting interface */
2970: PetscInt nfc=0;
2971: PetscInt nec=0;
2972: PetscInt nvc=0;
2973: PetscBool twodim_flag=PETSC_FALSE;
2974: for (i=0; i<mat_graph->ncmps; i++) {
2975: if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
2976: if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */
2977: nfc++;
2978: } else { /* note that nec will be zero in 2d */
2979: nec++;
2980: }
2981: } else {
2982: nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i];
2983: }
2984: }
2986: if(!nec) { /* we are in a 2d case -> no faces, only edges */
2987: nec = nfc;
2988: nfc = 0;
2989: twodim_flag = PETSC_TRUE;
2990: }
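  /* Classification rule: connected components larger than vertex_size that are shared with exactly one
     other subdomain are faces, larger components shared with more subdomains are edges, and the
     remaining (small) components contribute their nodes to the vertex set; in 2D no faces are detected,
     so the face count computed above is re-interpreted as an edge count. */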
2991: /* allocate IS arrays for faces, edges. Vertices need a single index set.
2992: Reusing space allocated in mat_graph->where for creating IS objects */
2993: if(!pcbddc->vertices_flag && !pcbddc->edges_flag) {
2994: PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);
2995: use_faces=PETSC_TRUE;
2996: }
2997: if(!pcbddc->vertices_flag && !pcbddc->faces_flag) {
2998: PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);
2999: use_edges=PETSC_TRUE;
3000: }
3001: nfc=0;
3002: nec=0;
3003: for (i=0; i<mat_graph->ncmps; i++) {
3004: if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){
3005: for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) {
3006: mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j];
3007: }
3008: if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){
3009: if(twodim_flag) {
3010: if(use_edges) {
3011: ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
3012: nec++;
3013: }
3014: } else {
3015: if(use_faces) {
3016: ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);
3017: nfc++;
3018: }
3019: }
3020: } else {
3021: if(use_edges) {
3022: ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
3023: nec++;
3024: }
3025: }
3026: }
3027: }
3028: pcbddc->n_ISForFaces=nfc;
3029: pcbddc->n_ISForEdges=nec;
3030: nvc=0;
3031: if( !pcbddc->constraints_flag ) {
3032: for (i=0; i<mat_graph->ncmps; i++) {
3033: if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){
3034: for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) {
3035: mat_graph->where[nvc]=mat_graph->queue[j];
3036: nvc++;
3037: }
3038: }
3039: }
3040: }
3041: /* sort vertex set (by local ordering) */
3042: PetscSortInt(nvc,mat_graph->where);
3043: ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);
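  /* pcbddc->ISForFaces, pcbddc->ISForEdges and pcbddc->ISForVertices now hold the detected interface
     structure in local numbering, unless excluded by the -pc_bddc_*_only options handled above. */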
3045: if(pcbddc->dbg_flag) {
3046: PetscViewer viewer=pcbddc->dbg_viewer;
3048: PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
3049: PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);
3050: PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
3051: /* PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");
3052: PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
3053: for(i=0;i<mat_graph->nvtxs;i++) {
3054: PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);
3055: for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){
3056: PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);
3057: }
3058: PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");
3059: }
3060: PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);
3061: for(i=0;i<mat_graph->ncmps;i++) {
3062: PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n",
3063: i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);
3064: for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){
3065: PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);
3066: }
3067: }
3068: PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");*/
3069: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);
3070: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);
3071: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);
3072: PetscViewerFlush(viewer);
3073: }
3075:   /* Restore CSR structure into the sequential matrix and free memory no longer needed */
3076: MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);
3077: if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n");
3078: MatDestroy(&mat_adj);
3079: /* Free graph structure */
3080: if(mat_graph->nvtxs){
3081: PetscFree(neighbours_set[0]);
3082: PetscFree(neighbours_set);
3083: PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);
3084: PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);
3085: PetscFree(mat_graph->where_ncmps);
3086: }
3087: PetscFree(mat_graph);
3089: return(0);
3091: }
3093: /* -------------------------------------------------------------------------- */
3095: /* The following code has been adapted from the function IsConnectedSubdomain contained
3096:    in the source file contig.c of the METIS library (version 5.0.1) */
3097:
3100: static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
3101: {
3102: PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
3103: PetscInt *xadj, *adjncy, *where, *queue;
3104: PetscInt *cptr;
3105: PetscBool *touched;
3106:
3109: nvtxs = graph->nvtxs;
3110: xadj = graph->xadj;
3111: adjncy = graph->adjncy;
3112: where = graph->where;
3113: touched = graph->touched;
3114: queue = graph->queue;
3115: cptr = graph->cptr;
3117: for (i=0; i<nvtxs; i++)
3118: touched[i] = PETSC_FALSE;
3120: cum_queue=0;
3121: ncmps=0;
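  /* For each label pid = 1..n_dist, a breadth-first search over the local adjacency graph (xadj/adjncy)
     splits the nodes with where[] == pid into connected components: queue records the visit order, cptr
     the starting offset of each component, and where_ncmps[pid-1] the number of components found. */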
3123: for(n=0; n<n_dist; n++) {
3124: pid = n+1;
3125: nleft = 0;
3126: for (i=0; i<nvtxs; i++) {
3127: if (where[i] == pid)
3128: nleft++;
3129: }
3130: for (i=0; i<nvtxs; i++) {
3131: if (where[i] == pid)
3132: break;
3133: }
3134: touched[i] = PETSC_TRUE;
3135: queue[cum_queue] = i;
3136: first = 0; last = 1;
3137:     cptr[ncmps] = cum_queue; /* cptr stores offsets into queue */
3138: ncmps_pid = 0;
3139: while (first != nleft) {
3140: if (first == last) { /* Find another starting vertex */
3141: cptr[++ncmps] = first+cum_queue;
3142: ncmps_pid++;
3143: for (i=0; i<nvtxs; i++) {
3144: if (where[i] == pid && !touched[i])
3145: break;
3146: }
3147: queue[cum_queue+last] = i;
3148: last++;
3149: touched[i] = PETSC_TRUE;
3150: }
3151: i = queue[cum_queue+first];
3152: first++;
3153: for (j=xadj[i]; j<xadj[i+1]; j++) {
3154: k = adjncy[j];
3155: if (where[k] == pid && !touched[k]) {
3156: queue[cum_queue+last] = k;
3157: last++;
3158: touched[k] = PETSC_TRUE;
3159: }
3160: }
3161: }
3162: cptr[++ncmps] = first+cum_queue;
3163: ncmps_pid++;
3164: cum_queue=cptr[ncmps];
3165: graph->where_ncmps[n] = ncmps_pid;
3166: }
3167: graph->ncmps = ncmps;
3169: return(0);
3170: }