Actual source code: bddc.c
petsc-3.4.5 2014-06-29
1: /* TODOLIST
2: DofSplitting and DM attached to pc?
3: Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet)
4: change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment):
5: - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
6: - remove coarse enums and allow use of PCBDDCGetCoarseKSP
7: - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries?
8: code refactoring:
9: - pick up better names for static functions
10: change options structure:
11: - insert BDDC into MG framework?
12: provide other ops? Ask the developers
13: remove all unused printf
14: man pages
15: */
17: /* ----------------------------------------------------------------------------------------------------------------------------------------------
18: Implementation of BDDC preconditioner based on:
19: C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007
20: ---------------------------------------------------------------------------------------------------------------------------------------------- */
22: #include "bddc.h"
23: #include <petscblaslapack.h>
24: /* -------------------------------------------------------------------------- */
27: PetscErrorCode PCSetFromOptions_BDDC(PC pc)
28: {
29: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
33: PetscOptionsHead("BDDC options");
34: /* Verbose debugging of main data structures */
35: PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,NULL);
36: /* Some customization for default primal space */
37: PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,NULL);
38: PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,NULL);
39: PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,NULL);
40: PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,NULL);
42: /* Coarse solver context */
43: static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /* order of choices depends on the enum defined in bddc.h */
44: PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,NULL);
46: /* Two different applications of BDDC to the whole set of dofs, internal and interface */
47: PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->inexact_prec_type,&pcbddc->inexact_prec_type,NULL);
48: PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,NULL);
49: PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,NULL);
51: pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis;
53: PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,NULL);
54: PetscOptionsInt("-pc_bddc_max_levels","Set maximum number of levels for multilevel","none",pcbddc->max_levels,&pcbddc->max_levels,NULL);
55: PetscOptionsTail();
56: return(0);
57: }
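/* Example (illustrative, not part of the original source): the options above
   are normally set on the command line of the application, e.g.

     ./app -pc_type bddc -pc_bddc_coarsening_ratio 4 -pc_bddc_max_levels 2 \
           -pc_bddc_coarse_problem_type multilevel -pc_bddc_check_all

   where ./app is a placeholder for an executable that calls
   KSPSetFromOptions(), which in turn triggers PCSetFromOptions_BDDC. */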
58: /* -------------------------------------------------------------------------- */
62: static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
63: {
64: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
67: pcbddc->coarse_problem_type = CPT;
68: return(0);
69: }
73: /*@
74: PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC.
76: Not collective
78: Input Parameters:
79: + pc - the preconditioning context
80: - CPT - the coarse problem type
82: Level: intermediate
84: Notes:
85: Not collective, but all processes must call this routine with the same arguments.
87: .seealso: PCBDDC
88: @*/
89: PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
90: {
95: PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));
96: return(0);
97: }
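/* Example (sketch): selecting the coarse problem type in code. The enum value
   name REPLICATED_BDDC is an assumption here; see the CoarseProblemType enum
   declared in bddc.h, whose order matches avail_coarse_problems above.

     PC pc;
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCBDDC);
     PCBDDCSetCoarseProblemType(pc,REPLICATED_BDDC);
*/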
98: /* -------------------------------------------------------------------------- */
101: static PetscErrorCode PCBDDCSetCoarseningRatio_BDDC(PC pc,PetscInt k)
102: {
103: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
106: pcbddc->coarsening_ratio=k;
107: return(0);
108: }
112: /*@
113: PCBDDCSetCoarseningRatio - Set coarsening ratio used in multilevel coarsening
115: Logically collective on PC
117: Input Parameters:
118: + pc - the preconditioning context
119: - k - coarsening ratio
121: Approximately k subdomains at the finer level will be aggregated into a single subdomain at the coarser level.
123: Level: intermediate
125: Notes:
127: .seealso: PCBDDC
128: @*/
129: PetscErrorCode PCBDDCSetCoarseningRatio(PC pc,PetscInt k)
130: {
135: PetscTryMethod(pc,"PCBDDCSetCoarseningRatio_C",(PC,PetscInt),(pc,k));
136: return(0);
137: }
138: /* -------------------------------------------------------------------------- */
142: static PetscErrorCode PCBDDCSetMaxLevels_BDDC(PC pc,PetscInt max_levels)
143: {
144: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
147: pcbddc->max_levels=max_levels;
148: return(0);
149: }
153: /*@
154: PCBDDCSetMaxLevels - Sets the maximum number of levels within the multilevel approach.
156: Logically collective on PC
158: Input Parameters:
159: + pc - the preconditioning context
160: - max_levels - the maximum number of levels
162: The default value is 1, i.e. the coarse problem will be solved inexactly with one application
163: of the PCBDDC preconditioner if the multilevel approach is requested.
165: Level: intermediate
167: Notes:
169: .seealso: PCBDDC
170: @*/
171: PetscErrorCode PCBDDCSetMaxLevels(PC pc,PetscInt max_levels)
172: {
177: PetscTryMethod(pc,"PCBDDCSetMaxLevels_C",(PC,PetscInt),(pc,max_levels));
178: return(0);
179: }
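/* Example (sketch): requesting a three-level method where roughly 8 fine
   subdomains are aggregated per coarse subdomain; pc is assumed to be a PC of
   type PCBDDC, and the multilevel coarse problem type must be selected as
   well (e.g. via -pc_bddc_coarse_problem_type multilevel).

     PCBDDCSetCoarseningRatio(pc,8);
     PCBDDCSetMaxLevels(pc,3);
*/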
180: /* -------------------------------------------------------------------------- */
184: static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace)
185: {
186: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
190: PetscObjectReference((PetscObject)NullSpace);
191: MatNullSpaceDestroy(&pcbddc->NullSpace);
193: pcbddc->NullSpace = NullSpace;
194: return(0);
195: }
199: /*@
200: PCBDDCSetNullSpace - Set the null space of the global operator preconditioned by BDDC.
202: Logically collective on PC and MatNullSpace
204: Input Parameters:
205: + pc - the preconditioning context
206: - NullSpace - Null space of the linear operator to be preconditioned.
208: Level: intermediate
210: Notes:
212: .seealso: PCBDDC
213: @*/
214: PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace)
215: {
220: PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));
221: return(0);
222: }
223: /* -------------------------------------------------------------------------- */
227: static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries)
228: {
229: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
233: ISDestroy(&pcbddc->DirichletBoundaries);
234: PetscObjectReference((PetscObject)DirichletBoundaries);
236: pcbddc->DirichletBoundaries = DirichletBoundaries;
237: return(0);
238: }
242: /*@
243: PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering)
244: of Dirichlet boundaries for the global problem.
246: Not collective
248: Input Parameters:
249: + pc - the preconditioning context
250: - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be NULL)
252: Level: intermediate
254: Notes:
256: .seealso: PCBDDC
257: @*/
258: PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries)
259: {
264: PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));
265: return(0);
266: }
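/* Example (sketch): marking the local Dirichlet dofs; ndir and dir_idx are
   hypothetical application-provided data in local (subdomain) numbering.

     IS dirIS;
     ISCreateGeneral(PETSC_COMM_SELF,ndir,dir_idx,PETSC_COPY_VALUES,&dirIS);
     PCBDDCSetDirichletBoundaries(pc,dirIS);
     ISDestroy(&dirIS);   // the PC keeps its own reference
*/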
267: /* -------------------------------------------------------------------------- */
271: static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries)
272: {
273: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
277: ISDestroy(&pcbddc->NeumannBoundaries);
278: PetscObjectReference((PetscObject)NeumannBoundaries);
280: pcbddc->NeumannBoundaries = NeumannBoundaries;
281: return(0);
282: }
286: /*@
287: PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering)
288: of Neumann boundaries for the global problem.
290: Not collective
292: Input Parameters:
293: + pc - the preconditioning context
294: - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be NULL)
296: Level: intermediate
298: Notes:
300: .seealso: PCBDDC
301: @*/
302: PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
303: {
308: PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));
309: return(0);
310: }
311: /* -------------------------------------------------------------------------- */
315: static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries)
316: {
317: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
320: *DirichletBoundaries = pcbddc->DirichletBoundaries;
321: return(0);
322: }
326: /*@
327: PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering)
328: of Dirichlet boundaries for the global problem.
330: Not collective
332: Input Parameters:
333: + pc - the preconditioning context
335: Output Parameters:
336: . DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries
338: Level: intermediate
340: Notes:
342: .seealso: PCBDDC
343: @*/
344: PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries)
345: {
350: PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));
351: return(0);
352: }
353: /* -------------------------------------------------------------------------- */
357: static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
358: {
359: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
362: *NeumannBoundaries = pcbddc->NeumannBoundaries;
363: return(0);
364: }
368: /*@
369: PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering)
370: of Neumann boundaries for the global problem.
372: Not collective
374: Input Parameters:
375: + pc - the preconditioning context
377: Output Parameters:
378: . NeumannBoundaries - index set defining the subdomain part of Neumann boundaries
380: Level: intermediate
382: Notes:
384: .seealso: PCBDDC
385: @*/
386: PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries)
387: {
392: PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));
393: return(0);
394: }
395: /* -------------------------------------------------------------------------- */
399: static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode)
400: {
401: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
402: PCBDDCGraph mat_graph=pcbddc->mat_graph;
406: mat_graph->nvtxs=nvtxs;
408: PetscFree(mat_graph->xadj);
409: PetscFree(mat_graph->adjncy);
410: if (copymode == PETSC_COPY_VALUES) {
411: PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);
412: PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);
413: PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));
414: PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));
415: } else if (copymode == PETSC_OWN_POINTER) {
416: mat_graph->xadj = (PetscInt*)xadj;
417: mat_graph->adjncy = (PetscInt*)adjncy;
418: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d\n",copymode);
419: return(0);
420: }
424: /*@
425: PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC.
427: Not collective
429: Input Parameters:
430: + pc - the preconditioning context
431: . nvtxs - number of local vertices of the graph
432: . xadj, adjncy - the CSR graph
433: - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in;
434: in the latter case, memory must be obtained with PetscMalloc.
436: Level: intermediate
438: Notes:
440: .seealso: PCBDDC
441: @*/
442: PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode)
443: {
444: PetscInt nrows,ncols;
445: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
450: MatGetSize(matis->A,&nrows,&ncols);
451: if (nvtxs != nrows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d differs from local problem size %d!\n",nvtxs,nrows);
452: else {
453: PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,const PetscInt[],const PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));
454: }
455: return(0);
456: }
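/* Example (sketch): CSR connectivity for a chain of 3 local dofs 0-1-2 (no
   self connections); nvtxs must match the local matrix size, as checked above.

     PetscInt xadj[]   = {0,1,3,4};   // row pointers, xadj[nvtxs] = total entries
     PetscInt adjncy[] = {1,0,2,1};   // neighbours of dof 0, of dof 1, of dof 2
     PCBDDCSetLocalAdjacencyGraph(pc,3,xadj,adjncy,PETSC_COPY_VALUES);
*/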
457: /* -------------------------------------------------------------------------- */
461: static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[])
462: {
463: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
464: PetscInt i;
468: /* Destroy ISes if they were already set */
469: for (i=0; i<pcbddc->n_ISForDofs; i++) {
470: ISDestroy(&pcbddc->ISForDofs[i]);
471: }
472: PetscFree(pcbddc->ISForDofs);
473: /* allocate space then set */
474: PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);
475: for (i=0; i<n_is; i++) {
476: PetscObjectReference((PetscObject)ISForDofs[i]);
478: pcbddc->ISForDofs[i]=ISForDofs[i];
479: }
480: pcbddc->n_ISForDofs=n_is;
481: return(0);
482: }
486: /*@
487: PCBDDCSetDofsSplitting - Set index sets defining fields of local mat.
489: Not collective
491: Input Parameters:
492: + pc - the preconditioning context
493: . n_is - number of index sets defining the fields
494: - ISForDofs - array of ISs describing the fields
496: Level: intermediate
498: Notes:
500: .seealso: PCBDDC
501: @*/
502: PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[])
503: {
508: PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));
509: return(0);
510: }
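/* Example (sketch): two interleaved fields (u,p,u,p,...) on an even number n
   of local dofs; the strides are illustrative, any ISs describing the fields
   can be passed.

     IS fields[2];
     ISCreateStride(PETSC_COMM_SELF,n/2,0,2,&fields[0]);   // dofs 0,2,4,...
     ISCreateStride(PETSC_COMM_SELF,n/2,1,2,&fields[1]);   // dofs 1,3,5,...
     PCBDDCSetDofsSplitting(pc,2,fields);
     ISDestroy(&fields[0]); ISDestroy(&fields[1]);         // PC keeps references
*/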
511: /* -------------------------------------------------------------------------- */
514: /* -------------------------------------------------------------------------- */
515: /*
516: PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial
517: guess if a transformation of basis approach has been selected.
519: Input Parameter:
520: . pc - the preconditioner context
522: Application Interface Routine: PCPreSolve()
524: Notes:
525: The interface routine PCPreSolve() is not usually called directly by
526: the user, but instead is called by KSPSolve().
527: */
528: static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
529: {
531: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
532: PC_IS *pcis = (PC_IS*)(pc->data);
533: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
534: Mat temp_mat;
535: IS dirIS;
536: PetscInt dirsize,i,*is_indices;
537: PetscScalar *array_x,*array_diagonal;
538: Vec used_vec;
539: PetscBool guess_nonzero;
542: if (x) {
543: PetscObjectReference((PetscObject)x);
544: used_vec = x;
545: } else {
546: PetscObjectReference((PetscObject)pcbddc->temp_solution);
547: used_vec = pcbddc->temp_solution;
548: VecSet(used_vec,0.0);
549: }
550: /* hack into the KSP data structure, since PCPreSolve is called before the initial guess is handled in src/ksp/ksp/interface/itfunc.c */
551: if (ksp) {
552: KSPGetInitialGuessNonzero(ksp,&guess_nonzero);
553: if (!guess_nonzero) {
554: VecSet(used_vec,0.0);
555: }
556: }
557: /* store the original rhs */
558: VecCopy(rhs,pcbddc->original_rhs);
560: /* Take into account zeroed rows -> change rhs and store solution removed */
561: MatGetDiagonal(pc->pmat,pcis->vec1_global);
562: VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);
563: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
564: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
565: VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
566: VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
567: PCBDDCGetDirichletBoundaries(pc,&dirIS);
568: if (dirIS) {
569: ISGetSize(dirIS,&dirsize);
570: VecGetArray(pcis->vec1_N,&array_x);
571: VecGetArray(pcis->vec2_N,&array_diagonal);
572: ISGetIndices(dirIS,(const PetscInt**)&is_indices);
573: for (i=0; i<dirsize; i++) array_x[is_indices[i]] = array_diagonal[is_indices[i]];
575: ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);
576: VecRestoreArray(pcis->vec2_N,&array_diagonal);
577: VecRestoreArray(pcis->vec1_N,&array_x);
578: }
579: VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);
580: VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);
582: /* remove the computed solution from the rhs */
583: VecScale(used_vec,-1.0);
584: MatMultAdd(pc->pmat,used_vec,rhs,rhs);
585: VecScale(used_vec,-1.0);
587: /* store partially computed solution and set initial guess */
588: if (x) {
589: VecCopy(used_vec,pcbddc->temp_solution);
590: VecSet(used_vec,0.0);
591: if (pcbddc->use_exact_dirichlet) {
592: VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
593: VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
594: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
595: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);
596: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);
597: if (ksp) {
598: KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
599: }
600: }
601: }
603: /* rhs change of basis */
604: if (pcbddc->usechangeofbasis) {
605: /* swap pointers for local matrices */
606: temp_mat = matis->A;
607: matis->A = pcbddc->local_mat;
608: pcbddc->local_mat = temp_mat;
609: /* Get local rhs and apply transformation of basis */
610: VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
611: VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
612: /* from original basis to modified basis */
613: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);
614: /* put back modified values into the global vec using INSERT_VALUES copy mode */
615: VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);
616: VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);
617: if (ksp && pcbddc->NullSpace) {
618: MatNullSpaceRemove(pcbddc->NullSpace,used_vec,NULL);
619: MatNullSpaceRemove(pcbddc->NullSpace,rhs,NULL);
620: }
621: }
622: VecDestroy(&used_vec);
623: return(0);
624: }
625: /* -------------------------------------------------------------------------- */
628: /* -------------------------------------------------------------------------- */
629: /*
630: PCPostSolve_BDDC - Changes the computed solution if a transformation of basis
631: approach has been selected. Also, restores rhs to its original state.
633: Input Parameter:
634: . pc - the preconditioner context
636: Application Interface Routine: PCPostSolve()
638: Notes:
639: The interface routine PCPostSolve() is not usually called directly by
640: the user, but instead is called by KSPSolve().
641: */
642: static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x)
643: {
645: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
646: PC_IS *pcis = (PC_IS*)(pc->data);
647: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
648: Mat temp_mat;
651: if (pcbddc->usechangeofbasis) {
652: /* swap pointers for local matrices */
653: temp_mat = matis->A;
654: matis->A = pcbddc->local_mat;
655: pcbddc->local_mat = temp_mat;
656: /* restore rhs to its original state */
657: if (rhs) {
658: VecCopy(pcbddc->original_rhs,rhs);
659: }
660: /* Get Local boundary and apply transformation of basis to solution vector */
661: VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
662: VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
663: /* from modified basis to original basis */
664: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);
665: /* put back modified values into the global vec using INSERT_VALUES copy mode */
666: VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);
667: VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);
668: }
669: /* add solution removed in presolve */
670: if (x) {
671: VecAXPY(x,1.0,pcbddc->temp_solution);
672: }
673: return(0);
674: }
675: /* -------------------------------------------------------------------------- */
678: /* -------------------------------------------------------------------------- */
679: /*
680: PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
681: by setting data structures and options.
683: Input Parameter:
684: + pc - the preconditioner context
686: Application Interface Routine: PCSetUp()
688: Notes:
689: The interface routine PCSetUp() is not usually called directly by
690: the user, but instead is called by PCApply() if necessary.
691: */
692: PetscErrorCode PCSetUp_BDDC(PC pc)
693: {
695: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
698: if (!pc->setupcalled) {
699: /* For BDDC we need to define a local "Neumann" problem different from the one defined in PCISSetUp,
700: so we set the Neumann problem of PCIS to PCNONE in order to avoid unneeded computation.
701: Also, we directly build the (same) Dirichlet problem */
702: PetscOptionsSetValue("-is_localN_pc_type","none");
703: PetscOptionsSetValue("-is_localD_pc_type","none");
704: /* Set up all the "iterative substructuring" common block */
706: PCISSetUp(pc);
707: /* Get stdout for dbg */
708: if (pcbddc->dbg_flag) {
709: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)pc),&pcbddc->dbg_viewer);
710: PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
711: }
712: /* Analyze local interface */
713: PCBDDCManageLocalBoundaries(pc);
714: /* Set up local constraint matrix */
715: PCBDDCCreateConstraintMatrix(pc);
716: /* Create the coarse and local structures used to evaluate the action of the preconditioner */
717: PCBDDCCoarseSetUp(pc);
718: }
719: return(0);
720: }
722: /* -------------------------------------------------------------------------- */
723: /*
724: PCApply_BDDC - Applies the BDDC preconditioner to a vector.
726: Input Parameters:
727: . pc - the preconditioner context
728: . r - input vector (global)
730: Output Parameter:
731: . z - output vector (global)
733: Application Interface Routine: PCApply()
734: */
737: PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
738: {
739: PC_IS *pcis = (PC_IS*)(pc->data);
740: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
741: PetscErrorCode ierr;
742: const PetscScalar one = 1.0;
743: const PetscScalar m_one = -1.0;
744: const PetscScalar zero = 0.0;
746: /* This code is similar to that in nn.c for PCNN, with the
747: NN interface preconditioner replaced by BDDC.
748: Added support for the M_3 preconditioner of the reference article (active if pcbddc->inexact_prec_type == PETSC_TRUE) */
751: if (!pcbddc->use_exact_dirichlet) {
752: /* First Dirichlet solve */
753: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
754: VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
755: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
756: /*
757: Assembling right hand side for BDDC operator
758: - vec1_D for the Dirichlet part (only needed if pcbddc->inexact_prec_type == PETSC_TRUE)
759: - the interface part of the global vector z
760: */
761: VecScale(pcis->vec2_D,m_one);
762: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
763: if (pcbddc->inexact_prec_type) { MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D); }
764: VecScale(pcis->vec2_D,m_one);
765: VecCopy(r,z);
766: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
767: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
768: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
769: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
770: } else {
771: VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
772: VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
773: VecSet(pcis->vec1_D,zero);
774: VecSet(pcis->vec2_D,zero);
775: }
777: /* Apply partition of unity */
778: VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
780: /* Apply interface preconditioner
781: input/output vecs: pcis->vec1_B and pcis->vec1_D */
782: PCBDDCApplyInterfacePreconditioner(pc);
784: /* Apply partition of unity and sum boundary values */
785: VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
786: VecSet(z,zero);
787: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
788: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
790: /* Second Dirichlet solve and assembling of output */
791: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
792: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
793: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);
794: if (pcbddc->inexact_prec_type) { MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D); }
795: KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);
796: VecScale(pcbddc->vec4_D,m_one);
797: if (pcbddc->inexact_prec_type) { VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D); }
798: VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);
799: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
800: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
801: return(0);
803: }
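/* Summary (sketch, not in the original source): with r = (r_I, r_Gamma),
   M_BDDC the interface preconditioner applied by
   PCBDDCApplyInterfacePreconditioner and D the partition of unity, the
   routine above computes (M_2 variant, exact-Dirichlet shortcut aside)
     z_Gamma = D M_BDDC D (r_Gamma - A_BI A_II^{-1} r_I)
     z_I     = A_II^{-1} (r_I - A_IB z_Gamma)
   while the M_3 variant (inexact_prec_type) adds the A_II correction terms;
   cf. Dohrmann 2007. */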
804: /* -------------------------------------------------------------------------- */
807: PetscErrorCode PCDestroy_BDDC(PC pc)
808: {
809: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
810: PetscInt i;
814: /* free data created by PCIS */
815: PCISDestroy(pc);
816: /* free BDDC data */
817: MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);
818: MatNullSpaceDestroy(&pcbddc->NullSpace);
819: VecDestroy(&pcbddc->temp_solution);
820: VecDestroy(&pcbddc->original_rhs);
821: MatDestroy(&pcbddc->local_mat);
822: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
823: VecDestroy(&pcbddc->coarse_vec);
824: VecDestroy(&pcbddc->coarse_rhs);
825: KSPDestroy(&pcbddc->coarse_ksp);
826: MatDestroy(&pcbddc->coarse_mat);
827: MatDestroy(&pcbddc->coarse_phi_B);
828: MatDestroy(&pcbddc->coarse_phi_D);
829: VecDestroy(&pcbddc->vec1_P);
830: VecDestroy(&pcbddc->vec1_C);
831: MatDestroy(&pcbddc->local_auxmat1);
832: MatDestroy(&pcbddc->local_auxmat2);
833: VecDestroy(&pcbddc->vec1_R);
834: VecDestroy(&pcbddc->vec2_R);
835: VecDestroy(&pcbddc->vec4_D);
836: VecScatterDestroy(&pcbddc->R_to_B);
837: VecScatterDestroy(&pcbddc->R_to_D);
838: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
839: KSPDestroy(&pcbddc->ksp_D);
840: KSPDestroy(&pcbddc->ksp_R);
841: ISDestroy(&pcbddc->NeumannBoundaries);
842: ISDestroy(&pcbddc->DirichletBoundaries);
843: MatDestroy(&pcbddc->ConstraintMatrix);
844: PetscFree(pcbddc->local_primal_indices);
845: PetscFree(pcbddc->replicated_local_primal_indices);
846: PetscFree(pcbddc->replicated_local_primal_values);
847: PetscFree(pcbddc->local_primal_displacements);
848: PetscFree(pcbddc->local_primal_sizes);
849: for (i=0; i<pcbddc->n_ISForDofs; i++) {
850: ISDestroy(&pcbddc->ISForDofs[i]);
851: }
852: PetscFree(pcbddc->ISForDofs);
853: for (i=0; i<pcbddc->n_ISForFaces; i++) {
854: ISDestroy(&pcbddc->ISForFaces[i]);
855: }
856: PetscFree(pcbddc->ISForFaces);
857: for (i=0; i<pcbddc->n_ISForEdges; i++) {
858: ISDestroy(&pcbddc->ISForEdges[i]);
859: }
860: PetscFree(pcbddc->ISForEdges);
861: ISDestroy(&pcbddc->ISForVertices);
862: /* Free graph structure */
863: PetscFree(pcbddc->mat_graph->xadj);
864: PetscFree(pcbddc->mat_graph->adjncy);
865: if (pcbddc->mat_graph->nvtxs) {
866: PetscFree(pcbddc->mat_graph->neighbours_set[0]);
867: }
868: PetscFree(pcbddc->mat_graph->neighbours_set);
869: PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);
870: PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);
871: PetscFree(pcbddc->mat_graph->where_ncmps);
872: PetscFree(pcbddc->mat_graph);
873: /* remove functions */
874: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C",NULL);
875: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C",NULL);
876: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C",NULL);
877: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C",NULL);
878: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C",NULL);
879: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C",NULL);
880: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C",NULL);
881: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C",NULL);
882: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C",NULL);
883: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C",NULL);
884: PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C",NULL);
885: PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C",NULL);
886: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C",NULL);
887: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C",NULL);
888: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C",NULL);
889: /* Free the private data structure that was hanging off the PC */
890: PetscFree(pcbddc);
891: return(0);
892: }
893: /* -------------------------------------------------------------------------- */
897: static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs)
898: {
899: FETIDPMat_ctx *mat_ctx;
900: PC_IS * pcis;
901: PC_BDDC * pcbddc;
905: MatShellGetContext(fetidp_mat,&mat_ctx);
906: pcis = (PC_IS*)mat_ctx->pc->data;
907: pcbddc = (PC_BDDC*)mat_ctx->pc->data;
909: /* change of basis for the physical rhs if needed;
910: it also changes the rhs in case of Dirichlet boundaries */
911: (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,NULL,standard_rhs,NULL);
912: /* store vectors for computation of fetidp final solution */
913: VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);
914: VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);
915: VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
916: VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
917: /* scale rhs since it should be unassembled */
918: VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);
919: if (!pcbddc->inexact_prec_type) {
920: /* compute partially subassembled Schur complement right-hand side */
921: KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);
922: MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);
923: VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);
924: VecSet(standard_rhs,0.0);
925: VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);
926: VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);
927: VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
928: VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);
929: VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);
930: }
931: /* BDDC rhs */
932: VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);
933: if (pcbddc->inexact_prec_type) {
934: VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);
935: }
936: /* apply BDDC */
937: PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);
938: /* Application of B_delta and assembling of rhs for fetidp fluxes */
939: VecSet(fetidp_flux_rhs,0.0);
940: MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);
941: VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);
942: VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);
943: /* restore original rhs */
944: VecCopy(pcbddc->original_rhs,standard_rhs);
945: return(0);
946: }
950: /*@
951: PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system.
953: Collective
955: Input Parameters:
956: + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators
957: - standard_rhs - the rhs of your linear system
959: Output Parameters:
960: . fetidp_flux_rhs - the rhs of the FETIDP linear system
962: Level: developer
964: Notes:
966: .seealso: PCBDDC
967: @*/
968: PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs)
969: {
970: FETIDPMat_ctx *mat_ctx;
974: MatShellGetContext(fetidp_mat,&mat_ctx);
975: PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));
976: return(0);
977: }
978: /* -------------------------------------------------------------------------- */
982: static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol)
983: {
984: FETIDPMat_ctx *mat_ctx;
985: PC_IS *pcis;
986: PC_BDDC *pcbddc;
990: MatShellGetContext(fetidp_mat,&mat_ctx);
991: pcis = (PC_IS*)mat_ctx->pc->data;
992: pcbddc = (PC_BDDC*)mat_ctx->pc->data;
994: /* apply B_delta^T */
995: VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
996: VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
997: MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);
998: /* compute rhs for BDDC application */
999: VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);
1000: if (pcbddc->inexact_prec_type) {
1001: VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);
1002: }
1003: /* apply BDDC */
1004: PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);
1005: /* put values into standard global vector */
1006: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1007: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1008: if (!pcbddc->inexact_prec_type) {
1009: /* compute the interior values if we solved for the partially subassembled Schur complement */
1010: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
1011: VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);
1012: KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);
1013: }
1014: VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1015: VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);
1016: /* final change of basis if needed;
1017: it also sums the Dirichlet part removed during rhs assembling */
1018: (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,NULL,NULL,standard_sol);
1019: return(0);
1021: }
1025: /*@
1026: PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system.
1028: Collective
1030: Input Parameters:
1031: + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators
1032: - fetidp_flux_sol - the solution of the FETIDP linear system
1034: Output Parameters:
1035: . standard_sol - the solution on the global domain
1037: Level: developer
1039: Notes:
1041: .seealso: PCBDDC
1042: @*/
1043: PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol)
1044: {
1045: FETIDPMat_ctx *mat_ctx;
1049: MatShellGetContext(fetidp_mat,&mat_ctx);
1050: PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));
1051: return(0);
1052: }
1053: /* -------------------------------------------------------------------------- */
1055: extern PetscErrorCode FETIDPMatMult(Mat,Vec,Vec);
1056: extern PetscErrorCode PCBDDCDestroyFETIDPMat(Mat);
1057: extern PetscErrorCode FETIDPPCApply(PC,Vec,Vec);
1058: extern PetscErrorCode PCBDDCDestroyFETIDPPC(PC);
1061: static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc)
1062: {
1063: FETIDPMat_ctx *fetidpmat_ctx;
1064: Mat newmat;
1065: FETIDPPC_ctx *fetidppc_ctx;
1066: PC newpc;
1067: MPI_Comm comm;
1071: PetscObjectGetComm((PetscObject)pc,&comm);
1072: /* FETIDP linear matrix */
1073: PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);
1074: PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);
1075: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);
1076: MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);
1077: MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);
1078: MatSetUp(newmat);
1079: /* FETIDP preconditioner */
1080: PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);
1081: PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);
1082: PCCreate(comm,&newpc);
1083: PCSetType(newpc,PCSHELL);
1084: PCShellSetContext(newpc,fetidppc_ctx);
1085: PCShellSetApply(newpc,FETIDPPCApply);
1086: PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);
1087: PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);
1088: PCSetUp(newpc);
1090: /* return pointers for objects created */
1091: *fetidp_mat = newmat;
1092: *fetidp_pc = newpc;
1093: return(0);
1094: }
1098: /*@
1099: PCBDDCCreateFETIDPOperators - Create operators for FETIDP.
1101: Collective
1103: Input Parameters:
1104: . pc - the BDDC preconditioning context (PCSetUp() must have been called in advance)
1106: Level: developer
1108: Notes:
1110: .seealso: PCBDDC
1111: @*/
1112: PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc)
1113: {
1118: if (pc->setupcalled) {
1119: PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));
1120: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetUp() first\n");
1121: return(0);
1122: }
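/* Example (sketch): solving the FETI-DP system built from an already set up
   PCBDDC; F, Fp, fetidp, lambda, flux_rhs are illustrative names, and rhs/x
   are the rhs and solution vectors of the original global system.

     Mat F; PC Fp; KSP fetidp; Vec lambda,flux_rhs;
     PCBDDCCreateFETIDPOperators(pc,&F,&Fp);
     MatGetVecs(F,&lambda,&flux_rhs);
     PCBDDCMatFETIDPGetRHS(F,rhs,flux_rhs);
     KSPCreate(PetscObjectComm((PetscObject)F),&fetidp);
     KSPSetOperators(fetidp,F,F,SAME_PRECONDITIONER);
     KSPSetPC(fetidp,Fp);
     KSPSolve(fetidp,flux_rhs,lambda);
     PCBDDCMatFETIDPGetSolution(F,lambda,x);
*/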
1123: /* -------------------------------------------------------------------------- */
1124: /*MC
1125: PCBDDC - Balancing Domain Decomposition by Constraints.
1127: Options Database Keys:
1128: . -pcbddc ??? -
1130: Level: intermediate
1132: Notes: The matrix used with this preconditioner must be of type MATIS
1134: Unlike more 'conventional' interface preconditioners, this iterates over ALL the
1135: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
1136: on the subdomains).
1138: Options for the coarse grid preconditioner can be set with -
1139: Options for the Dirichlet subproblem can be set with -
1140: Options for the Neumann subproblem can be set with -
1142: Contributed by Stefano Zampini
1144: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
1145: M*/
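/* Example (sketch): minimal PCBDDC setup; the operator must be a MATIS built
   from local subdomain matrices. N, map, ksp are illustrative names, and the
   MatCreateIS call (with block size 1) assumes the signature of this PETSc
   version.

     Mat A;
     MatCreateIS(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,N,N,map,&A);
     ... assemble the local matrix obtained with MatISGetLocalMat(A,&Aloc) ...
     KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCBDDC);
*/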
1149: PETSC_EXTERN PetscErrorCode PCCreate_BDDC(PC pc)
1150: {
1152: PC_BDDC *pcbddc;
1153: PCBDDCGraph mat_graph;
1156: /* Creates the private data structure for this preconditioner and attach it to the PC object. */
1157: PetscNewLog(pc,PC_BDDC,&pcbddc);
1158: pc->data = (void*)pcbddc;
1160: /* create PCIS data structure */
1161: PCISCreate(pc);
1163: /* BDDC specific */
1164: pcbddc->CoarseNullSpace = 0;
1165: pcbddc->NullSpace = 0;
1166: pcbddc->temp_solution = 0;
1167: pcbddc->original_rhs = 0;
1168: pcbddc->local_mat = 0;
1169: pcbddc->ChangeOfBasisMatrix = 0;
1170: pcbddc->usechangeofbasis = PETSC_TRUE;
1171: pcbddc->usechangeonfaces = PETSC_FALSE;
1172: pcbddc->coarse_vec = 0;
1173: pcbddc->coarse_rhs = 0;
1174: pcbddc->coarse_ksp = 0;
1175: pcbddc->coarse_phi_B = 0;
1176: pcbddc->coarse_phi_D = 0;
1177: pcbddc->vec1_P = 0;
1178: pcbddc->vec1_R = 0;
1179: pcbddc->vec2_R = 0;
1180: pcbddc->local_auxmat1 = 0;
1181: pcbddc->local_auxmat2 = 0;
1182: pcbddc->R_to_B = 0;
1183: pcbddc->R_to_D = 0;
1184: pcbddc->ksp_D = 0;
1185: pcbddc->ksp_R = 0;
1186: pcbddc->local_primal_indices = 0;
1187: pcbddc->inexact_prec_type = PETSC_FALSE;
1188: pcbddc->NeumannBoundaries = 0;
1189: pcbddc->ISForDofs = 0;
1190: pcbddc->ISForVertices = 0;
1191: pcbddc->n_ISForFaces = 0;
1192: pcbddc->n_ISForEdges = 0;
1193: pcbddc->ConstraintMatrix = 0;
1194: pcbddc->use_nnsp_true = PETSC_FALSE;
1195: pcbddc->local_primal_sizes = 0;
1196: pcbddc->local_primal_displacements = 0;
1197: pcbddc->replicated_local_primal_indices = 0;
1198: pcbddc->replicated_local_primal_values = 0;
1199: pcbddc->coarse_loc_to_glob = 0;
1200: pcbddc->dbg_flag = PETSC_FALSE;
1201: pcbddc->coarsening_ratio = 8;
1202: pcbddc->use_exact_dirichlet = PETSC_TRUE;
1203: pcbddc->current_level = 0;
1204: pcbddc->max_levels = 1;
1206: /* allocate and initialize needed graph structure */
1207: PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);
1208: pcbddc->mat_graph->xadj = 0;
1209: pcbddc->mat_graph->adjncy = 0;
1211: /* function pointers */
1212: pc->ops->apply = PCApply_BDDC;
1213: pc->ops->applytranspose = 0;
1214: pc->ops->setup = PCSetUp_BDDC;
1215: pc->ops->destroy = PCDestroy_BDDC;
1216: pc->ops->setfromoptions = PCSetFromOptions_BDDC;
1217: pc->ops->view = 0;
1218: pc->ops->applyrichardson = 0;
1219: pc->ops->applysymmetricleft = 0;
1220: pc->ops->applysymmetricright = 0;
1221: pc->ops->presolve = PCPreSolve_BDDC;
1222: pc->ops->postsolve = PCPostSolve_BDDC;
1224: /* composing function */
1225: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C",PCBDDCSetCoarseningRatio_BDDC);
1226: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C",PCBDDCSetMaxLevels_BDDC);
1227: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C",PCBDDCSetNullSpace_BDDC);
1228: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C",PCBDDCSetDirichletBoundaries_BDDC);
1229: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C",PCBDDCSetNeumannBoundaries_BDDC);
1230: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C",PCBDDCGetDirichletBoundaries_BDDC);
1231: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C",PCBDDCGetNeumannBoundaries_BDDC);
1232: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C",PCBDDCSetCoarseProblemType_BDDC);
1233: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C",PCBDDCSetDofsSplitting_BDDC);
1234: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C",PCBDDCSetLocalAdjacencyGraph_BDDC);
1235: PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C",PCPreSolve_BDDC);
1236: PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C",PCPostSolve_BDDC);
1237: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C",PCBDDCCreateFETIDPOperators_BDDC);
1238: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C",PCBDDCMatFETIDPGetRHS_BDDC);
1239: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C",PCBDDCMatFETIDPGetSolution_BDDC);
1240: return(0);
1241: }
1243: /* -------------------------------------------------------------------------- */
1244: /* All static functions from now on */
1245: /* -------------------------------------------------------------------------- */
1248: static PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC pc,Vec x,Vec y)
1249: {
1250: NullSpaceCorrection_ctx *pc_ctx;
1251: PetscErrorCode ierr;
1254: PCShellGetContext(pc,(void**)&pc_ctx);
1255: /* E */
1256: MatMultTranspose(pc_ctx->Lbasis_mat,x,pc_ctx->work_small_2);
1257: MatMultAdd(pc_ctx->Kbasis_mat,pc_ctx->work_small_2,x,pc_ctx->work_full_1);
1258: /* P^-1 */
1259: PCApply(pc_ctx->local_pc,pc_ctx->work_full_1,pc_ctx->work_full_2);
1260: /* E^T */
1261: MatMultTranspose(pc_ctx->Kbasis_mat,pc_ctx->work_full_2,pc_ctx->work_small_1);
1262: VecScale(pc_ctx->work_small_1,-1.0);
1263: MatMultAdd(pc_ctx->Lbasis_mat,pc_ctx->work_small_1,pc_ctx->work_full_2,pc_ctx->work_full_1);
1264: /* Sum contributions */
1265: MatMultAdd(pc_ctx->basis_mat,pc_ctx->work_small_2,pc_ctx->work_full_1,y);
1266: return(0);
1267: }
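/* In matrix terms (sketch): with N = basis_mat, L = Lbasis_mat and K =
   Kbasis_mat as stored (K gets a flipped sign in PCBDDCAdaptLocalProblem
   below) and P = local_pc, the routine above computes
     y = N L^T x + (I - L K^T) P^{-1} (I + K L^T) x
   i.e. the local solve is corrected so that the directions spanned by the
   subdomain null space are treated exactly. */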
1271: static PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC pc)
1272: {
1273: NullSpaceCorrection_ctx *pc_ctx;
1274: PetscErrorCode ierr;
1277: PCShellGetContext(pc,(void**)&pc_ctx);
1278: VecDestroy(&pc_ctx->work_small_1);
1279: VecDestroy(&pc_ctx->work_small_2);
1280: VecDestroy(&pc_ctx->work_full_1);
1281: VecDestroy(&pc_ctx->work_full_2);
1282: MatDestroy(&pc_ctx->basis_mat);
1283: MatDestroy(&pc_ctx->Lbasis_mat);
1284: MatDestroy(&pc_ctx->Kbasis_mat);
1285: PCDestroy(&pc_ctx->local_pc);
1286: PetscFree(pc_ctx);
1287: return(0);
1288: }
1292: static PetscErrorCode PCBDDCAdaptLocalProblem(PC pc,IS local_dofs)
1293: {
1294: extern PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC,Vec,Vec);
1295: extern PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC);
1297: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1298: PC_IS *pcis = (PC_IS*)pc->data;
1299: Mat_IS * matis = (Mat_IS*)pc->pmat->data;
1300: KSP *local_ksp;
1301: PC newpc;
1302: NullSpaceCorrection_ctx *shell_ctx;
1303: Mat local_mat,local_pmat,small_mat,inv_small_mat;
1304: MatStructure local_mat_struct;
1305: Vec work1,work2,work3;
1306: const Vec *nullvecs;
1307: VecScatter scatter_ctx;
1308: IS is_aux;
1309: MatFactorInfo matinfo;
1310: PetscScalar *basis_mat,*Kbasis_mat,*array,*array_mat;
1311: PetscScalar one = 1.0,zero = 0.0, m_one = -1.0;
1312: PetscInt basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R;
1313: PetscBool nnsp_has_cnst;
1314: PetscErrorCode ierr;
1317: /* Infer the local solver */
1318: ISGetSize(local_dofs,&basis_dofs);
1319: VecGetSize(pcis->vec1_D,&n_I);
1320: VecGetSize(pcbddc->vec1_R,&n_R);
1321: if (basis_dofs == n_I) {
1322: /* Dirichlet solver */
1323: local_ksp = &pcbddc->ksp_D;
1324: } else if (basis_dofs == n_R) {
1325: /* Neumann solver */
1326: local_ksp = &pcbddc->ksp_R;
1327: } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unknown local IS size %d (n_I=%d, n_R=%d)\n",basis_dofs,n_I,n_R);
1328: KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);
1330: /* Get null space vecs */
1331: MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);
1332: basis_size = nnsp_size;
1333: if (nnsp_has_cnst) basis_size++;
1335: /* Create shell ctx */
1336: PetscMalloc(sizeof(*shell_ctx),&shell_ctx);
1338: /* Create work vectors in shell context */
1339: VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);
1340: VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);
1341: VecSetType(shell_ctx->work_small_1,VECSEQ);
1342: VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);
1343: VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);
1344: VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);
1345: VecSetType(shell_ctx->work_full_1,VECSEQ);
1346: VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);
1348: /* Allocate workspace */
1349: MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat);
1350: MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);
1351: MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);
1352: MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);
1354: /* Restrict local null space on selected dofs (Dirichlet or Neumann)
1355: and compute matrices N and K*N */
1356: VecDuplicate(shell_ctx->work_full_1,&work1);
1357: VecDuplicate(shell_ctx->work_full_1,&work2);
1358: VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);
1359: for (k=0; k<nnsp_size; k++) {
1361: VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1362: VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1363: VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);
1364: VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);
1365: VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);
1366: VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);
1367: MatMult(local_mat,work1,work2);
1368: VecResetArray(work1);
1369: VecResetArray(work2);
1370: }
1371: if (nnsp_has_cnst) {
1372: VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);
1373: VecSet(work1,one);
1374: VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);
1375: MatMult(local_mat,work1,work2);
1376: VecResetArray(work1);
1377: VecResetArray(work2);
1378: }
1379: VecDestroy(&work1);
1380: VecDestroy(&work2);
1381: VecScatterDestroy(&scatter_ctx);
1382: MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);
1383: MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);
1385: /* Assemble another Mat object in shell context */
1386: MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);
1387: MatFactorInfoInitialize(&matinfo);
1388: ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);
1389: MatLUFactor(small_mat,is_aux,is_aux,&matinfo);
1390: ISDestroy(&is_aux);
1391: PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);
1392: for (k=0; k<basis_size; k++) {
1393: VecSet(shell_ctx->work_small_1,zero);
1394: VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);
1395: VecAssemblyBegin(shell_ctx->work_small_1);
1396: VecAssemblyEnd(shell_ctx->work_small_1);
1397: MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);
1398: VecGetArray(shell_ctx->work_small_2,&array);
1399: for (i=0; i<basis_size; i++) array_mat[i*basis_size+k]=array[i];
1400: VecRestoreArray(shell_ctx->work_small_2,&array);
1401: }
1402: MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);
1403: MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);
1404: PetscFree(array_mat);
1405: MatDestroy(&inv_small_mat);
1406: MatDestroy(&small_mat);
1407: MatScale(shell_ctx->Kbasis_mat,m_one);
1409: /* Rebuild local PC */
1410: KSPGetPC(*local_ksp,&shell_ctx->local_pc);
1411: PetscObjectReference((PetscObject)shell_ctx->local_pc);
1412: PCCreate(PETSC_COMM_SELF,&newpc);
1413: PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);
1414: PCSetType(newpc,PCSHELL);
1415: PCShellSetContext(newpc,shell_ctx);
1416: PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);
1417: PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);
1418: PCSetUp(newpc);
1419: KSPSetPC(*local_ksp,newpc);
1420: PCDestroy(&newpc);
1421: KSPSetUp(*local_ksp);
1423: /* test */
1424: if (pcbddc->dbg_flag) {
1425: PetscReal test_err;
1426: KSP check_ksp;
1427: PC check_pc;
1428: PetscReal lambda_min,lambda_max;
1429: Mat test_mat;
1430: PetscViewer viewer=pcbddc->dbg_viewer;
1431: PetscBool setsym,issym=PETSC_FALSE;
1433: KSPGetPC(*local_ksp,&check_pc);
1434: VecDuplicate(shell_ctx->work_full_1,&work1);
1435: VecDuplicate(shell_ctx->work_full_1,&work2);
1436: VecDuplicate(shell_ctx->work_full_1,&work3);
1437: VecSetRandom(shell_ctx->work_small_1,NULL);
1438: MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);
1439: VecCopy(work1,work2);
1440: MatMult(local_mat,work1,work3);
1441: PCApply(check_pc,work3,work1);
1442: VecAXPY(work1,m_one,work2);
1443: VecNorm(work1,NORM_INFINITY,&test_err);
1444: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank);
1445: if (basis_dofs == n_I) {
1446: PetscViewerASCIISynchronizedPrintf(viewer,"Dirichlet ");
1447: } else {
1448: PetscViewerASCIISynchronizedPrintf(viewer,"Neumann ");
1449: }
1450: PetscViewerASCIISynchronizedPrintf(viewer,"solver is :%1.14e\n",test_err);
1452: MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);
1453: MatShift(test_mat,one);
1454: MatNorm(test_mat,NORM_INFINITY,&test_err);
1455: MatDestroy(&test_mat);
1456: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err);
1458: /* Create a KSP object suitable for estimating extreme eigenvalues */
1459: KSPCreate(PETSC_COMM_SELF,&check_ksp);
1460: KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);
1461: KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);
1462: KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);
1463: MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
1464: if (issym) {
1465: KSPSetType(check_ksp,KSPCG);
1466: }
1467: KSPSetPC(check_ksp,check_pc);
1468: KSPSetUp(check_ksp);
1469: VecSetRandom(work1,NULL);
1470: MatMult(local_mat,work1,work2);
1471: KSPSolve(check_ksp,work2,work2);
1472: VecAXPY(work2,m_one,work1);
1473: VecNorm(work2,NORM_INFINITY,&test_err);
1474: KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
1475: KSPGetIterationNumber(check_ksp,&k);
1476: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max);
1477: KSPDestroy(&check_ksp);
1478: VecDestroy(&work1);
1479: VecDestroy(&work2);
1480: VecDestroy(&work3);
1481: }
1482: return(0);
1483: }
1487: static PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
1488: {
1489: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1492: pcbddc->use_exact_dirichlet=use;
1493: return(0);
1494: }
1498: static PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
1499: {
1500: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1503: pcbddc->current_level=level;
1504: return(0);
1505: }
1509: static PetscErrorCode PCBDDCAdaptNullSpace(PC pc)
1510: {
1511: PC_IS *pcis = (PC_IS*) (pc->data);
1512: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1513: KSP inv_change;
1514: PC pc_change;
1515: const Vec *nsp_vecs;
1516: Vec *new_nsp_vecs;
1517: PetscInt i,nsp_size,new_nsp_size,start_new;
1518: PetscBool nsp_has_cnst;
1519: MatNullSpace new_nsp;
1523: MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);
1524: KSPCreate(PETSC_COMM_SELF,&inv_change);
1525: KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);
1526: KSPSetType(inv_change,KSPPREONLY);
1527: KSPGetPC(inv_change,&pc_change);
1528: PCSetType(pc_change,PCLU);
1529: KSPSetUp(inv_change);
1531: new_nsp_size = nsp_size;
1532: if (nsp_has_cnst) new_nsp_size++;
1533: PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);
1534: for (i=0;i<new_nsp_size;i++) { VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]); }
1535: start_new = 0;
1536: if (nsp_has_cnst) {
1537: start_new = 1;
1538: VecSet(new_nsp_vecs[0],1.0);
1539: VecSet(pcis->vec1_B,1.0);
1540: KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B);
1541: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);
1542: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);
1543: }
1544: for (i=0; i<nsp_size; i++) {
1545: VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);
1546: VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1547: VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
1548: KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B);
1549: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);
1550: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);
1551: }
1552: VecNormalize(new_nsp_vecs[0],NULL);
1553: /* TODO : Orthonormalize vecs when new_nsp_size > 0! */
1555: KSPDestroy(&inv_change);
1556: MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);
1557: PCBDDCSetNullSpace(pc,new_nsp);
1558: MatNullSpaceDestroy(&new_nsp);
1559: /*
1560: MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);
1561: printf("New Null Space, mat changed: %d\n",nsp_t);
1562: temp_mat = matis->A;
1563: matis->A = pcbddc->local_mat;
1564: pcbddc->local_mat = temp_mat;
1565: MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);
1566: printf("New Null Space, mat original: %d\n",nsp_t);*/
1568: for (i=0; i<new_nsp_size; i++) { VecDestroy(&new_nsp_vecs[i]); }
1569: PetscFree(new_nsp_vecs);
1570: return(0);
1571: }
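/*
   Editor's note: a compact summary of what PCBDDCAdaptNullSpace computes.
   If T = pcbddc->ChangeOfBasisMatrix acts on the interface (B) dofs, each
   null space vector w = [w_I; w_B] is mapped into the new basis as

      w_new = [w_I; T^{-1} w_B]

   which is what the KSPPREONLY+PCLU solve with inv_change realizes: each
   existing vector is first copied (the constant vector is set to 1.0), and
   only its interface part is then overwritten by the reverse scatter.
*/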
1575: static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx)
1576: {
1577: FETIDPMat_ctx *newctx;
1581: PetscMalloc(sizeof(*newctx),&newctx);
1583: newctx->lambda_local = 0;
1584: newctx->temp_solution_B = 0;
1585: newctx->temp_solution_D = 0;
1586: newctx->B_delta = 0;
1587: newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */
1588: newctx->l2g_lambda = 0;
1590: /* increase the reference count for BDDC preconditioner */
1591: PetscObjectReference((PetscObject)pc);
1592: newctx->pc = pc;
1593: *fetidpmat_ctx = newctx;
1594: return(0);
1595: }
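/*
   Editor's note: a minimal sketch (not a documented API sequence) of how the
   context created above can be wired into a shell matrix, assuming the sizes
   stored by PCBDDCSetupFETIDPMatContext below:

     FETIDPMat_ctx *ctx;
     Mat            F;
     PCBDDCCreateFETIDPMatContext(pc,&ctx);
     PCBDDCSetupFETIDPMatContext(ctx);
     MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,ctx->n_lambda,ctx->n_lambda,ctx,&F);
     MatShellSetOperation(F,MATOP_MULT,(void (*)(void))FETIDPMatMult);
     MatShellSetOperation(F,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);
*/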
1599: static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx)
1600: {
1601: FETIDPPC_ctx *newctx;
1605: PetscMalloc(sizeof(*newctx),&newctx);
1607: newctx->lambda_local = 0;
1608: newctx->B_Ddelta = 0;
1609: newctx->l2g_lambda = 0;
1611: /* increase the reference count for BDDC preconditioner */
1612: PetscObjectReference((PetscObject)pc);
1613: newctx->pc = pc;
1614: *fetidppc_ctx = newctx;
1615: return(0);
1616: }
1620: static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A)
1621: {
1622: FETIDPMat_ctx *mat_ctx;
1626: MatShellGetContext(A,(void**)&mat_ctx);
1627: VecDestroy(&mat_ctx->lambda_local);
1628: VecDestroy(&mat_ctx->temp_solution_D);
1629: VecDestroy(&mat_ctx->temp_solution_B);
1630: MatDestroy(&mat_ctx->B_delta);
1631: MatDestroy(&mat_ctx->B_Ddelta);
1632: VecScatterDestroy(&mat_ctx->l2g_lambda);
1633: PCDestroy(&mat_ctx->pc); /* this does not actually destroy the BDDC object; it only decreases its reference count */
1634: PetscFree(mat_ctx);
1635: return(0);
1636: }
1640: static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc)
1641: {
1642: FETIDPPC_ctx *pc_ctx;
1646: PCShellGetContext(pc,(void**)&pc_ctx);
1647: VecDestroy(&pc_ctx->lambda_local);
1648: MatDestroy(&pc_ctx->B_Ddelta);
1649: VecScatterDestroy(&pc_ctx->l2g_lambda);
1650: PCDestroy(&pc_ctx->pc); /* this does not actually destroy the BDDC object; it only decreases its reference count */
1651: PetscFree(pc_ctx);
1652: return(0);
1653: }
1657: static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx)
1658: {
1660: PC_IS *pcis =(PC_IS*)fetidpmat_ctx->pc->data;
1661: PC_BDDC *pcbddc =(PC_BDDC*)fetidpmat_ctx->pc->data;
1662: PCBDDCGraph mat_graph=pcbddc->mat_graph;
1663: Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data;
1664: MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm;
1666: Mat ScalingMat;
1667: Vec lambda_global;
1668: IS IS_l2g_lambda;
1670: PetscBool skip_node,fully_redundant;
1671: PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum;
1672: PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values;
1673: PetscMPIInt rank,nprocs;
1674: PetscScalar scalar_value;
1676: PetscInt *vertex_indices,*temp_indices;
1677: PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering;
1678: PetscInt *aux_sums,*cols_B_delta,*l2g_indices;
1679: PetscScalar *array,*scaling_factors,*vals_B_delta;
1680: PetscInt *aux_local_numbering_2,*dof_sizes,*dof_displs;
1681: PetscInt first_index,old_index;
1682: PetscBool first_found = PETSC_FALSE;
1684: /* For communication of scaling factors */
1685: PetscInt *ptrs_buffer,neigh_position;
1686: PetscScalar **all_factors,*send_buffer,*recv_buffer;
1687: MPI_Request *send_reqs,*recv_reqs;
1689: /* tests */
1690: Vec test_vec;
1691: PetscBool test_fetidp;
1692: PetscViewer viewer;
1695: MPI_Comm_rank(comm,&rank);
1696: MPI_Comm_size(comm,&nprocs);
1698: /* Default type of lagrange multipliers is non-redundant */
1699: fully_redundant = PETSC_FALSE;
1700: PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);
1702: /* Evaluate local and global number of lagrange multipliers */
1703: VecSet(pcis->vec1_N,0.0);
1704: n_local_lambda = 0;
1705: partial_sum = 0;
1706: n_boundary_dofs = 0;
1707: s = 0;
1708: n_vertices = 0;
1709: /* Get Vertices used to define the BDDC */
1710: PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);
1711: for (i=0; i<pcbddc->local_primal_size; i++) {
1712: MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);
1713: if (j == 1) {
1714: vertex_indices[n_vertices]=temp_indices[0];
1715: n_vertices++;
1716: }
1717: MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);
1718: }
1719: dual_size = pcis->n_B-n_vertices;
1721: PetscSortInt(n_vertices,vertex_indices);
1722: PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);
1723: PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);
1724: PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);
1726: VecGetArray(pcis->vec1_N,&array);
1727: for (i=0; i<pcis->n; i++) {
1728: j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */
1729: k = 0;
1730: if (j > 0) k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0);
1731: j = j - k;
1732: if (j > 0) n_boundary_dofs++;
1734: skip_node = PETSC_FALSE;
1735: if (s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */
1736: skip_node = PETSC_TRUE;
1737: s++;
1738: }
1739: if (j < 1) skip_node = PETSC_TRUE;
1740: if (!skip_node) {
1741: if (fully_redundant) {
1742: /* fully redundant set of lagrange multipliers */
1743: n_lambda_for_dof = (j*(j+1))/2;
1744: } else {
1745: n_lambda_for_dof = j;
1746: }
1747: n_local_lambda += j;
1748: /* needed to evaluate global number of lagrange multipliers */
1749: array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */
1750: /* store some data needed */
1751: dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1;
1752: aux_local_numbering_1[partial_sum] = i;
1753: aux_local_numbering_2[partial_sum] = n_lambda_for_dof;
1754: partial_sum++;
1755: }
1756: }
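/*
   Editor's note: a worked instance of the counting above. For a dual dof
   shared by N=3 subdomains, each of them sees j=2 neighbours, hence
     - non-redundant:   n_lambda_for_dof = j = 2          (a chain of jumps)
     - fully redundant: n_lambda_for_dof = j*(j+1)/2 = 3  (one per pair)
   and this subdomain touches n_local_lambda += j = 2 of them. Since every
   one of the j+1 sharing subdomains stores n_lambda_for_dof/(j+1) in
   array[i], the ADD_VALUES scatter followed by VecSum below counts each
   multiplier exactly once.
*/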
1757: VecRestoreArray(pcis->vec1_N,&array);
1759: VecSet(pcis->vec1_global,0.0);
1760: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1761: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
1762: VecSum(pcis->vec1_global,&scalar_value);
1764: fetidpmat_ctx->n_lambda = (PetscInt) scalar_value;
1765: /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */
1767: /* compute global ordering of lagrange multipliers and associate l2g map */
1768: VecSet(pcis->vec1_global,0.0);
1769: VecSet(pcis->vec1_N,0.0);
1770: VecGetArray(pcis->vec1_N,&array);
1771: for (i=0;i<dual_size;i++) array[aux_local_numbering_1[i]] = aux_local_numbering_2[i];
1772: VecRestoreArray(pcis->vec1_N,&array);
1773: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
1774: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
1775: VecSum(pcis->vec1_global,&scalar_value);
1776: if (pcbddc->dbg_flag && (PetscInt)scalar_value != fetidpmat_ctx->n_lambda) {
1777: SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Global number of multipliers mismatch! (%d!=%d)\n",(PetscInt)scalar_value,fetidpmat_ctx->n_lambda);
1778: }
1780: /* Fill pcis->vec1_global with cumulative function for global numbering */
1781: VecGetArray(pcis->vec1_global,&array);
1782: VecGetLocalSize(pcis->vec1_global,&s);
1783: k = 0;
1784: first_index = -1;
1785: for (i=0; i<s; i++) {
1786: if (!first_found && array[i] > 0.0) {
1787: first_found = PETSC_TRUE;
1788: first_index = i;
1789: }
1790: k += (PetscInt)array[i];
1791: }
1792: j = (!rank ? nprocs : 0);
1793: PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);
1794: PetscMalloc(j*sizeof(*dof_displs),&dof_displs);
1795: MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);
1796: if (!rank) {
1797: dof_displs[0]=0;
1798: for (i=1; i<nprocs; i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
1799: }
1800: MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,comm);
1801: if (first_found) {
1802: array[first_index] += k;
1804: old_index = first_index;
1805: for (i=first_index+1; i<s; i++) {
1806: if (array[i] > 0.0) {
1807: array[i] += array[old_index];
1808: old_index = i;
1809: }
1810: }
1811: }
1812: VecRestoreArray(pcis->vec1_global,&array);
1813: VecSet(pcis->vec1_N,0.0);
1814: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1815: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
1816: PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);
1817: VecGetArray(pcis->vec1_N,&array);
1818: for (i=0; i<dual_size; i++) {
1819: aux_global_numbering[i] = (PetscInt)array[aux_local_numbering_1[i]]-aux_local_numbering_2[i];
1820: }
1821: VecRestoreArray(pcis->vec1_N,&array);
1822: PetscFree(aux_local_numbering_2);
1823: PetscFree(dof_displs);
1824: PetscFree(dof_sizes);
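/*
   Editor's note: the Gather/Scatter block above is an exclusive prefix sum of
   the per-rank multiplier counts. A sketch with 3 ranks holding k = {2,4,1}:
   rank 0 computes dof_displs = {0,2,6}, each rank receives its offset
   (0, 2 and 6 respectively), adds it to its first nonzero entry and then
   accumulates locally, so array[] ends up holding the 1-based global index of
   the last multiplier of each dof; subtracting aux_local_numbering_2 yields
   the 0-based index of the first one, stored in aux_global_numbering.
*/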
1826: /* init data for scaling factors exchange */
1827: partial_sum = 0;
1828: j = 0;
1830: PetscMalloc(pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);
1831: PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);
1832: PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);
1833: PetscMalloc(pcis->n*sizeof(PetscScalar*),&all_factors);
1835: ptrs_buffer[0] = 0;
1836: for (i=1; i<pcis->n_neigh; i++) {
1837: partial_sum += pcis->n_shared[i];
1838: ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i];
1839: }
1840: PetscMalloc(partial_sum*sizeof(PetscScalar),&send_buffer);
1841: PetscMalloc(partial_sum*sizeof(PetscScalar),&recv_buffer);
1842: PetscMalloc(partial_sum*sizeof(PetscScalar),&all_factors[0]);
1843: for (i=0; i<pcis->n-1; i++) {
1844: j = mat_graph->count[i];
1845: if (j>0) {
1846: k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0);
1847: j = j - k;
1848: }
1849: all_factors[i+1]=all_factors[i]+j;
1850: }
1851: /* scatter B scaling to N vec */
1852: VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
1853: VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
1854: /* communications */
1855: VecGetArray(pcis->vec1_N,&array);
1856: for (i=1; i<pcis->n_neigh; i++) {
1857: for (j=0; j<pcis->n_shared[i]; j++) send_buffer[ptrs_buffer[i-1]+j] = array[pcis->shared[i][j]];
1859: j = ptrs_buffer[i]-ptrs_buffer[i-1];
1860: MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);
1861: MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);
1862: }
1863: VecRestoreArray(pcis->vec1_N,&array);
1864: MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);
1865: /* put values in correct places */
1866: for (i=1; i<pcis->n_neigh; i++) {
1867: for (j=0; j<pcis->n_shared[i]; j++) {
1868: k = pcis->shared[i][j];
1870: neigh_position = 0;
1871: while (mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) neigh_position++;
1872: s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0);
1874: neigh_position = neigh_position - s;
1876: all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j];
1877: }
1878: }
1879: MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);
1880: PetscFree(send_reqs);
1881: PetscFree(recv_reqs);
1882: PetscFree(send_buffer);
1883: PetscFree(recv_buffer);
1884: PetscFree(ptrs_buffer);
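/*
   Editor's note: the exchange above ships, for each neighbour, the local
   scaling factors (pcis->D) of the shared dofs, and files the received ones
   into all_factors[dof][neigh_position], a jagged per-dof array ordered by
   neighbouring rank. The pattern is deadlock-free because every pair of
   neighbours posts one matching MPI_Isend/MPI_Irecv with tag 0.
*/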
1886: /* Compute B and B_delta (local actions) */
1887: PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);
1888: PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);
1889: PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);
1890: PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);
1891: PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);
1893: n_global_lambda = 0;
1894: partial_sum = 0;
1896: for (i=0;i<dual_size;i++) {
1897: n_global_lambda = aux_global_numbering[i];
1898: j = mat_graph->count[aux_local_numbering_1[i]];
1899: k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0);
1900: j = j - k;
1901: aux_sums[0] = 0;
1902: for (s=1; s<j; s++) aux_sums[s]=aux_sums[s-1]+j-s+1;
1904: array = all_factors[aux_local_numbering_1[i]];
1905: n_neg_values = 0;
1907: while (n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) n_neg_values++;
1908: n_pos_values = j - n_neg_values;
1910: if (fully_redundant) {
1911: for (s=0; s<n_neg_values; s++) {
1912: l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda;
1913: cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i];
1914: vals_B_delta [partial_sum+s]=-1.0;
1915: scaling_factors[partial_sum+s]=array[s];
1916: }
1917: for (s=0; s<n_pos_values; s++) {
1918: l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda;
1919: cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i];
1920: vals_B_delta [partial_sum+s+n_neg_values]=1.0;
1921: scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values];
1922: }
1923: partial_sum += j;
1924: } else {
1925: /* l2g_indices and default cols and vals of B_delta */
1926: for (s=0; s<j; s++) {
1927: l2g_indices [partial_sum+s]=n_global_lambda+s;
1928: cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i];
1929: vals_B_delta [partial_sum+s]=0.0;
1930: }
1931: /* B_delta */
1932: if (n_neg_values > 0) vals_B_delta[partial_sum+n_neg_values-1] = -1.0; /* there's a rank next to me to the left */
1933: if (n_neg_values < j) vals_B_delta[partial_sum+n_neg_values] = 1.0; /* there's a rank next to me to the right */
1935: /* scaling as in Klawonn-Widlund 1999 */
1936: for (s=0;s<n_neg_values;s++) {
1937: scalar_value = 0.0;
1938: for (k=0;k<s+1;k++) scalar_value += array[k];
1939: scaling_factors[partial_sum+s] = -scalar_value;
1940: }
1941: for (s=0;s<n_pos_values;s++) {
1942: scalar_value = 0.0;
1943: for (k=s+n_neg_values;k<j;k++) scalar_value += array[k];
1944: scaling_factors[partial_sum+s+n_neg_values] = scalar_value;
1945: }
1946: partial_sum += j;
1947: }
1948: }
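/*
   Editor's note: a worked instance of the non-redundant branch. Consider one
   dual dof shared by ranks {0,1,2}, seen from rank 1: j=2 neighbours,
   n_neg_values=1, n_pos_values=1, and array[] = {d0,d2} holds the scaling
   factors received from ranks 0 and 2. The loop then produces
     vals_B_delta    = {-1, +1}   (jump with rank 0, jump with rank 2)
     scaling_factors = {-d0, d2}  (partial sums as in Klawonn-Widlund 1999)
   so B_Ddelta weights each multiplier with the factors of the ranks sitting
   on the corresponding side of the jump.
*/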
1949: PetscFree(aux_global_numbering);
1950: PetscFree(aux_sums);
1951: PetscFree(aux_local_numbering_1);
1952: PetscFree(dual_dofs_boundary_indices);
1953: PetscFree(all_factors[0]);
1954: PetscFree(all_factors);
1955: /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */
1957: /* Local to global mapping of fetidpmat */
1958: VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);
1959: VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);
1960: VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);
1961: VecCreate(comm,&lambda_global);
1962: VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);
1963: VecSetType(lambda_global,VECMPI);
1964: ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);
1965: VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);
1966: ISDestroy(&IS_l2g_lambda);
1968: /* Create local part of B_delta */
1969: MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta);
1970: MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);
1971: MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);
1972: MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);
1973: MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
1974: for (i=0; i<n_local_lambda; i++) {
1975: MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);
1976: }
1977: PetscFree(vals_B_delta);
1978: MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);
1979: MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);
1981: if (fully_redundant) {
1982: MatCreate(PETSC_COMM_SELF,&ScalingMat);
1983: MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);
1984: MatSetType(ScalingMat,MATSEQAIJ);
1985: MatSeqAIJSetPreallocation(ScalingMat,1,NULL);
1986: for (i=0; i<n_local_lambda; i++) {
1987: MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);
1988: }
1989: MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);
1990: MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);
1991: MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);
1992: MatDestroy(&ScalingMat);
1993: } else {
1994: MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta);
1995: MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);
1996: MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);
1997: MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);
1998: for (i=0; i<n_local_lambda; i++) {
1999: MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);
2000: }
2001: MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);
2002: MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);
2003: }
2004: PetscFree(scaling_factors);
2005: PetscFree(cols_B_delta);
2007: /* Create some vectors needed by fetidp */
2008: VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);
2009: VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);
2011: test_fetidp = PETSC_FALSE;
2013: PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);
2015: if (test_fetidp) {
2017: PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);
2018: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
2019: PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");
2020: PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");
2021: PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");
2022: if (fully_redundant) {
2023: PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");
2024: } else {
2025: PetscViewerASCIIPrintf(viewer,"non-fully redundant case for lagrange multipliers.\n");
2026: }
2027: PetscViewerFlush(viewer);
2029: /* TEST A/B: Test numbering of global lambda dofs */
2031: VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);
2032: VecSet(lambda_global,1.0);
2033: VecSet(test_vec,1.0);
2034: VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2035: VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2036: scalar_value = -1.0;
2037: VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);
2038: VecNorm(test_vec,NORM_INFINITY,&scalar_value);
2039: VecDestroy(&test_vec);
2040: PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);
2041: PetscViewerFlush(viewer);
2042: if (fully_redundant) {
2043: VecSet(lambda_global,0.0);
2044: VecSet(fetidpmat_ctx->lambda_local,0.5);
2045: VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2046: VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2047: VecSum(lambda_global,&scalar_value);
2048: PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);
2049: PetscViewerFlush(viewer);
2050: }
2052: /* TEST C: It should hold that B_delta*w=0 for w\in\widehat{W} */
2053: /* This is the meaning of the B matrix */
2055: VecSetRandom(pcis->vec1_N,NULL);
2056: VecSet(pcis->vec1_global,0.0);
2057: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2058: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2059: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2060: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2061: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2062: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2063: /* Action of B_delta */
2064: MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2065: VecSet(lambda_global,0.0);
2066: VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2067: VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2068: VecNorm(lambda_global,NORM_INFINITY,&scalar_value);
2069: PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);
2070: PetscViewerFlush(viewer);
2072: /* TEST D: It should hold that E_D w = w - P_D w for w\in\widetilde{W} */
2073: /* E_D = R_D^T R */
2074: /* P_D = B_{D,delta}^T B_{delta} */
2075: /* eq. (44), Mandel, Tezaur and Dohrmann (2005) */
2077: /* compute a random vector in \widetilde{W} */
2078: VecSetRandom(pcis->vec1_N,NULL);
2080: scalar_value = 0.0; /* set zero at vertices */
2081: VecGetArray(pcis->vec1_N,&array);
2082: for (i=0;i<n_vertices;i++) array[vertex_indices[i]] = scalar_value;
2083: VecRestoreArray(pcis->vec1_N,&array);
2085: /* store w for final comparison */
2086: VecDuplicate(pcis->vec1_B,&test_vec);
2087: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);
2088: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);
2090: /* Jump operator P_D : results stored in pcis->vec1_B */
2092: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2093: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2094: /* Action of B_delta */
2095: MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2096: VecSet(lambda_global,0.0);
2097: VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2098: VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2099: /* Action of B_Ddelta^T */
2100: VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2101: VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2102: MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);
2104: /* Average operator E_D : results stored in pcis->vec2_B */
2106: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2107: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2108: VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);
2109: VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2110: VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2111: VecSet(pcis->vec1_global,0.0);
2112: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2113: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2114: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2115: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2116: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2117: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
2119: /* test E_D=I-P_D */
2120: scalar_value = 1.0;
2121: VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);
2122: scalar_value = -1.0;
2123: VecAXPY(pcis->vec1_B,scalar_value,test_vec);
2124: VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);
2125: VecDestroy(&test_vec);
2126: PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);
2127: PetscViewerFlush(viewer);
2129: /* TEST E: It should hold that R_D^T P_D w = 0 for w\in\widetilde{W} */
2130: /* eq. (48), Mandel, Tezaur and Dohrmann (2005) */
2132: VecSetRandom(pcis->vec1_N,NULL);
2133: VecGetArray(pcis->vec1_N,&array);
2135: scalar_value = 0.0; /* set zero at vertices */
2136: for (i=0;i<n_vertices;i++) array[vertex_indices[i]]=scalar_value;
2137: VecRestoreArray(pcis->vec1_N,&array);
2139: /* Jump operator P_D : results stored in pcis->vec1_B */
2141: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2142: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2143: /* Action of B_delta */
2144: MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2145: VecSet(lambda_global,0.0);
2146: VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2147: VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);
2148: /* Action of B_Ddelta^T */
2149: VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2150: VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2151: MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);
2152: /* diagonal scaling */
2153: VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);
2154: /* sum on the interface */
2155: VecSet(pcis->vec1_N,0.0);
2156: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2157: VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
2158: VecSet(pcis->vec1_global,0.0);
2159: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2160: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2161: VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);
2162: PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);
2163: PetscViewerFlush(viewer);
2165: if (!fully_redundant) {
2166: /* TEST F: It should hold that B_{delta}B^T_{D,delta}=I */
2167: /* Corollary of Theorem 14, Mandel, Tezaur and Dohrmann (2005) */
2168: VecDuplicate(lambda_global,&test_vec);
2169: VecSetRandom(lambda_global,NULL);
2170: /* Action of B_Ddelta^T */
2171: VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2172: VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2173: MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);
2174: /* Action of B_delta */
2175: MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);
2176: VecSet(test_vec,0.0);
2177: VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);
2178: VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);
2179: scalar_value = -1.0;
2180: VecAXPY(lambda_global,scalar_value,test_vec);
2181: VecNorm(lambda_global,NORM_INFINITY,&scalar_value);
2182: PetscViewerASCIIPrintf(viewer,"F[coll]: CHECK infty norm of B_delta*B^T_{D,delta} - I: % 1.14e\n",scalar_value);
2183: PetscViewerFlush(viewer);
2185: VecDestroy(&test_vec);
2186: }
2187: }
2188: /* final cleanup */
2189: PetscFree(vertex_indices);
2190: VecDestroy(&lambda_global);
2191: return(0);
2192: }
2196: static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx)
2197: {
2198: FETIDPMat_ctx *mat_ctx;
2202: MatShellGetContext(fetimat,&mat_ctx);
2203: /* get references from objects created when setting up feti mat context */
2204: PetscObjectReference((PetscObject)mat_ctx->lambda_local);
2206: fetidppc_ctx->lambda_local = mat_ctx->lambda_local;
2208: PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);
2210: fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta;
2212: PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);
2214: fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda;
2215: return(0);
2216: }
2220: static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y)
2221: {
2222: FETIDPMat_ctx *mat_ctx;
2223: PC_IS *pcis;
2227: MatShellGetContext(fetimat,&mat_ctx);
2228: pcis = (PC_IS*)mat_ctx->pc->data;
2229: /* Application of B_delta^T */
2230: VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2231: VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2232: MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);
2233: /* Application of \widetilde{S}^-1 */
2234: VecSet(pcis->vec1_D,0.0);
2235: PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);
2236: /* Application of B_delta */
2237: MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);
2238: VecSet(y,0.0);
2239: VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2240: VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2241: return(0);
2242: }
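/*
   Editor's note: FETIDPMatMult above realizes the FETI-DP operator

      F = B_delta * S_tilde^{-1} * B_delta^T

   where the S_tilde^{-1} action is the BDDC interface correction applied by
   PCBDDCApplyInterfacePreconditioner. A minimal usage sketch, assuming F and
   the shell preconditioner have been assembled as in the notes above:

     KSP ksp;
     KSPCreate(comm,&ksp);
     KSPSetOperators(ksp,F,F,SAME_PRECONDITIONER);
     KSPSetType(ksp,KSPCG);
     KSPSolve(ksp,rhs_lambda,lambda);
*/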
2246: static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y)
2247: {
2248: FETIDPPC_ctx *pc_ctx;
2249: PC_IS *pcis;
2253: PCShellGetContext(fetipc,(void**)&pc_ctx);
2254: pcis = (PC_IS*)pc_ctx->pc->data;
2255: /* Application of B_Ddelta^T */
2256: VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2257: VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);
2258: VecSet(pcis->vec2_B,0.0);
2259: MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);
2260: /* Application of S */
2261: PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);
2262: /* Application of B_Ddelta */
2263: MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);
2264: VecSet(y,0.0);
2265: VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2266: VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);
2267: return(0);
2268: }
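/*
   Editor's note: FETIDPPCApply above realizes the FETI-DP Dirichlet
   preconditioner

      M^{-1} = B_{D,delta} * S * B_{D,delta}^T

   with the local Schur complement action S supplied by PCISApplySchur;
   together with the F operator above it yields the standard preconditioned
   FETI-DP interface problem on the multipliers.
*/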
2272: static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc)
2273: {
2274: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2275: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2276: PetscInt nvtxs;
2277: const PetscInt *xadj,*adjncy;
2278: Mat mat_adj;
2279: PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE;
2280: PCBDDCGraph mat_graph =pcbddc->mat_graph;
2284: /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */
2285: if (!mat_graph->xadj) {
2286: MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);
2287: MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);
2288: if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ()\n");
2289: /* Get adjacency into BDDC workspace */
2290: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
2291: MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);
2292: if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ()\n");
2293: MatDestroy(&mat_adj);
2294: }
2295: return(0);
2296: }
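/*
   Editor's note: a minimal sketch of providing the local graph explicitly
   instead of relying on the MatConvert fallback above; the CSR arrays below
   (hypothetical data) describe a chain of 4 local dofs 0-1-2-3:

     PetscInt xadj[5]   = {0,1,3,5,6};
     PetscInt adjncy[6] = {1,0,2,1,3,2};
     PCBDDCSetLocalAdjacencyGraph(pc,4,xadj,adjncy,PETSC_COPY_VALUES);
*/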
2297: /* -------------------------------------------------------------------------- */
2300: static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc)
2301: {
2302: PetscErrorCode ierr;
2303: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2304: PC_IS *pcis = (PC_IS*)(pc->data);
2305: const PetscScalar zero = 0.0;
2308: /* Application of PHI^T */
2309: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
2310: if (pcbddc->inexact_prec_type) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
2312: /* Scatter data of coarse_rhs */
2313: if (pcbddc->coarse_rhs) { VecSet(pcbddc->coarse_rhs,zero); }
2314: PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
2316: /* Local solution on R nodes */
2317: VecSet(pcbddc->vec1_R,zero);
2318: VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2319: VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2320: if (pcbddc->inexact_prec_type) {
2321: VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2322: VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
2323: }
2324: PCBDDCSolveSaddlePoint(pc);
2325: VecSet(pcis->vec1_B,zero);
2326: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2327: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
2328: if (pcbddc->inexact_prec_type) {
2329: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
2330: VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
2331: }
2333: /* Coarse solution */
2334: PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);
2335: if (pcbddc->coarse_rhs) {
2336: if (pcbddc->CoarseNullSpace) {
2337: MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,NULL);
2338: }
2339: KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
2340: if (pcbddc->CoarseNullSpace) {
2341: MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);
2342: }
2343: }
2344: PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
2345: PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
2347: /* Sum contributions from two levels */
2348: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
2349: if (pcbddc->inexact_prec_type) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
2350: return(0);
2351: }
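/*
   Editor's note: the routine above applies the classical two-level BDDC sum

      M^{-1} r = Phi * K_coarse^{-1} * Phi^T r  +  local correction

   with Phi = [coarse_phi_B; coarse_phi_D] the coarse basis functions and the
   local correction coming from PCBDDCSolveSaddlePoint; note how the coarse
   scatter is begun before the local solve and completed after it,
   overlapping communication with computation.
*/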
2352: /* -------------------------------------------------------------------------- */
2355: static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
2356: {
2358: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2361: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
2362: if (pcbddc->local_auxmat1) {
2363: MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);
2364: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);
2365: }
2366: return(0);
2367: }
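/*
   Editor's note: a sketch of the intent of the solve above, assuming (as is
   customary for BDDC) that local_auxmat1 and local_auxmat2 encode the
   Lagrange multiplier correction for the constraints on the R dofs:

      vec2_R  = K_RR^{-1} vec1_R            (ksp_R solve)
      vec1_C  = local_auxmat1 * vec2_R      (multipliers)
      vec2_R += local_auxmat2 * vec1_C      (constrained correction)

   When all constraints are handled by a change of basis the two auxiliary
   matrices are absent and the plain ksp_R solve suffices.
*/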
2368: /* -------------------------------------------------------------------------- */
2371: static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
2372: {
2374: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2377: switch (pcbddc->coarse_communications_type) {
2378: case SCATTERS_BDDC:
2379: VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
2380: break;
2381: case GATHERS_BDDC:
2382: break;
2383: }
2384: return(0);
2385: }
2386: /* -------------------------------------------------------------------------- */
2389: static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
2390: {
2392: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
2393: PetscScalar *array_to;
2394: PetscScalar *array_from;
2395: MPI_Comm comm;
2396: PetscInt i;
2399: PetscObjectGetComm((PetscObject)pc,&comm);
2400: switch (pcbddc->coarse_communications_type) {
2401: case SCATTERS_BDDC:
2402: VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);
2403: break;
2404: case GATHERS_BDDC:
2405: if (vec_from) VecGetArray(vec_from,&array_from);
2406: if (vec_to) VecGetArray(vec_to,&array_to);
2407: switch (pcbddc->coarse_problem_type) {
2408: case SEQUENTIAL_BDDC:
2409: if (smode == SCATTER_FORWARD) {
2410: MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);
2411: if (vec_to) {
2412: if (imode == ADD_VALUES) {
2413: for (i=0;i<pcbddc->replicated_primal_size;i++) {
2414: array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
2415: }
2416: } else {
2417: for (i=0;i<pcbddc->replicated_primal_size;i++) {
2418: array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
2419: }
2420: }
2421: }
2422: } else {
2423: if (vec_from) {
2424: if (imode == ADD_VALUES) {
2425: printf("Scatter mode %d, insert mode %d for case %d not implemented!\n",smode,imode,pcbddc->coarse_problem_type);
2426: }
2427: for (i=0;i<pcbddc->replicated_primal_size;i++) {
2428: pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
2429: }
2430: }
2431: MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);
2432: }
2433: break;
2434: case REPLICATED_BDDC:
2435: if (smode == SCATTER_FORWARD) {
2436: MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);
2437: if (imode == ADD_VALUES) {
2438: for (i=0;i<pcbddc->replicated_primal_size;i++) {
2439: array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
2440: }
2441: } else {
2442: for (i=0;i<pcbddc->replicated_primal_size;i++) {
2443: array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
2444: }
2445: }
2446: } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
2447: if (imode == ADD_VALUES) {
2448: for (i=0;i<pcbddc->local_primal_size;i++) {
2449: array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
2450: }
2451: } else {
2452: for (i=0;i<pcbddc->local_primal_size;i++) {
2453: array_to[i]=array_from[pcbddc->local_primal_indices[i]];
2454: }
2455: }
2456: }
2457: break;
2458: case MULTILEVEL_BDDC:
2459: break;
2460: case PARALLEL_BDDC:
2461: break;
2462: }
2463: if (vec_from) VecRestoreArray(vec_from,&array_from);
2464: if (vec_to) VecRestoreArray(vec_to,&array_to);
2465: break;
2466: }
2467: return(0);
2468: }
2469: /* -------------------------------------------------------------------------- */
2472: static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
2473: {
2475: PC_IS *pcis = (PC_IS*)(pc->data);
2476: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2477: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2478: PetscInt *nnz,*is_indices;
2479: PetscScalar *temp_quadrature_constraint;
2480: PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B;
2481: PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint;
2482: PetscInt n_constraints,n_vertices,size_of_constraint;
2483: PetscScalar quad_value;
2484: PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
2485: PetscInt nnsp_size =0,nnsp_addone=0,temp_constraints,temp_start_ptr;
2486: IS *used_IS;
2487: MatType impMatType=MATSEQAIJ;
2488: PetscBLASInt Bs,Bt,lwork,lierr;
2489: PetscReal tol=1.0e-8;
2490: MatNullSpace nearnullsp;
2491: const Vec *nearnullvecs;
2492: Vec *localnearnullsp;
2493: PetscScalar *work,*temp_basis,*array_vector,*correlation_mat;
2494: PetscReal *rwork,*singular_vals;
2495: PetscBLASInt Bone=1,*ipiv;
2496: Vec temp_vec;
2497: Mat temp_mat;
2498: KSP temp_ksp;
2499: PC temp_pc;
2500: PetscInt s,start_constraint,dual_dofs;
2501: PetscBool compute_submatrix,useksp=PETSC_FALSE;
2502: PetscInt *aux_primal_permutation,*aux_primal_numbering;
2503: PetscBool boolforface,*change_basis;
2505: /* some ugly conditional declarations */
2506: #if defined(PETSC_MISSING_LAPACK_GESVD)
2507: PetscScalar dot_result;
2508: PetscScalar one=1.0,zero=0.0;
2509: PetscInt ii;
2510: PetscScalar *singular_vectors;
2511: PetscBLASInt *iwork,*ifail;
2512: PetscReal dummy_real,abs_tol;
2513: PetscBLASInt eigs_found;
2514: #if defined(PETSC_USE_COMPLEX)
2515: PetscScalar val1,val2;
2516: #endif
2517: #endif
2518: PetscBLASInt dummy_int;
2519: PetscScalar dummy_scalar;
2522: /* check if near null space is attached to global mat */
2523: MatGetNearNullSpace(pc->pmat,&nearnullsp);
2524: if (nearnullsp) {
2525: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
2526: } else { /* if a near null space is not provided, fall back to the constant vector */
2527: nnsp_has_cnst = PETSC_TRUE;
2528: use_nnsp_true = PETSC_TRUE;
2529: }
2530: if (nnsp_has_cnst) nnsp_addone = 1;
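/*
   Editor's note: a minimal sketch of how a caller can provide the near null
   space consumed here (rigid body modes built from a coordinate vector;
   "coords" and "A" are hypothetical user objects); without it the code falls
   back to the constant vector as coded above:

     MatNullSpace nns;
     MatNullSpaceCreateRigidBody(coords,&nns);
     MatSetNearNullSpace(A,nns);
     MatNullSpaceDestroy(&nns);
*/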
2532: /*
2533: Evaluate maximum storage size needed by the procedure
2534: - temp_indices will contain start index of each constraint stored as follows
2535: - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
2536: - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
2537: - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
2538: */
2540: total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
2541: total_counts *= (nnsp_addone+nnsp_size);
2543: ISGetSize(pcbddc->ISForVertices,&n_vertices);
2545: total_counts += n_vertices;
2547: PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);
2548: PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);
2550: total_counts = 0;
2551: max_size_of_constraint = 0;
2552: for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++) {
2553: if (i<pcbddc->n_ISForEdges) used_IS = &pcbddc->ISForEdges[i];
2554: else used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
2555: ISGetSize(*used_IS,&j);
2556: total_counts += j;
2557: if (j>max_size_of_constraint) max_size_of_constraint=j;
2558: }
2559: total_counts *= (nnsp_addone+nnsp_size);
2560: total_counts += n_vertices;
2562: PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);
2563: PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);
2564: PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);
2565: PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);
2566: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
2568: for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
2569: for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
2570: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
2572: /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyevx */
2573: rwork = 0;
2574: work = 0;
2575: singular_vals = 0;
2576: temp_basis = 0;
2577: correlation_mat = 0;
2578: if (!pcbddc->use_nnsp_true) {
2579: PetscScalar temp_work;
2580: #if defined(PETSC_MISSING_LAPACK_GESVD)
2581: /* POD */
2582: PetscInt max_n;
2583: max_n = nnsp_addone+nnsp_size;
2584: /* using some techniques borrowed from Proper Orthogonal Decomposition */
2585: PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);
2586: PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);
2587: PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);
2588: PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);
2589: #if defined(PETSC_USE_COMPLEX)
2590: PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);
2591: #endif
2592: PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);
2593: PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);
2594: /* now we evaluate the optimal workspace using query with lwork=-1 */
2595: PetscBLASIntCast(max_n,&Bt);
2596: lwork =-1;
2597: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2598: #if !defined(PETSC_USE_COMPLEX)
2599: abs_tol=1.e-8;
2600: PetscStackCallBLAS("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr));
2601: #else
2602: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1");
2603: #endif
2604: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr);
2605: PetscFPTrapPop();
2606: #else /* LAPACK GESVD is available */
2607: /* SVD */
2608: PetscInt max_n,min_n;
2609: max_n = max_size_of_constraint;
2610: min_n = nnsp_addone+nnsp_size;
2611: if (max_size_of_constraint < (nnsp_addone+nnsp_size)) {
2612: min_n = max_size_of_constraint;
2613: max_n = nnsp_addone+nnsp_size;
2614: }
2615: PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);
2616: #if defined(PETSC_USE_COMPLEX)
2617: PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);
2618: #endif
2619: /* now we evaluate the optimal workspace using query with lwork=-1 */
2620: lwork =-1;
2621: PetscBLASIntCast(max_n,&Bs);
2622: PetscBLASIntCast(min_n,&Bt);
2623: dummy_int = Bs;
2624: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2625: #if !defined(PETSC_USE_COMPLEX)
2626: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2627: #else
2628: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2629: #endif
2630: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
2631: PetscFPTrapPop();
2632: #endif
2633: /* Allocate optimal workspace */
2634: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
2635: total_counts = (PetscInt)lwork;
2636: PetscMalloc(total_counts*sizeof(PetscScalar),&work);
2637: }
2638: /* get local part of global near null space vectors */
2639: PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);
2640: for (k=0; k<nnsp_size; k++) {
2641: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
2642: VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2643: VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2644: }
2645: /* Now we can loop on constraining sets */
2646: total_counts =0;
2647: temp_indices[0]=0;
2648: /* vertices */
2649: PetscBool used_vertex;
2650: ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);
2651: if (nnsp_has_cnst) { /* consider all vertices */
2652: for (i=0; i<n_vertices; i++) {
2653: temp_indices_to_constraint[temp_indices[total_counts]] = is_indices[i];
2654: temp_indices_to_constraint_B[temp_indices[total_counts]]= local_to_B[is_indices[i]];
2655: temp_quadrature_constraint[temp_indices[total_counts]] = 1.0;
2656: temp_indices[total_counts+1] = temp_indices[total_counts]+1;
2657: change_basis[total_counts] = PETSC_FALSE;
2658: total_counts++;
2659: }
2660: } else { /* consider vertices for which there exists at least one localnearnullsp vector which is nonzero there */
2661: for (i=0; i<n_vertices; i++) {
2662: used_vertex = PETSC_FALSE;
2663: k = 0;
2664: while (!used_vertex && k<nnsp_size) {
2665: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2666: if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) {
2667: temp_indices_to_constraint[temp_indices[total_counts]] =is_indices[i];
2668: temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
2669: temp_quadrature_constraint[temp_indices[total_counts]] =1.0;
2670: temp_indices[total_counts+1] =temp_indices[total_counts]+1;
2671: change_basis[total_counts] =PETSC_FALSE;
2672: total_counts++;
2673: used_vertex=PETSC_TRUE;
2674: }
2675: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2676: k++;
2677: }
2678: }
2679: }
2680: ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);
2681: n_vertices = total_counts;
2683: /* edges and faces */
2684: for (i=0; i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces; i++) {
2685: if (i<pcbddc->n_ISForEdges) {
2686: used_IS = &pcbddc->ISForEdges[i];
2687: boolforface = pcbddc->usechangeofbasis;
2688: } else {
2689: used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
2690: boolforface = pcbddc->usechangeonfaces;
2691: }
2692: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
2693: temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2694: ISGetSize(*used_IS,&size_of_constraint);
2695: ISGetIndices(*used_IS,(const PetscInt**)&is_indices);
2696: if (nnsp_has_cnst) {
2697: temp_constraints++;
2698: quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2699: for (j=0; j<size_of_constraint; j++) {
2700: temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j];
2701: temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
2702: temp_quadrature_constraint[temp_indices[total_counts]+j] =quad_value;
2703: }
2704: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
2705: change_basis[total_counts] =boolforface;
2706: total_counts++;
2707: }
2708: for (k=0; k<nnsp_size; k++) {
2709: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2710: for (j=0; j<size_of_constraint; j++) {
2711: temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j];
2712: temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
2713: temp_quadrature_constraint[temp_indices[total_counts]+j] =array_vector[is_indices[j]];
2714: }
2715: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);
2716: quad_value = 1.0;
2717: if (use_nnsp_true) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
2718: PetscBLASIntCast(size_of_constraint,&Bs);
2719: PetscStackCallBLAS("BLASasum",quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone));
2720: }
2721: if (quad_value > 0.0) { /* keep indices and values */
2722: temp_constraints++;
2723: temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */
2724: change_basis[total_counts] =boolforface;
2725: total_counts++;
2726: }
2727: }
2728: ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);
2729: /* perform SVD on the constraints if use_nnsp_true has not been requested by the user */
2730: if (!use_nnsp_true) {
2731: PetscBLASIntCast(size_of_constraint,&Bs);
2732: PetscBLASIntCast(temp_constraints,&Bt);
2734: #if defined(PETSC_MISSING_LAPACK_GESVD)
2735: PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));
2736: /* Store upper triangular part of correlation matrix */
2737: for (j=0; j<temp_constraints; j++) {
2738: for (k=0; k<j+1; k++) {
2739: #if defined(PETSC_USE_COMPLEX)
2740: /* hand made complex dot product -> replace */
2741: dot_result = 0.0;
2742: for (ii=0; ii<size_of_constraint; ii++) {
2743: val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
2744: val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
2745: dot_result += val1*PetscConj(val2);
2746: }
2747: #else
2748: PetscStackCallBLAS("BLASdot",dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone));
2749: #endif
2750: correlation_mat[j*temp_constraints+k]=dot_result;
2751: }
2752: }
2753: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2754: #if !defined(PETSC_USE_COMPLEX)
2755: PetscStackCallBLAS("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr));
2756: #else
2757: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1");
2758: #endif
2759: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr);
2760: PetscFPTrapPop();
2761: /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
2762: j=0;
2763: while (j < Bt && singular_vals[j] < tol) j++;
2764: total_counts=total_counts-j;
2765: if (j<temp_constraints) {
2766: for (k=j;k<Bt;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2767: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2768: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs));
2769: PetscFPTrapPop();
2770: /* copy POD basis into used quadrature memory */
2771: for (k=0;k<Bt-j;k++) {
2772: for (ii=0;ii<size_of_constraint;ii++) {
2773: temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
2774: }
2775: }
2776: }
2778: #else /* LAPACK GESVD is available */
2779: PetscInt min_n = temp_constraints;
2780: if (min_n > size_of_constraint) min_n = size_of_constraint;
2781: dummy_int = Bs;
2782: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2783: #if !defined(PETSC_USE_COMPLEX)
2784: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2785: #else
2786: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2787: #endif
2788: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
2789: PetscFPTrapPop();
2790: /* retain singular values greater than tol: note that LAPACK SVD gives them in descending order */
2791: j=0;
2792: while (j < min_n && singular_vals[min_n-j-1] < tol) j++;
2793: total_counts = total_counts-(PetscInt)Bt+(min_n-j);
2794: #endif
2795: }
2796: }
2798: n_constraints =total_counts-n_vertices;
2799: local_primal_size = total_counts;
2800: /* set quantities in pcbddc data structure */
2801: pcbddc->n_vertices = n_vertices;
2802: pcbddc->n_constraints = n_constraints;
2803: pcbddc->local_primal_size = local_primal_size;
2805: /* Create constraint matrix */
2806: /* The constraint matrix is used to compute the l2g map of primal dofs */
2807: /* so we need to set it up properly either with or without change of basis */
2808: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
2809: MatSetType(pcbddc->ConstraintMatrix,impMatType);
2810: MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);
2812: /* compute a local numbering of constraints : vertices first then constraints */
2813: VecSet(pcis->vec1_N,0.0);
2814: VecGetArray(pcis->vec1_N,&array_vector);
2815: PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);
2816: PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);
2818: total_counts=0;
2820: /* find vertices: subdomain corners plus dofs with basis changed */
2821: for (i=0; i<local_primal_size; i++) {
2822: size_of_constraint=temp_indices[i+1]-temp_indices[i];
2823: if (change_basis[i] || size_of_constraint == 1) {
2824: k=0;
2825: while (k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) {
2826: k=k+1;
2827: }
2828: j = temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1];
2830: array_vector[j] = 1.0;
2831: aux_primal_numbering[total_counts] = j;
2832: aux_primal_permutation[total_counts] = total_counts;
2833: total_counts++;
2834: }
2835: }
2836: VecRestoreArray(pcis->vec1_N,&array_vector);
2837: /* permute indices in order to have a sorted set of vertices */
2838: PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation);
2839: /* nonzero structure */
2840: PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);
2841: for (i=0;i<total_counts;i++) nnz[i]=1;
2843: j=total_counts;
2844: for (i=n_vertices; i<local_primal_size; i++) {
2845: if (!change_basis[i]) {
2846: nnz[j]=temp_indices[i+1]-temp_indices[i];
2847: j++;
2848: }
2849: }
2850: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
2851: PetscFree(nnz);
2852: /* set values in constraint matrix */
2853: for (i=0; i<total_counts; i++) {
2854: j = aux_primal_permutation[i];
2855: k = aux_primal_numbering[j];
2856: MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);
2857: }
2858: for (i=n_vertices; i<local_primal_size; i++) {
2859: if (!change_basis[i]) {
2860: size_of_constraint = temp_indices[i+1]-temp_indices[i];
2861: MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);
2862: total_counts++;
2863: }
2864: }
2865: PetscFree(aux_primal_numbering);
2866: PetscFree(aux_primal_permutation);
2867: /* assembling */
2868: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2869: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
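/*
   Editor's note: a small instance of the layout just assembled, assuming 2
   vertices (local dofs 3 and 7) and one quadrature constraint on dofs {4,5,6}:

     row 0: 1 at column 3                 (vertex, sorted first)
     row 1: 1 at column 7                 (vertex)
     row 2: q4 q5 q6 at columns 4,5,6     (non-vertex constraint)

   The sort-with-permutation above is what guarantees the vertex rows come
   first and in increasing dof order.
*/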
2871: /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */
2872: if (pcbddc->usechangeofbasis) {
2873: MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);
2874: MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);
2875: MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);
2876: /* work arrays */
2877: /* these arrays will be reallocated with different sizes below, so free them first */
2878: PetscFree(temp_basis);
2879: PetscFree(work);
2880: PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);
2881: PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);
2882: PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);
2883: PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);
2884: for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2886: /* Overestimated nonzeros per row */
2887: k=1;
2888: for (i=pcbddc->n_vertices;i<local_primal_size;i++) {
2889: if (change_basis[i]) {
2890: size_of_constraint = temp_indices[i+1]-temp_indices[i];
2891: if (k < size_of_constraint) k = size_of_constraint;
2893: for (j=0;j<size_of_constraint;j++) {
2894: nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2895: }
2896: }
2897: }
2898: MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);
2899: PetscFree(nnz);
2900: /* Temporary array to store indices */
2901: PetscMalloc(k*sizeof(PetscInt),&is_indices);
2902: /* Set initial identity in the matrix */
2903: for (i=0; i<pcis->n_B; i++) {
2904: MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
2905: }
2906: /* Now we loop over the constraints which need a change of basis */
2907: /* Change of basis matrix is evaluated as the FIRST APPROACH in */
2908: /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */
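/* Sketch of the goal of this construction (illustrative, not the exact scaling
   used below): given the rows of C acting on the dofs of a single face/edge,
   build a local change of basis T such that
       C * T = [ 0 | D ],   with D square and nonsingular,
   so each constraint becomes a point constraint on one new primal dof while
   the remaining columns of T span ker(C) and carry the new dual dofs. */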
2909: temp_constraints = 0;
2910: if (pcbddc->n_vertices < local_primal_size) {
2911: temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]];
2912: }
2913: for (i=pcbddc->n_vertices; i<local_primal_size; i++) {
2914: if (change_basis[i]) {
2915: compute_submatrix = PETSC_FALSE;
2916: useksp = PETSC_FALSE;
2917: if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) {
2918: temp_constraints++;
2919: if (i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) {
2920: compute_submatrix = PETSC_TRUE;
2921: }
2922: }
2923: if (compute_submatrix) {
2924: if (temp_constraints > 1 || pcbddc->use_nnsp_true) useksp = PETSC_TRUE;
2925: size_of_constraint = temp_indices[i+1]-temp_indices[i];
2926: if (useksp) { /* experimental */
2927: MatCreate(PETSC_COMM_SELF,&temp_mat);
2928: MatSetType(temp_mat,impMatType);
2929: MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);
2930: MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,NULL);
2931: }
2932: /* First _size_of_constraint-temp_constraints_ columns */
2933: dual_dofs = size_of_constraint-temp_constraints;
2934: start_constraint = i+1-temp_constraints;
2935: for (s=0; s<dual_dofs; s++) {
2936: is_indices[0] = s;
2937: for (j=0;j<temp_constraints;j++) {
2938: for (k=0;k<temp_constraints;k++) {
2939: temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1];
2940: }
2941: work[j] = -temp_quadrature_constraint[temp_indices[start_constraint+j]+s];
2942: is_indices[j+1] = s+j+1;
2943: }
2944: Bt = temp_constraints;
2945: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2946: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr));
2947: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr);
2948: PetscFPTrapPop();
2949: j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s];
2950: MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);
2951: if (useksp) {
2952: /* temp mat with transposed rows and columns */
2953: MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);
2954: MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);
2955: }
2956: }
2957: if (useksp) {
2958: /* last rows of temp_mat */
2959: for (j=0;j<size_of_constraint;j++) is_indices[j] = j;
2961: for (s=0;s<temp_constraints;s++) {
2962: k = s + dual_dofs;
2963: MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);
2964: }
2965: MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);
2966: MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);
2967: MatGetVecs(temp_mat,&temp_vec,NULL);
2968: KSPCreate(PETSC_COMM_SELF,&temp_ksp);
2969: KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);
2970: KSPSetType(temp_ksp,KSPPREONLY);
2971: KSPGetPC(temp_ksp,&temp_pc);
2972: PCSetType(temp_pc,PCLU);
2973: KSPSetUp(temp_ksp);
2974: for (s=0; s<temp_constraints; s++) {
2975: VecSet(temp_vec,0.0);
2976: VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);
2977: VecAssemblyBegin(temp_vec);
2978: VecAssemblyEnd(temp_vec);
2979: KSPSolve(temp_ksp,temp_vec,temp_vec);
2980: VecGetArray(temp_vec,&array_vector);
2981: j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1];
2982: /* last columns of change of basis matrix associated with new primal dofs */
2983: MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);
2984: VecRestoreArray(temp_vec,&array_vector);
2985: }
2986: MatDestroy(&temp_mat);
2987: KSPDestroy(&temp_ksp);
2988: VecDestroy(&temp_vec);
2989: } else {
2990: /* last columns of change of basis matrix associated with new primal dofs */
2991: for (s=0; s<temp_constraints; s++) {
2992: j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1];
2993: MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);
2994: }
2995: }
2996: /* prepare for the next cycle */
2997: temp_constraints = 0;
2998: if (i != local_primal_size -1) temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]];
2999: }
3000: }
3001: }
3002: /* assembling */
3003: MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
3004: MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
3005: PetscFree(ipiv);
3006: PetscFree(is_indices);
3007: }
3008: /* free workspace no longer needed */
3009: PetscFree(rwork);
3010: PetscFree(work);
3011: PetscFree(temp_basis);
3012: PetscFree(singular_vals);
3013: PetscFree(correlation_mat);
3014: PetscFree(temp_indices);
3015: PetscFree(change_basis);
3016: PetscFree(temp_indices_to_constraint);
3017: PetscFree(temp_indices_to_constraint_B);
3018: PetscFree(local_to_B);
3019: PetscFree(temp_quadrature_constraint);
3020: #if defined(PETSC_MISSING_LAPACK_GESVD)
3021: PetscFree(iwork);
3022: PetscFree(ifail);
3023: PetscFree(singular_vectors);
3024: #endif
3025: for (k=0; k<nnsp_size; k++) {
3026: VecDestroy(&localnearnullsp[k]);
3027: }
3028: PetscFree(localnearnullsp);
3029: return(0);
3030: }
3031: /* -------------------------------------------------------------------------- */
3034: static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
3035: {
3037: PC_IS *pcis = (PC_IS*)(pc->data);
3038: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3039: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3040: Mat change_mat_all;
3041: IS is_R_local;
3042: IS is_V_local;
3043: IS is_C_local;
3044: IS is_aux1;
3045: IS is_aux2;
3046: VecType impVecType;
3047: MatType impMatType;
3048: PetscInt n_R =0;
3049: PetscInt n_D =0;
3050: PetscInt n_B =0;
3051: PetscScalar zero =0.0;
3052: PetscScalar one =1.0;
3053: PetscScalar m_one=-1.0;
3054: PetscScalar * array;
3055: PetscScalar *coarse_submat_vals;
3056: PetscInt *idx_R_local;
3057: PetscInt *idx_V_B;
3058: PetscScalar *coarsefunctions_errors;
3059: PetscScalar *constraints_errors;
3061: /* auxiliary indices */
3062: PetscInt i,j,k;
3064: /* for verbose output of bddc */
3065: PetscViewer viewer =pcbddc->dbg_viewer;
3066: PetscBool dbg_flag=pcbddc->dbg_flag;
3068: /* for counting coarse dofs */
3069: PetscInt n_vertices,n_constraints;
3070: PetscInt size_of_constraint;
3071: PetscInt *row_cmat_indices;
3072: PetscScalar *row_cmat_values;
3073: PetscInt *vertices,*nnz,*is_indices,*temp_indices;
3076: /* Set Non-overlapping dimensions */
3077: n_B = pcis->n_B; n_D = pcis->n - n_B;
3078: /* Set types for local objects needed by the BDDC preconditioner */
3079: impMatType = MATSEQDENSE;
3080: impVecType = VECSEQ;
3082: /* get vertex indices from constraint matrix */
3083: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);
3084: n_vertices=0;
3085: for (i=0; i<pcbddc->local_primal_size; i++) {
3086: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3087: if (size_of_constraint == 1) {
3088: vertices[n_vertices]=row_cmat_indices[0];
3089: n_vertices++;
3090: }
3091: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3092: }
3093: /* Set number of constraints */
3094: n_constraints = pcbddc->local_primal_size-n_vertices;
3096: /* vertices in boundary numbering */
3097: if (n_vertices) {
3098: VecSet(pcis->vec1_N,m_one);
3099: VecGetArray(pcis->vec1_N,&array);
3100: for (i=0; i<n_vertices; i++) array[vertices[i]] = i;
3101: VecRestoreArray(pcis->vec1_N,&array);
3102: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3103: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3104: PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);
3105: VecGetArray(pcis->vec1_B,&array);
3106: for (i=0; i<n_vertices; i++) {
3107: j=0;
3108: while (array[j] != i) j++;
3109: idx_V_B[i]=j;
3110: }
3111: VecRestoreArray(pcis->vec1_B,&array);
3112: }
3114: /* transform local matrices if needed */
3115: if (pcbddc->usechangeofbasis) {
3116: PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);
3117: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3118: for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
3119: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3120: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
3121: k=1;
3122: for (i=0;i<n_B;i++) {
3123: MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
3124: nnz[is_indices[i]]=j;
3125: if (k < j) k = j;
3126: MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);
3127: }
3128: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
3129: /* assemble change of basis matrix on the whole set of local dofs */
3130: PetscMalloc(k*sizeof(PetscInt),&temp_indices);
3131: MatCreate(PETSC_COMM_SELF,&change_mat_all);
3132: MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);
3133: MatSetType(change_mat_all,MATSEQAIJ);
3134: MatSeqAIJSetPreallocation(change_mat_all,0,nnz);
3135: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3136: for (i=0; i<n_D; i++) {
3137: MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);
3138: }
3139: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
3140: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
3141: for (i=0; i<n_B; i++) {
3142: MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3143: for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
3144: MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);
3145: MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3146: }
3147: MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);
3148: MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);
3149: MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);
3150: MatDestroy(&pcis->A_IB);
3151: MatDestroy(&pcis->A_BI);
3152: MatDestroy(&pcis->A_BB);
3153: MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);
3154: MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);
3155: MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);
3156: MatDestroy(&change_mat_all);
3157: PetscFree(nnz);
3158: PetscFree(temp_indices);
3159: } else {
3160: /* without change of basis, the local matrix is unchanged */
3161: PetscObjectReference((PetscObject)matis->A);
3163: pcbddc->local_mat = matis->A;
3164: }
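/* Note: when the change of basis is active, the MatPtAP call above assembles
   local_mat = T^T A T with T = change_mat_all, so that in the new basis each
   selected constraint acts on a single dof and can be treated like a vertex. */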
3165: /* Change global null space passed in by the user if change of basis has been performed */
3166: if (pcbddc->NullSpace && pcbddc->usechangeofbasis) {
3167: PCBDDCAdaptNullSpace(pc);
3168: }
3170: /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
3171: VecSet(pcis->vec1_N,one);
3172: VecGetArray(pcis->vec1_N,&array);
3173: for (i=0;i<n_vertices;i++) array[vertices[i]] = zero;
3174: PetscMalloc((pcis->n - n_vertices)*sizeof(PetscInt),&idx_R_local);
3175: for (i=0, n_R=0; i<pcis->n; i++) {
3176: if (array[i] == one) {
3177: idx_R_local[n_R] = i;
3178: n_R++;
3179: }
3180: }
3181: VecRestoreArray(pcis->vec1_N,&array);
3182: if (dbg_flag) {
3183: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3184: PetscViewerFlush(viewer);
3185: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
3186: PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
3187: PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);
3188: PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);
3189: PetscViewerFlush(viewer);
3190: }
3192: /* Allocate needed vectors */
3193: VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);
3194: VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);
3195: VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);
3196: VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);
3197: VecSetSizes(pcbddc->vec1_R,n_R,n_R);
3198: VecSetType(pcbddc->vec1_R,impVecType);
3199: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3200: VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);
3201: VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);
3202: VecSetType(pcbddc->vec1_P,impVecType);
3204: /* Creating some index sets needed */
3205: /* For submatrices */
3206: ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);
3207: if (n_vertices) {
3208: ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);
3209: }
3210: if (n_constraints) {
3211: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);
3212: }
3214: /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
3215: {
3216: PetscInt *aux_array1;
3217: PetscInt *aux_array2;
3218: PetscInt *idx_I_local;
3220: PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);
3221: PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);
3223: ISGetIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);
3224: VecGetArray(pcis->vec1_N,&array);
3225: for (i=0; i<n_D; i++) array[idx_I_local[i]] = 0;
3226: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);
3227: for (i=0, j=0; i<n_R; i++) {
3228: if (array[idx_R_local[i]] == one) {
3229: aux_array1[j] = i;
3230: j++;
3231: }
3232: }
3233: VecRestoreArray(pcis->vec1_N,&array);
3234: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);
3235: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3236: VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3237: VecGetArray(pcis->vec1_B,&array);
3238: for (i=0, j=0; i<n_B; i++) {
3239: if (array[i] == one) {
3240: aux_array2[j] = i; j++;
3241: }
3242: }
3243: VecRestoreArray(pcis->vec1_B,&array);
3244: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);
3245: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
3246: PetscFree(aux_array1);
3247: PetscFree(aux_array2);
3248: ISDestroy(&is_aux1);
3249: ISDestroy(&is_aux2);
3251: if (pcbddc->inexact_prec_type || dbg_flag) {
3252: PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);
3253: VecGetArray(pcis->vec1_N,&array);
3254: for (i=0, j=0; i<n_R; i++) {
3255: if (array[idx_R_local[i]] == zero) {
3256: aux_array1[j] = i;
3257: j++;
3258: }
3259: }
3260: VecRestoreArray(pcis->vec1_N,&array);
3261: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);
3262: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
3263: PetscFree(aux_array1);
3264: ISDestroy(&is_aux1);
3265: }
3266: }
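/* The scatters built above map the R ordering (all dofs but vertices) onto the
   interface and interior orderings; e.g. later on
     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
   moves the interface part of a vector in R numbering into pcis->vec1_B. */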
3268: /* Creating PC contexts for local Dirichlet and Neumann problems */
3269: {
3270: Mat A_RR;
3271: PC pc_temp;
3273: /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */
3274: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
3275: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
3276: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);
3277: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
3278: KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");
3280: /* default */
3281: KSPGetPC(pcbddc->ksp_D,&pc_temp);
3282: PCSetType(pc_temp,PCLU);
3284: /* Allow user's customization */
3285: KSPSetFromOptions(pcbddc->ksp_D);
3287: /* umfpack interface has a bug when matrix dimension is zero */
3288: if (!n_D) {
3289: PCSetType(pc_temp,PCNONE);
3290: }
3292: /* Set Up KSP for Dirichlet problem of BDDC */
3293: KSPSetUp(pcbddc->ksp_D);
3295: /* set ksp_D into pcis data */
3296: KSPDestroy(&pcis->ksp_D);
3297: PetscObjectReference((PetscObject)pcbddc->ksp_D);
3298: pcis->ksp_D = pcbddc->ksp_D;
3300: /* Matrix for Neumann problem is A_RR -> we need to create it */
3301: MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);
3302: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
3303: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
3304: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);
3305: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
3306: KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");
3308: /* default */
3309: KSPGetPC(pcbddc->ksp_R,&pc_temp);
3310: PCSetType(pc_temp,PCLU);
3312: /* Allow user's customization */
3313: KSPSetFromOptions(pcbddc->ksp_R);
3315: /* umfpack interface has a bug when matrix dimension is zero */
3316: if (!pcis->n) {
3317: PCSetType(pc_temp,PCNONE);
3318: }
3320: /* Set Up KSP for Neumann problem of BDDC */
3321: KSPSetUp(pcbddc->ksp_R);
3323: /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
3324: {
3325: Vec temp_vec;
3326: PetscReal value;
3327: PetscMPIInt use_exact,use_exact_reduced;
3329: VecDuplicate(pcis->vec1_D,&temp_vec);
3330: VecSetRandom(pcis->vec1_D,NULL);
3331: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
3332: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);
3333: VecAXPY(temp_vec,m_one,pcis->vec1_D);
3334: VecNorm(temp_vec,NORM_INFINITY,&value);
3335: VecDestroy(&temp_vec);
3336: use_exact = 1;
3337: if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
3339: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));
3340: pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced;
3341: if (dbg_flag) {
3342: PetscViewerFlush(viewer);
3343: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3344: PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");
3345: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);
3346: }
3347: if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) {
3348: PCBDDCAdaptLocalProblem(pc,pcis->is_I_local);
3349: }
3350: VecDuplicate(pcbddc->vec1_R,&temp_vec);
3351: VecSetRandom(pcbddc->vec1_R,NULL);
3352: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
3353: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);
3354: VecAXPY(temp_vec,m_one,pcbddc->vec1_R);
3355: VecNorm(temp_vec,NORM_INFINITY,&value);
3356: VecDestroy(&temp_vec);
3358: use_exact = 1;
3359: if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
3360: MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));
3361: if (dbg_flag) {
3362: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);
3363: PetscViewerFlush(viewer);
3364: }
3365: if (pcbddc->NullSpace && !use_exact_reduced) {
3366: PCBDDCAdaptLocalProblem(pc,is_R_local);
3367: }
3368: }
3369: /* free Neumann problem's matrix */
3370: MatDestroy(&A_RR);
3371: }
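/* Since the prefixes "dirichlet_" and "neumann_" are set before the calls to
   KSPSetFromOptions above, the default PREONLY+LU local solvers can be
   overridden at run time; an illustrative command line (any KSP/PC pair
   accepted by KSPSetFromOptions works) could be
     -dirichlet_pc_type cholesky -neumann_pc_type ilu -neumann_pc_factor_levels 1 */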
3373: /* Assemble all remaining stuff needed to apply BDDC */
3374: {
3375: Mat A_RV,A_VR,A_VV;
3376: Mat M1;
3377: Mat C_CR;
3378: Mat AUXMAT;
3379: Vec vec1_C;
3380: Vec vec2_C;
3381: Vec vec1_V;
3382: Vec vec2_V;
3383: PetscInt *nnz;
3384: PetscInt *auxindices;
3385: PetscInt index;
3386: PetscScalar *array2;
3387: MatFactorInfo matinfo;
3389: /* Allocating some extra storage just to be safe */
3390: PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);
3391: PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);
3392: for (i=0;i<pcis->n;i++) auxindices[i]=i;
3394: /* some work vectors on vertices and/or constraints */
3395: if (n_vertices) {
3396: VecCreate(PETSC_COMM_SELF,&vec1_V);
3397: VecSetSizes(vec1_V,n_vertices,n_vertices);
3398: VecSetType(vec1_V,impVecType);
3399: VecDuplicate(vec1_V,&vec2_V);
3400: }
3401: if (n_constraints) {
3402: VecCreate(PETSC_COMM_SELF,&vec1_C);
3403: VecSetSizes(vec1_C,n_constraints,n_constraints);
3404: VecSetType(vec1_C,impVecType);
3405: VecDuplicate(vec1_C,&vec2_C);
3406: VecDuplicate(vec1_C,&pcbddc->vec1_C);
3407: }
3408: /* Precompute quantities needed for preprocessing and application of BDDC */
3409: if (n_constraints) {
3410: MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);
3411: MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);
3412: MatSetType(pcbddc->local_auxmat2,impMatType);
3413: MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,NULL);
3415: /* Create Constraint matrix on R nodes: C_{CR} */
3416: MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);
3417: ISDestroy(&is_C_local);
3419: /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
3420: for (i=0; i<n_constraints; i++) {
3421: VecSet(pcbddc->vec1_R,zero);
3423: /* Get row of constraint matrix in R numbering */
3424: VecGetArray(pcbddc->vec1_R,&array);
3425: MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3426: for (j=0;j<size_of_constraint;j++) array[row_cmat_indices[j]] = -row_cmat_values[j];
3427: MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);
3428: VecRestoreArray(pcbddc->vec1_R,&array);
3430: /* Solve for row of constraint matrix in R numbering */
3431: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
3433: /* Set values */
3434: VecGetArray(pcbddc->vec2_R,&array);
3435: MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);
3436: VecRestoreArray(pcbddc->vec2_R,&array);
3437: }
3438: MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
3439: MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);
3441: /* Assemble AUXMAT = (LUFactor)(-C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
3442: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);
3443: MatFactorInfoInitialize(&matinfo);
3444: ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);
3445: MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);
3446: ISDestroy(&is_aux1);
3448: /* Assemble explicitly M1 = (C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} needed in preproc */
3449: MatCreate(PETSC_COMM_SELF,&M1);
3450: MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);
3451: MatSetType(M1,impMatType);
3452: MatSeqDenseSetPreallocation(M1,NULL);
3453: for (i=0; i<n_constraints; i++) {
3454: VecSet(vec1_C,zero);
3455: VecSetValue(vec1_C,i,one,INSERT_VALUES);
3456: VecAssemblyBegin(vec1_C);
3457: VecAssemblyEnd(vec1_C);
3458: MatSolve(AUXMAT,vec1_C,vec2_C);
3459: VecScale(vec2_C,m_one);
3460: VecGetArray(vec2_C,&array);
3461: MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);
3462: VecRestoreArray(vec2_C,&array);
3463: }
3464: MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);
3465: MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);
3466: MatDestroy(&AUXMAT);
3467: /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
3468: MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
3470: }
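/* Summary of the operators built above (a sketch of how they combine): with
     local_auxmat2 = -A_RR^{-1} C_CR^T
     M1            =  (C_CR A_RR^{-1} C_CR^T)^{-1}
     local_auxmat1 =  M1 C_CR
   the solution of the constrained problem [A_RR C_CR^T; C_CR 0][u;lambda]=[r;0]
   is applied as u = w + local_auxmat2*(local_auxmat1*w), where w = A_RR^{-1} r. */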
3472: /* Get submatrices from subdomain matrix */
3473: if (n_vertices) {
3474: MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);
3475: MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);
3476: MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);
3477: }
3479: /* Matrix of coarse basis functions (local) */
3480: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);
3481: MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);
3482: MatSetType(pcbddc->coarse_phi_B,impMatType);
3483: MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,NULL);
3484: if (pcbddc->inexact_prec_type || dbg_flag) {
3485: MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);
3486: MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);
3487: MatSetType(pcbddc->coarse_phi_D,impMatType);
3488: MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,NULL);
3489: }
3491: if (dbg_flag) {
3492: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);
3493: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);
3494: }
3495: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3496: PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);
3498: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
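/* Each vertex-based coarse basis function phi solves (a sketch of the saddle
   point problem handled below, with the i-th vertex value prescribed to one)
       [ A_RR  C_CR^T ] [ phi_R  ]   [ -A_RV e_i ]
       [ C_CR    0    ] [ lambda ] = [     0     ]
   and the loop also accumulates, column by column, the subdomain contribution
   \Phi^T A \Phi to the coarse matrix in coarse_submat_vals. */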
3499: for (i=0; i<n_vertices; i++) {
3500: VecSet(vec1_V,zero);
3501: VecSetValue(vec1_V,i,one,INSERT_VALUES);
3502: VecAssemblyBegin(vec1_V);
3503: VecAssemblyEnd(vec1_V);
3504: /* solution of saddle point problem */
3505: MatMult(A_RV,vec1_V,pcbddc->vec1_R);
3506: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
3507: VecScale(pcbddc->vec1_R,m_one);
3508: if (n_constraints) {
3509: MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);
3510: MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
3511: VecScale(vec1_C,m_one);
3512: }
3513: MatMult(A_VR,pcbddc->vec1_R,vec2_V);
3514: MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);
3516: /* Set values in coarse basis function and subdomain part of coarse_mat */
3517: /* coarse basis functions */
3518: VecSet(pcis->vec1_B,zero);
3519: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3520: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3521: VecGetArray(pcis->vec1_B,&array);
3522: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);
3523: VecRestoreArray(pcis->vec1_B,&array);
3524: MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);
3525: if (pcbddc->inexact_prec_type || dbg_flag) {
3526: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3527: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3528: VecGetArray(pcis->vec1_D,&array);
3529: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);
3530: VecRestoreArray(pcis->vec1_D,&array);
3531: }
3532: /* subdomain contribution to coarse matrix */
3533: VecGetArray(vec2_V,&array);
3534: for (j=0; j<n_vertices; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; /* WARNING -> column major ordering */
3535: VecRestoreArray(vec2_V,&array);
3536: if (n_constraints) {
3537: VecGetArray(vec1_C,&array);
3538: for (j=0; j<n_constraints; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; /* WARNING -> column major ordering */
3539: VecRestoreArray(vec1_C,&array);
3540: }
3542: if (dbg_flag) {
3543: /* assemble subdomain vector on nodes */
3544: VecSet(pcis->vec1_N,zero);
3545: VecGetArray(pcis->vec1_N,&array);
3546: VecGetArray(pcbddc->vec1_R,&array2);
3547: for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j];
3548: array[vertices[i]] = one;
3549: VecRestoreArray(pcbddc->vec1_R,&array2);
3550: VecRestoreArray(pcis->vec1_N,&array);
3552: /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
3553: VecSet(pcbddc->vec1_P,zero);
3554: VecGetArray(pcbddc->vec1_P,&array2);
3555: VecGetArray(vec2_V,&array);
3556: for (j=0;j<n_vertices;j++) array2[j]=array[j];
3557: VecRestoreArray(vec2_V,&array);
3558: if (n_constraints) {
3559: VecGetArray(vec1_C,&array);
3560: for (j=0;j<n_constraints;j++) array2[j+n_vertices]=array[j];
3561: VecRestoreArray(vec1_C,&array);
3562: }
3563: VecRestoreArray(pcbddc->vec1_P,&array2);
3564: VecScale(pcbddc->vec1_P,m_one);
3566: /* check saddle point solution */
3567: MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
3568: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
3569: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);
3570: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
3571: VecGetArray(pcbddc->vec1_P,&array);
3572: array[i]=array[i]+m_one; /* shift by the identity matrix */
3573: VecRestoreArray(pcbddc->vec1_P,&array);
3574: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);
3575: }
3576: }
3578: for (i=0; i<n_constraints; i++) {
3579: VecSet(vec2_C,zero);
3580: VecSetValue(vec2_C,i,m_one,INSERT_VALUES);
3581: VecAssemblyBegin(vec2_C);
3582: VecAssemblyEnd(vec2_C);
3584: /* solution of saddle point problem */
3585: MatMult(M1,vec2_C,vec1_C);
3586: MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);
3587: VecScale(vec1_C,m_one);
3588: if (n_vertices) { MatMult(A_VR,pcbddc->vec1_R,vec2_V); }
3590: /* Set values in coarse basis function and subdomain part of coarse_mat */
3591: /* coarse basis functions */
3592: index=i+n_vertices;
3593: VecSet(pcis->vec1_B,zero);
3594: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3595: VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
3596: VecGetArray(pcis->vec1_B,&array);
3597: MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);
3598: VecRestoreArray(pcis->vec1_B,&array);
3599: if (pcbddc->inexact_prec_type || dbg_flag) {
3600: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3601: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
3602: VecGetArray(pcis->vec1_D,&array);
3603: MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);
3604: VecRestoreArray(pcis->vec1_D,&array);
3605: }
3607: /* subdomain contribution to coarse matrix */
3608: if (n_vertices) {
3609: VecGetArray(vec2_V,&array);
3610: for (j=0; j<n_vertices; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j]; /* WARNING -> column major ordering */
3611: VecRestoreArray(vec2_V,&array);
3612: }
3613: VecGetArray(vec1_C,&array);
3614: for (j=0; j<n_constraints; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j]; /* WARNING -> column major ordering */
3615: VecRestoreArray(vec1_C,&array);
3617: if (dbg_flag) {
3618: /* assemble subdomain vector on nodes */
3619: VecSet(pcis->vec1_N,zero);
3620: VecGetArray(pcis->vec1_N,&array);
3621: VecGetArray(pcbddc->vec1_R,&array2);
3622: for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j];
3623: VecRestoreArray(pcbddc->vec1_R,&array2);
3624: VecRestoreArray(pcis->vec1_N,&array);
3626: /* assemble subdomain vector of lagrange multipliers */
3627: VecSet(pcbddc->vec1_P,zero);
3628: VecGetArray(pcbddc->vec1_P,&array2);
3629: if (n_vertices) {
3630: VecGetArray(vec2_V,&array);
3631: for (j=0;j<n_vertices;j++) array2[j]=-array[j];
3632: VecRestoreArray(vec2_V,&array);
3633: }
3634: VecGetArray(vec1_C,&array);
3635: for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];}
3636: VecRestoreArray(vec1_C,&array);
3637: VecRestoreArray(pcbddc->vec1_P,&array2);
3639: /* check saddle point solution */
3640: MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);
3641: MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);
3642: VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);
3643: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
3644: VecGetArray(pcbddc->vec1_P,&array);
3645: array[index] = array[index]+m_one; /* shift by the identity matrix */
3646: VecRestoreArray(pcbddc->vec1_P,&array);
3647: VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);
3648: }
3649: }
3650: MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
3651: MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);
3652: if (pcbddc->inexact_prec_type || dbg_flag) {
3653: MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
3654: MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);
3655: }
3656: /* Checking coarse_sub_mat and coarse basis functions */
3657: /* It should be \Phi^{(j)T} A^{(j)} \Phi^{(j)} = coarse_sub_mat */
3658: if (dbg_flag) {
3659: Mat coarse_sub_mat;
3660: Mat TM1,TM2,TM3,TM4;
3661: Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI;
3662: MatType checkmattype=MATSEQAIJ;
3663: PetscScalar value;
3665: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
3666: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
3667: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
3668: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
3669: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
3670: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
3671: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
3672: MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);
3674: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3675: PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");
3676: PetscViewerFlush(viewer);
3677: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
3678: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
3679: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
3680: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
3681: MatDestroy(&AUXMAT);
3682: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
3683: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
3684: MatDestroy(&AUXMAT);
3685: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
3686: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
3687: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
3688: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
3689: MatNorm(TM1,NORM_INFINITY,&value);
3690: PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");
3691: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);
3692: PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);
3693: PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");
3694: for (i=0; i<pcbddc->local_primal_size; i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]); }
3695: PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");
3696: for (i=0; i<pcbddc->local_primal_size; i++) { PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]); }
3697: PetscViewerFlush(viewer);
3698: MatDestroy(&A_II);
3699: MatDestroy(&A_BB);
3700: MatDestroy(&A_IB);
3701: MatDestroy(&A_BI);
3702: MatDestroy(&TM1);
3703: MatDestroy(&TM2);
3704: MatDestroy(&TM3);
3705: MatDestroy(&TM4);
3706: MatDestroy(&coarse_phi_D);
3707: MatDestroy(&coarse_sub_mat);
3708: MatDestroy(&coarse_phi_B);
3709: PetscFree(coarsefunctions_errors);
3710: PetscFree(constraints_errors);
3711: }
3712: /* free memory */
3713: if (n_vertices) {
3714: VecDestroy(&vec1_V);
3715: VecDestroy(&vec2_V);
3716: MatDestroy(&A_RV);
3717: MatDestroy(&A_VR);
3718: MatDestroy(&A_VV);
3719: }
3720: if (n_constraints) {
3721: VecDestroy(&vec1_C);
3722: VecDestroy(&vec2_C);
3723: MatDestroy(&M1);
3724: MatDestroy(&C_CR);
3725: }
3726: PetscFree(auxindices);
3727: PetscFree(nnz);
3728: /* create coarse matrix and data structures for message passing associated with the actual choice of coarse problem type */
3729: PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);
3730: PetscFree(coarse_submat_vals);
3731: }
3732: /* free memory */
3733: if (n_vertices) {
3734: PetscFree(idx_V_B);
3735: ISDestroy(&is_V_local);
3736: }
3737: ISDestroy(&is_R_local);
3738: return(0);
3739: }
3741: /* -------------------------------------------------------------------------- */
3745: static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
3746: {
3747: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3748: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3749: PC_IS *pcis = (PC_IS*)pc->data;
3750: MPI_Comm prec_comm;
3751: MPI_Comm coarse_comm;
3753: /* common to all choices */
3754: PetscScalar *temp_coarse_mat_vals;
3755: PetscScalar *ins_coarse_mat_vals;
3756: PetscInt *ins_local_primal_indices;
3757: PetscMPIInt *localsizes2,*localdispl2;
3758: PetscMPIInt size_prec_comm;
3759: PetscMPIInt rank_prec_comm;
3760: PetscMPIInt active_rank=MPI_PROC_NULL;
3761: PetscMPIInt master_proc=0;
3762: PetscInt ins_local_primal_size;
3764: /* specific to MULTILEVEL_BDDC */
3765: PetscMPIInt *ranks_recv;
3766: PetscMPIInt count_recv=0;
3767: PetscMPIInt rank_coarse_proc_send_to;
3768: PetscMPIInt coarse_color = MPI_UNDEFINED;
3769: ISLocalToGlobalMapping coarse_ISLG;
3771: /* some other variables */
3773: MatType coarse_mat_type;
3774: PCType coarse_pc_type;
3775: KSPType coarse_ksp_type;
3776: PC pc_temp;
3777: PetscInt i,j,k;
3778: PetscInt max_it_coarse_ksp=1; /* don't increase this value */
3780: /* verbose output viewer */
3781: PetscViewer viewer = pcbddc->dbg_viewer;
3782: PetscBool dbg_flag = pcbddc->dbg_flag;
3784: PetscInt offset,offset2;
3785: PetscMPIInt im_active,active_procs;
3786: PetscInt *dnz,*onz;
3788: PetscBool setsym,issym=PETSC_FALSE;
3791: PetscObjectGetComm((PetscObject)pc,&prec_comm);
3792: ins_local_primal_indices = 0;
3793: ins_coarse_mat_vals = 0;
3794: localsizes2 = 0;
3795: localdispl2 = 0;
3796: temp_coarse_mat_vals = 0;
3797: coarse_ISLG = 0;
3799: MPI_Comm_size(prec_comm,&size_prec_comm);
3800: MPI_Comm_rank(prec_comm,&rank_prec_comm);
3801: MatIsSymmetricKnown(pc->pmat,&setsym,&issym);
3803: /* Assign global numbering to coarse dofs */
3804: {
3805: PetscInt *auxlocal_primal;
3806: PetscInt *row_cmat_indices;
3807: PetscInt *aux_ordering;
3808: PetscInt *row_cmat_global_indices;
3809: PetscInt *dof_sizes,*dof_displs;
3810: PetscInt size_of_constraint;
3811: PetscBool *array_bool;
3812: PetscBool first_found;
3813: PetscInt first_index,old_index,s;
3814: PetscMPIInt mpi_local_primal_size;
3815: PetscScalar coarsesum,*array;
3817: mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
3819: /* Construct needed data structures for message passing */
3820: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->local_primal_indices);
3821: j = 0;
3822: if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3823: j = size_prec_comm;
3824: }
3825: PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);
3826: PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);
3827: /* Gather local_primal_size information for all processes */
3828: if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3829: MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);
3830: } else {
3831: MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);
3832: }
3833: pcbddc->replicated_primal_size = 0;
3834: for (i=0; i<j; i++) {
3835: pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size;
3836: pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
3837: }
3839: /* First let's count coarse dofs.
3840: This code fragment assumes that the number of local constraints per connected component
3841: is not greater than the number of nodes defined for the connected component
3842: (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3843: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);
3844: j = 0;
3845: for (i=0; i<pcbddc->local_primal_size; i++) {
3846: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
3847: if (j < size_of_constraint) j = size_of_constraint;
3848: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);
3849: }
3850: PetscMalloc(j*sizeof(PetscInt),&aux_ordering);
3851: PetscMalloc(j*sizeof(PetscInt),&row_cmat_global_indices);
3852: PetscMalloc(pcis->n*sizeof(PetscBool),&array_bool);
3853: for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_FALSE;
3855: for (i=0;i<pcbddc->local_primal_size;i++) {
3856: MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3857: for (j=0; j<size_of_constraint; j++) aux_ordering[j] = j;
3859: ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,row_cmat_indices,row_cmat_global_indices);
3860: PetscSortIntWithPermutation(size_of_constraint,row_cmat_global_indices,aux_ordering);
3861: for (j=0; j<size_of_constraint; j++) {
3862: k = row_cmat_indices[aux_ordering[j]];
3863: if (!array_bool[k]) {
3864: array_bool[k] = PETSC_TRUE;
3865: auxlocal_primal[i] = k;
3866: break;
3867: }
3868: }
3869: MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);
3870: }
3871: PetscFree(aux_ordering);
3872: PetscFree(array_bool);
3873: PetscFree(row_cmat_global_indices);
3875: /* Compute number of coarse dofs */
3876: VecSet(pcis->vec1_N,0.0);
3877: VecGetArray(pcis->vec1_N,&array);
3878: for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]] = 1.0;
3879: VecRestoreArray(pcis->vec1_N,&array);
3880: VecSet(pcis->vec1_global,0.0);
3881: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
3882: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);
3883: VecSum(pcis->vec1_global,&coarsesum);
3884: pcbddc->coarse_size = (PetscInt)coarsesum;
3886: /* Fill pcis->vec1_global with cumulative function for global numbering */
3887: VecGetArray(pcis->vec1_global,&array);
3888: VecGetLocalSize(pcis->vec1_global,&s);
3889: k = 0;
3890: first_index = -1;
3891: first_found = PETSC_FALSE;
3892: for (i=0; i<s; i++) {
3893: if (!first_found && array[i] > 0.0) {
3894: first_found = PETSC_TRUE;
3895: first_index = i;
3896: }
3897: k += (PetscInt)array[i];
3898: }
3899: j = (!rank_prec_comm ? size_prec_comm : 0);
3900: PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);
3901: PetscMalloc(j*sizeof(*dof_displs),&dof_displs);
3902: MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,prec_comm);
3903: if (!rank_prec_comm) {
3904: dof_displs[0]=0;
3905: for (i=1;i<size_prec_comm;i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3906: }
3907: MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,prec_comm);
3908: if (first_found) {
3909: array[first_index] += k;
3910: old_index = first_index;
3911: for (i=first_index+1;i<s;i++) {
3912: if (array[i] > 0.0) {
3913: array[i] += array[old_index];
3914: old_index = i;
3915: }
3916: }
3917: }
3918: VecRestoreArray(pcis->vec1_global,&array);
3919: VecSet(pcis->vec1_N,0.0);
3920: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3921: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3922: VecGetArray(pcis->vec1_N,&array);
3923: for (i=0;i<pcbddc->local_primal_size;i++) pcbddc->local_primal_indices[i] = (PetscInt)array[auxlocal_primal[i]]-1;
3924: VecRestoreArray(pcis->vec1_N,&array);
3925: PetscFree(dof_displs);
3926: PetscFree(dof_sizes);
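/* A small example of the numbering scheme above (two processes, illustrative):
   if rank 0 owns coarse marks at positions {1,3} and rank 1 at {0,2}, then
   k=2 on both ranks, dof_displs = {0,2}, and after the scatter and the running
   sum the marked entries hold {1,2} on rank 0 and {3,4} on rank 1; subtracting
   one gives the 0-based global indices stored in local_primal_indices. */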
3928: if (dbg_flag) {
3929: PetscViewerFlush(viewer);
3930: PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");
3931: PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");
3932: VecSet(pcis->vec1_N,0.0);
3933: VecGetArray(pcis->vec1_N,&array);
3934: for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
3935: VecRestoreArray(pcis->vec1_N,&array);
3936: VecSet(pcis->vec1_global,0.0);
3937: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3938: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3939: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3940: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
3941: VecGetArray(pcis->vec1_N,&array);
3942: for (i=0;i<pcis->n;i++) {
3943: if (array[i] == 1.0) {
3944: ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);
3945: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);
3946: }
3947: }
3948: PetscViewerFlush(viewer);
3949: for (i=0;i<pcis->n;i++) {
3950: if (array[i] > 0.0) array[i] = 1.0/array[i];
3951: }
3952: VecRestoreArray(pcis->vec1_N,&array);
3953: VecSet(pcis->vec1_global,0.0);
3954: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3955: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
3956: VecSum(pcis->vec1_global,&coarsesum);
3957: PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);
3958: PetscViewerFlush(viewer);
3959: }
3960: PetscFree(auxlocal_primal);
3961: }
3963: if (dbg_flag) {
3964: PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);
3965: PetscViewerFlush(viewer);
3966: }
3968: im_active = 0;
3969: if (pcis->n) im_active = 1;
3970: MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);
3972: /* adapt coarse problem type */
3973: if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3974: if (pcbddc->current_level < pcbddc->max_levels) {
3975: if ((active_procs/pcbddc->coarsening_ratio) < 2) {
3976: if (dbg_flag) {
3977: PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
3978: PetscViewerFlush(viewer);
3979: }
3980: pcbddc->coarse_problem_type = PARALLEL_BDDC;
3981: }
3982: } else {
3983: if (dbg_flag) {
3984: PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n");
3985: PetscViewerFlush(viewer);
3986: }
3987: pcbddc->coarse_problem_type = PARALLEL_BDDC;
3988: }
3989: }
3991: switch (pcbddc->coarse_problem_type) {
3993: case (MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */
3994: {
3995: /* we need additional variables */
3996: MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs;
3997: MetisInt *metis_coarse_subdivision;
3998: MetisInt options[METIS_NOPTIONS];
3999: PetscMPIInt size_coarse_comm,rank_coarse_comm;
4000: PetscMPIInt procs_jumps_coarse_comm;
4001: PetscMPIInt *coarse_subdivision;
4002: PetscMPIInt *total_count_recv;
4003: PetscMPIInt *total_ranks_recv;
4004: PetscMPIInt *displacements_recv;
4005: PetscMPIInt *my_faces_connectivity;
4006: PetscMPIInt *petsc_faces_adjncy;
4007: MetisInt *faces_adjncy;
4008: MetisInt *faces_xadj;
4009: PetscMPIInt *number_of_faces;
4010: PetscMPIInt *faces_displacements;
4011: PetscInt *array_int;
4012: PetscMPIInt my_faces =0;
4013: PetscMPIInt total_faces=0;
4014: PetscInt ranks_stretching_ratio;
4016: /* define some quantities */
4017: pcbddc->coarse_communications_type = SCATTERS_BDDC;
4018: coarse_mat_type = MATIS;
4019: coarse_pc_type = PCBDDC;
4020: coarse_ksp_type = KSPRICHARDSON;
4022: /* details of coarse decomposition */
4023: n_subdomains = active_procs;
4024: n_parts = n_subdomains/pcbddc->coarsening_ratio;
4025: ranks_stretching_ratio = size_prec_comm/active_procs;
4026: procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
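/* For illustration: with size_prec_comm = 16 active processes and
   coarsening_ratio = 2, this gives n_parts = 8 coarse subdomains,
   ranks_stretching_ratio = 1 and procs_jumps_coarse_comm = 2, i.e. every
   second rank of the fine communicator will host a coarse subdomain. */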
4028: #if 0
4029: PetscMPIInt *old_ranks;
4030: PetscInt *new_ranks,*jj,*ii;
4031: MatPartitioning mat_part;
4032: IS coarse_new_decomposition,is_numbering;
4033: PetscViewer viewer_test;
4034: MPI_Comm test_coarse_comm;
4035: PetscMPIInt test_coarse_color;
4036: Mat mat_adj;
4037: /* Create new communicator for coarse problem splitting the old one */
4038: /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
4039: key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
4040: test_coarse_color = (im_active ? 0 : MPI_UNDEFINED);
4041: test_coarse_comm = MPI_COMM_NULL;
4042: MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);
4043: if (im_active) {
4044: PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
4045: PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
4046: MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);
4047: MPI_Comm_size(test_coarse_comm,&j);
4048: MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);
4049: for (i=0; i<size_prec_comm; i++)
4050: new_ranks[i] = -1;
4051: for (i=0; i<n_subdomains; i++)
4052: new_ranks[old_ranks[i]] = i;
4053: PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);
4054: k = pcis->n_neigh-1;
4055: PetscMalloc(2*sizeof(PetscInt),&ii);
4056: ii[0]=0;
4057: ii[1]=k;
4058: PetscMalloc(k*sizeof(PetscInt),&jj);
4059: for (i=0; i<k; i++)
4060: jj[i]=new_ranks[pcis->neigh[i+1]];
4061: PetscSortInt(k,jj);
4062: MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);
4063: MatView(mat_adj,viewer_test);
4064: MatPartitioningCreate(test_coarse_comm,&mat_part);
4065: MatPartitioningSetAdjacency(mat_part,mat_adj);
4066: MatPartitioningSetFromOptions(mat_part);
4067: printf("Setting Nparts %d\n",n_parts);
4068: MatPartitioningSetNParts(mat_part,n_parts);
4069: MatPartitioningView(mat_part,viewer_test);
4070: MatPartitioningApply(mat_part,&coarse_new_decomposition);
4071: ISView(coarse_new_decomposition,viewer_test);
4072: ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);
4073: ISView(is_numbering,viewer_test);
4074: PetscViewerDestroy(&viewer_test);
4075: ISDestroy(&coarse_new_decomposition);
4076: ISDestroy(&is_numbering);
4077: MatPartitioningDestroy(&mat_part);
4078: PetscFree(old_ranks);
4079: PetscFree(new_ranks);
4080: MPI_Comm_free(&test_coarse_comm);
4081: }
4082: #endif
4084: /* build CSR graph of subdomains' connectivity */
4085: PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);
4086: PetscMemzero(array_int,pcis->n*sizeof(PetscInt));
4087: for (i=1;i<pcis->n_neigh;i++) {/* start at i=1 to skip self: nodes shared only through a face end up with count 1 */
4088: for (j=0;j<pcis->n_shared[i];j++) {
4089: array_int[pcis->shared[i][j]]+=1;
4090: }
4091: }
4092: for (i=1;i<pcis->n_neigh;i++) {
4093: for (j=0;j<pcis->n_shared[i];j++) {
4094: if (array_int[pcis->shared[i][j]] > 0) {
4095: my_faces++;
4096: break;
4097: }
4098: }
4099: }
4101: MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);
4102: PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);
4103: my_faces=0;
4104: for (i=1;i<pcis->n_neigh;i++) {
4105: for (j=0;j<pcis->n_shared[i];j++) {
4106: if (array_int[pcis->shared[i][j]] > 0) {
4107: my_faces_connectivity[my_faces]=pcis->neigh[i];
4108: my_faces++;
4109: break;
4110: }
4111: }
4112: }
4113: if (rank_prec_comm == master_proc) {
4114: PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);
4115: PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);
4116: PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);
4117: PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);
4118: PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);
4119: }
4120: MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);
4121: if (rank_prec_comm == master_proc) {
4122: faces_xadj[0] = 0;
4123: faces_displacements[0] = 0;
4125: j=0;
4126: for (i=1;i<size_prec_comm+1;i++) {
4127: faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
4128: if (number_of_faces[i-1]) {
4129: j++;
4130: faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
4131: }
4132: }
4133: }
4134: MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);
4135: PetscFree(my_faces_connectivity);
4136: PetscFree(array_int);
4137: if (rank_prec_comm == master_proc) {
4138: for (i=0; i<total_faces; i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
4139: PetscFree(faces_displacements);
4140: PetscFree(number_of_faces);
4141: PetscFree(petsc_faces_adjncy);
4142: }
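/* Illustrative content of the gathered CSR graph: for three active subdomains
   in a strip 0-1-2 (and ranks_stretching_ratio = 1), number_of_faces = {1,2,1},
   faces_xadj = {0,1,3,4} and faces_adjncy = {1, 0,2, 1}; this is the graph
   partitioned by METIS below. */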
4144: if (rank_prec_comm == master_proc) {
4145: PetscInt heuristic_for_metis=3;
4146: ncon =1;
4147: faces_nvtxs=n_subdomains;
4148: /* partition graph induced by face connectivity */
4149: PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);
4150: METIS_SetDefaultOptions(options);
4151: /* we need a contiguous partition of the coarse mesh */
4152: options[METIS_OPTION_CONTIG]=1;
4153: options[METIS_OPTION_NITER] =30;
4154: if (pcbddc->coarsening_ratio > 1) {
4155: if (n_subdomains>n_parts*heuristic_for_metis) {
4156: options[METIS_OPTION_IPTYPE] =METIS_IPTYPE_EDGE;
4157: options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
4159: ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
4160: if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
4161: } else {
4162: ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
4163: if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
4164: }
4165: } else {
4166: for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
4167: }
4168: PetscFree(faces_xadj);
4169: PetscFree(faces_adjncy);
4170: PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);
4172: /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
4173: for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
4174: for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
4175: PetscFree(metis_coarse_subdivision);
4176: }
4178: /* Create new communicator for coarse problem splitting the old one */
4179: if (!(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts) {
4180: coarse_color=0; /* for communicator splitting */
4181: active_rank =rank_prec_comm; /* for insertion of matrix values */
4182: }
4183: /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
4184: key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
4185: MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);
4187: if (coarse_color == 0) {
4188: MPI_Comm_size(coarse_comm,&size_coarse_comm);
4189: MPI_Comm_rank(coarse_comm,&rank_coarse_comm);
4190: } else {
4191: rank_coarse_comm = MPI_PROC_NULL;
4192: }
4194: /* master proc takes care of arranging and distributing coarse information */
4195: if (rank_coarse_comm == master_proc) {
4196: PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);
4197: PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);
4198: PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);
4199: /* some initializations */
4200: displacements_recv[0]=0;
4201: PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));
4202: /* count how many processes the j-th process of the coarse decomposition will receive data from */
4203: for (j=0; j<size_coarse_comm; j++) {
4204: for (i=0; i<size_prec_comm; i++) {
4205: if (coarse_subdivision[i]==j) total_count_recv[j]++;
4206: }
4207: }
4208: /* displacements needed for scatterv of total_ranks_recv */
4209: for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
4211: /* Now properly fill total_ranks_recv -> each coarse process will receive the ranks (in the prec_comm communicator) of its sending (friend) processes */
4212: PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));
4213: for (j=0; j<size_coarse_comm; j++) {
4214: for (i=0; i<size_prec_comm; i++) {
4215: if (coarse_subdivision[i]==j) {
4216: total_ranks_recv[displacements_recv[j]+total_count_recv[j]] = i;
4218: total_count_recv[j] += 1;
4219: }
4220: }
4221: }
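/* Worked example of the two-pass pattern above (illustrative numbers only,
   assuming ranks_stretching_ratio == 1): if coarse_subdivision maps old ranks
   [0,1,2,3] to coarse ranks [0,1,0,1], then total_count_recv = [2,2],
   displacements_recv = [0,2] and total_ranks_recv = [0,2,1,3]: coarse rank 0
   will receive from old ranks 0 and 2, coarse rank 1 from old ranks 1 and 3. */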
4222: /* for (j=0;j<size_coarse_comm;j++) {
4223: printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
4224: for (i=0;i<total_count_recv[j];i++) {
4225: printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
4226: }
4227: printf("\n");
4228: } */
4230: /* identify new decomposition in terms of ranks in the old communicator */
4231: for (i=0; i<n_subdomains; i++) {
4232: coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
4233: }
4234: /* printf("coarse_subdivision in old end new ranks\n");
4235: for (i=0;i<size_prec_comm;i++) {
4236: if (coarse_subdivision[i]!=MPI_PROC_NULL) {
4237: printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
4238: } else {
4239: printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
4240: }
4241: }
4242: printf("\n"); */
4243: }
4245: /* Scatter the new decomposition: each process learns which coarse process to send its data to */
4246: MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);
4247: /* Scatter receiving details to members of coarse decomposition */
4248: if (coarse_color == 0) {
4249: MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);
4250: PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);
4251: MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);
4252: }
4254: /* printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to);
4255: if (coarse_color == 0) {
4256: printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
4257: for (i=0;i<count_recv;i++)
4258: printf("%d ",ranks_recv[i]);
4259: printf("\n");
4260: } */
4262: if (rank_prec_comm == master_proc) {
4263: PetscFree(coarse_subdivision);
4264: PetscFree(total_count_recv);
4265: PetscFree(total_ranks_recv);
4266: PetscFree(displacements_recv);
4267: }
4268: break;
4269: }
4271: case (REPLICATED_BDDC):
4273: pcbddc->coarse_communications_type = GATHERS_BDDC;
4274: coarse_mat_type = MATSEQAIJ;
4275: coarse_pc_type = PCLU;
4276: coarse_ksp_type = KSPPREONLY;
4277: coarse_comm = PETSC_COMM_SELF;
4278: active_rank = rank_prec_comm;
4279: break;
4281: case (PARALLEL_BDDC):
4283: pcbddc->coarse_communications_type = SCATTERS_BDDC;
4284: coarse_mat_type = MATMPIAIJ;
4285: coarse_pc_type = PCREDUNDANT;
4286: coarse_ksp_type = KSPPREONLY;
4287: coarse_comm = prec_comm;
4288: active_rank = rank_prec_comm;
4289: break;
4291: case (SEQUENTIAL_BDDC):
4292: pcbddc->coarse_communications_type = GATHERS_BDDC;
4293: coarse_mat_type = MATSEQAIJ;
4294: coarse_pc_type = PCLU;
4295: coarse_ksp_type = KSPPREONLY;
4296: coarse_comm = PETSC_COMM_SELF;
4297: active_rank = master_proc;
4298: break;
4299: }
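/* Summary of the coarse solver strategies selected above (the MULTILEVEL case
   is handled earlier in this switch): REPLICATED assembles the full coarse
   matrix redundantly on every process as a SEQAIJ matrix factored by LU;
   PARALLEL distributes it as an MPIAIJ matrix over the whole communicator and
   relies on PCREDUNDANT; SEQUENTIAL gathers everything on master_proc only.
   In all three cases the coarse KSP is KSPPREONLY, i.e. a direct application
   of the coarse factorization/preconditioner. */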
4301: switch (pcbddc->coarse_communications_type) {
4303: case(SCATTERS_BDDC):
4304: {
4305: if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
4307: IS coarse_IS;
4309: if (pcbddc->coarsening_ratio == 1) {
4310: ins_local_primal_size = pcbddc->local_primal_size;
4311: ins_local_primal_indices = pcbddc->local_primal_indices;
4312: if (coarse_color == 0) { PetscFree(ranks_recv); }
4313: /* nonzeros (the local coarse block is dense) */
4314: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);
4315: PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));
4316: for (i=0;i<ins_local_primal_size;i++) {
4317: dnz[i] = ins_local_primal_size;
4318: }
4319: } else {
4320: PetscMPIInt send_size;
4321: PetscMPIInt *send_buffer;
4322: PetscInt *aux_ins_indices;
4323: PetscInt ii,jj;
4324: MPI_Request *requests;
4326: PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);
4327: /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
4328: PetscFree(pcbddc->local_primal_displacements);
4329: PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);
4330: pcbddc->replicated_primal_size = count_recv;
4331: j = 0;
4332: for (i=0;i<count_recv;i++) {
4333: pcbddc->local_primal_displacements[i] = j;
4334: j += pcbddc->local_primal_sizes[ranks_recv[i]];
4335: }
4336: pcbddc->local_primal_displacements[count_recv] = j;
4337: PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
4338: /* allocate auxiliary space */
4339: PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);
4340: PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);
4341: PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));
4342: /* allocate buffers for message passing */
4343: PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);
4344: for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
4345: /* send indices to be inserted */
4346: for (i=0;i<count_recv;i++) {
4347: send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
4348: MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);
4349: }
4350: if (rank_coarse_proc_send_to != MPI_PROC_NULL) {
4351: send_size = pcbddc->local_primal_size;
4352: PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);
4353: for (i=0;i<send_size;i++) {
4354: send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
4355: }
4356: MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);
4357: }
4358: MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);
4359: if (rank_coarse_proc_send_to != MPI_PROC_NULL) {
4360: PetscFree(send_buffer);
4361: }
4362: j = 0;
4363: for (i=0;i<count_recv;i++) {
4364: ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
4365: localsizes2[i] = ii*ii;
4366: localdispl2[i] = j;
4367: j += localsizes2[i];
4368: jj = pcbddc->local_primal_displacements[i];
4369: /* count the coarse subdomains sharing each coarse node */
4370: for (k=0;k<ii;k++) {
4371: aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
4372: }
4373: }
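/* What the loop above prepares (sketch): each sending subdomain i contributes
   its dense ii x ii local coarse matrix, hence localsizes2[i] = ii*ii scalars
   starting at offset localdispl2[i] in temp_coarse_mat_vals (allocated just
   below); at the same time aux_ins_indices[g] accumulates, for every global
   coarse dof g, the number of received subdomains sharing it, which is used
   next to build the insertion indices and the global-to-local map. */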
4374: /* temp_coarse_mat_vals used to store matrix values to be received */
4375: PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);
4376: /* evaluate how many values I will insert in coarse mat */
4377: ins_local_primal_size = 0;
4378: for (i=0;i<pcbddc->coarse_size;i++) {
4379: if (aux_ins_indices[i]) {
4380: ins_local_primal_size++;
4381: }
4382: }
4383: /* evaluate indices I will insert in coarse mat */
4384: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4385: j = 0;
4386: for (i=0;i<pcbddc->coarse_size;i++) {
4387: if (aux_ins_indices[i]) {
4388: ins_local_primal_indices[j] = i;
4389: j++;
4390: }
4391: }
4392: /* processes participating in the coarse problem receive matrix data from their friends */
4393: for (i=0;i<count_recv;i++) {
4394: MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);
4395: }
4396: if (rank_coarse_proc_send_to != MPI_PROC_NULL) {
4397: send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
4398: MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);
4399: }
4400: MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);
4401: /* nonzeros (estimated from the received primal sets) */
4402: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);
4403: PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));
4404: /* use aux_ins_indices to realize a global to local mapping */
4405: j=0;
4406: for (i=0;i<pcbddc->coarse_size;i++) {
4407: if (aux_ins_indices[i]==0) {
4408: aux_ins_indices[i]=-1;
4409: } else {
4410: aux_ins_indices[i]=j;
4411: j++;
4412: }
4413: }
4414: for (i=0;i<count_recv;i++) {
4415: j = pcbddc->local_primal_sizes[ranks_recv[i]];
4416: for (k=0;k<j;k++) {
4417: dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
4418: }
4419: }
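/* The dnz estimate above is only an upper bound: a coarse dof shared by
   several received subdomains gets each of their primal sizes summed, which
   may exceed the true row length when the primal sets overlap; the capping
   loop below makes the estimate safe for preallocation. */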
4420: /* cap dnz: no row can have more nonzeros than the local block size */
4421: for (i=0;i<ins_local_primal_size;i++) {
4422: if (dnz[i] > ins_local_primal_size) {
4423: dnz[i] = ins_local_primal_size;
4424: }
4425: }
4426: PetscFree(requests);
4427: PetscFree(aux_ins_indices);
4428: if (coarse_color == 0) { PetscFree(ranks_recv); }
4429: }
4430: /* create local to global mapping needed by coarse MATIS */
4431: if (coarse_comm != MPI_COMM_NULL) {MPI_Comm_free(&coarse_comm);}
4432: coarse_comm = prec_comm;
4433: active_rank = rank_prec_comm;
4434: ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);
4435: ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);
4436: ISDestroy(&coarse_IS);
4437: } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
4438: /* arrays for values insertion */
4439: ins_local_primal_size = pcbddc->local_primal_size;
4440: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4441: PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);
4442: for (j=0;j<ins_local_primal_size;j++) {
4443: ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
4444: for (i=0;i<ins_local_primal_size;i++) {
4445: ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
4446: }
4447: }
4448: }
4449: break;
4450: }
4452: case (GATHERS_BDDC):
4453: {
4454: PetscMPIInt mysize,mysize2;
4455: PetscMPIInt *send_buffer;
4457: if (rank_prec_comm==active_rank) {
4458: PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);
4459: PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);
4460: PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localsizes2);
4461: PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localdispl2);
4462: /* arrays for values insertion */
4463: for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
4464: localdispl2[0]=0;
4465: for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
4466: j = 0;
4467: for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
4468: PetscMalloc (j*sizeof(PetscScalar),&temp_coarse_mat_vals);
4469: }
4471: mysize =pcbddc->local_primal_size;
4472: mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
4473: PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);
4474: for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
4476: if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC) {
4477: MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);
4478: MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);
4479: } else {
4480: MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);
4481: MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);
4482: }
4483: PetscFree(send_buffer);
4484: break;
4485: } /* switches on coarse problem type and associated communications are finished */
4486: }
4488: /* Now create and fill up coarse matrix */
4489: if (rank_prec_comm == active_rank) {
4491: Mat matis_coarse_local_mat;
4493: if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
4494: MatCreate(coarse_comm,&pcbddc->coarse_mat);
4495: MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);
4496: MatSetType(pcbddc->coarse_mat,coarse_mat_type);
4497: MatSetUp(pcbddc->coarse_mat);
4498: MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE); /* local values stored in column major */
4499: MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
4500: } else {
4501: MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);
4502: MatSetUp(pcbddc->coarse_mat);
4503: MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);
4504: MatSetUp(matis_coarse_local_mat);
4505: MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE); /* local values stored in column major */
4506: MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);
4507: }
4508: /* preallocation */
4509: if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
4511: PetscInt lrows,lcols;
4513: MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);
4514: MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);
4516: if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
4518: Vec vec_dnz,vec_onz;
4519: PetscScalar *my_dnz,*my_onz,*array;
4520: PetscInt *mat_ranges,*row_ownership;
4521: PetscInt coarse_index_row,coarse_index_col,owner;
4523: VecCreate(prec_comm,&vec_dnz);
4524: VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);
4525: VecSetType(vec_dnz,VECMPI);
4526: VecDuplicate(vec_dnz,&vec_onz);
4528: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);
4529: PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);
4530: PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));
4531: PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));
4533: PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);
4534: MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);
4535: for (i=0; i<size_prec_comm; i++) {
4536: for (j=mat_ranges[i]; j<mat_ranges[i+1]; j++) {
4537: row_ownership[j]=i;
4538: }
4539: }
4541: for (i=0; i<pcbddc->local_primal_size; i++) {
4542: coarse_index_row = pcbddc->local_primal_indices[i];
4544: for (j=i; j<pcbddc->local_primal_size; j++) {
4545: owner = row_ownership[coarse_index_row];
4546: coarse_index_col = pcbddc->local_primal_indices[j];
4547: if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1]) {
4548: my_dnz[i] += 1.0;
4549: } else {
4550: my_onz[i] += 1.0;
4551: }
4552: if (i != j) {
4553: owner = row_ownership[coarse_index_col];
4554: if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1]) {
4555: my_dnz[j] += 1.0;
4556: } else {
4557: my_onz[j] += 1.0;
4558: }
4559: }
4560: }
4561: }
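/* Preallocation logic above (sketch): for the pair (i,j) the entry falls in
   the diagonal block of the row owner iff the column index lies inside the
   owner's range [mat_ranges[owner], mat_ranges[owner+1]); only the upper
   triangle is visited, so the symmetric contribution to row j is added
   explicitly when i != j. The counts are then summed across processes via
   ADD_VALUES on vec_dnz/vec_onz, since several subdomains may touch the same
   coarse row. */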
4562: VecSet(vec_dnz,0.0);
4563: VecSet(vec_onz,0.0);
4564: if (pcbddc->local_primal_size) {
4565: VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);
4566: VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);
4567: }
4568: VecAssemblyBegin(vec_dnz);
4569: VecAssemblyBegin(vec_onz);
4570: VecAssemblyEnd(vec_dnz);
4571: VecAssemblyEnd(vec_onz);
4572: j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
4573: VecGetArray(vec_dnz,&array);
4574: for (i=0; i<j; i++) dnz[i] = (PetscInt)array[i];
4576: VecRestoreArray(vec_dnz,&array);
4577: VecGetArray(vec_onz,&array);
4578: for (i=0;i<j;i++) onz[i] = (PetscInt)array[i];
4580: VecRestoreArray(vec_onz,&array);
4581: PetscFree(my_dnz);
4582: PetscFree(my_onz);
4583: PetscFree(row_ownership);
4584: VecDestroy(&vec_dnz);
4585: VecDestroy(&vec_onz);
4586: } else {
4587: for (k=0;k<size_prec_comm;k++) {
4588: offset=pcbddc->local_primal_displacements[k];
4589: offset2=localdispl2[k];
4590: ins_local_primal_size = pcbddc->local_primal_sizes[k];
4591: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4592: for (j=0;j<ins_local_primal_size;j++) {
4593: ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
4594: }
4595: for (j=0;j<ins_local_primal_size;j++) {
4596: MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);
4597: }
4598: PetscFree(ins_local_primal_indices);
4599: }
4600: }
4602: /* clamp preallocation counts: dnz at most lcols, onz at most the remaining columns */
4603: for (i=0;i<lrows;i++) {
4604: if (dnz[i]>lcols) dnz[i]=lcols;
4605: if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols;
4606: }
4607: MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);
4608: MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);
4609: MatPreallocateFinalize(dnz,onz);
4610: } else {
4611: MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);
4612: PetscFree(dnz);
4613: }
4615: /* insert values */
4616: if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
4617: MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
4618: } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4619: if (pcbddc->coarsening_ratio == 1) {
4620: ins_coarse_mat_vals = coarse_submat_vals;
4621: MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);
4622: } else {
4623: PetscFree(ins_local_primal_indices);
4624: for (k=0; k<pcbddc->replicated_primal_size; k++) {
4625: offset = pcbddc->local_primal_displacements[k];
4626: offset2 = localdispl2[k];
4627: ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
4628: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4629: for (j=0; j<ins_local_primal_size; j++) {
4630: ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
4631: }
4632: ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
4633: MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
4634: PetscFree(ins_local_primal_indices);
4635: }
4636: }
4637: ins_local_primal_indices = 0;
4638: ins_coarse_mat_vals = 0;
4639: } else {
4640: for (k=0; k<size_prec_comm; k++) {
4641: offset =pcbddc->local_primal_displacements[k];
4642: offset2 =localdispl2[k];
4643: ins_local_primal_size = pcbddc->local_primal_sizes[k];
4644: PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);
4645: for (j=0; j<ins_local_primal_size; j++) {
4646: ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
4647: }
4648: ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
4649: MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);
4650: PetscFree(ins_local_primal_indices);
4651: }
4652: ins_local_primal_indices = 0;
4653: ins_coarse_mat_vals = 0;
4654: }
4655: MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
4656: MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);
4657: /* symmetry of coarse matrix */
4658: if (issym) {
4659: MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);
4660: }
4661: MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);
4662: }
4664: /* create loc to glob scatters if needed */
4665: if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
4666: IS local_IS,global_IS;
4667: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);
4668: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);
4669: VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);
4670: ISDestroy(&local_IS);
4671: ISDestroy(&global_IS);
4672: }
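/* The scatter built above (sketch): it maps entry k of the local primal
   vector vec1_P (a 0..local_primal_size-1 stride) to the global position
   local_primal_indices[k] of coarse_vec, and is what the
   PCBDDCScatterCoarseData calls below use to move primal data between the
   fine and coarse levels. */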
4674: /* free memory no longer needed */
4675: if (coarse_ISLG) { ISLocalToGlobalMappingDestroy(&coarse_ISLG); }
4676: if (ins_local_primal_indices) { PetscFree(ins_local_primal_indices); }
4677: if (ins_coarse_mat_vals) { PetscFree(ins_coarse_mat_vals); }
4678: if (localsizes2) { PetscFree(localsizes2); }
4679: if (localdispl2) { PetscFree(localdispl2); }
4680: if (temp_coarse_mat_vals) { PetscFree(temp_coarse_mat_vals); }
4682: /* Evaluate coarse null space */
4683: if (pcbddc->NullSpace) {
4684: const Vec *nsp_vecs;
4685: PetscInt nsp_size,coarse_nsp_size;
4686: PetscBool nsp_has_cnst;
4687: PetscReal test_null;
4688: Vec *coarse_nsp_vecs;
4690: coarse_nsp_size = 0;
4691: coarse_nsp_vecs = 0;
4692: MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);
4693: if (rank_prec_comm == active_rank) {
4694: PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);
4695: for (i=0; i<nsp_size+1; i++) {
4696: VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);
4697: }
4698: }
4699: if (nsp_has_cnst) {
4700: VecSet(pcis->vec1_N,1.0);
4701: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
4702: PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4703: PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4704: if (rank_prec_comm == active_rank) {
4705: MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
4706: VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);
4707: if (test_null > 1.0e-12 && pcbddc->dbg_flag) {
4708: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);
4709: }
4710: VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);
4711: coarse_nsp_size++;
4712: }
4713: }
4714: for (i=0; i<nsp_size; i++) {
4715: VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4716: VecScatterEnd (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4717: MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);
4718: PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4719: PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
4720: if (rank_prec_comm == active_rank) {
4721: MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);
4722: VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);
4723: if (test_null > 1.0e-12 && pcbddc->dbg_flag) {
4724: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);
4725: }
4726: VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);
4727: coarse_nsp_size++;
4728: }
4729: }
4730: if (coarse_nsp_size > 0) {
4731: /* TODO orthonormalize vecs */
4732: VecNormalize(coarse_nsp_vecs[0],NULL);
4733: MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);
4734: for (i=0; i<nsp_size+1; i++) {
4735: VecDestroy(&coarse_nsp_vecs[i]);
4736: }
4737: }
4738: PetscFree(coarse_nsp_vecs);
4739: }
4741: /* KSP for coarse problem */
4742: if (rank_prec_comm == active_rank) {
4743: PetscBool isbddc=PETSC_FALSE;
4745: KSPCreate(coarse_comm,&pcbddc->coarse_ksp);
4746: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
4747: KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);
4748: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);
4749: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
4750: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4751: PCSetType(pc_temp,coarse_pc_type);
4752: /* Allow user's customization */
4753: KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");
4754: /* Set up the PC for the coarse problem (BDDC in the multilevel case) */
4755: if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4756: i = pcbddc->current_level+1;
4757: PCBDDCSetLevel(pc_temp,i);
4758: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
4759: PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);
4760: PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);
4761: if (pcbddc->CoarseNullSpace) { PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace); }
4762: if (dbg_flag) {
4763: PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);
4764: PetscViewerFlush(viewer);
4765: }
4766: }
4767: KSPSetFromOptions(pcbddc->coarse_ksp);
4768: KSPSetUp(pcbddc->coarse_ksp);
4770: KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);
4771: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4772: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
4773: if (j == 1) {
4774: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
4775: if (isbddc) {
4776: PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);
4777: }
4778: }
4779: }
4780: /* Check coarse problem if requested */
4781: if (dbg_flag && rank_prec_comm == active_rank) {
4782: KSP check_ksp;
4783: PC check_pc;
4784: Vec check_vec;
4785: PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
4786: KSPType check_ksp_type;
4788: /* Create a KSP object suitable for estimating extreme eigenvalues */
4789: KSPCreate(coarse_comm,&check_ksp);
4790: KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);
4791: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
4792: if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4793: if (issym) check_ksp_type = KSPCG;
4794: else check_ksp_type = KSPGMRES;
4795: KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);
4796: } else {
4797: check_ksp_type = KSPPREONLY;
4798: }
4799: KSPSetType(check_ksp,check_ksp_type);
4800: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
4801: KSPSetPC(check_ksp,check_pc);
4802: KSPSetUp(check_ksp);
4803: /* create random vec */
4804: VecDuplicate(pcbddc->coarse_vec,&check_vec);
4805: VecSetRandom(check_vec,NULL);
4806: if (pcbddc->CoarseNullSpace) { MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,NULL); }
4807: MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);
4808: /* solve coarse problem */
4809: KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);
4810: if (pcbddc->CoarseNullSpace) { MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL); }
4811: /* check coarse problem residual error */
4812: VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);
4813: VecNorm(check_vec,NORM_INFINITY,&infty_error);
4814: MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);
4815: VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);
4816: VecDestroy(&check_vec);
4817: /* get eigenvalue estimation if inexact */
4818: if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
4819: KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
4820: KSPGetIterationNumber(check_ksp,&k);
4821: PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);
4822: PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);
4823: }
4824: PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);
4825: PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);
4826: KSPDestroy(&check_ksp);
4827: }
4828: if (dbg_flag) { PetscViewerFlush(viewer); }
4829: return(0);
4830: }
4834: static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
4835: {
4836: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
4837: PC_IS *pcis = (PC_IS*)pc->data;
4838: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
4839: PCBDDCGraph mat_graph=pcbddc->mat_graph;
4840: PetscInt *is_indices,*auxis;
4841: PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
4842: PetscInt total_counts,nodes_touched,where_values=1,vertex_size;
4843: PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0;
4844: PetscBool same_set;
4845: MPI_Comm interface_comm;
4846: PetscBool use_faces = PETSC_FALSE,use_edges = PETSC_FALSE;
4847: const PetscInt *neumann_nodes;
4848: const PetscInt *dirichlet_nodes;
4849: IS used_IS,*custom_ISForDofs;
4850: PetscScalar *array;
4851: PetscScalar *array2;
4852: PetscViewer viewer=pcbddc->dbg_viewer;
4853: PetscInt *queue_in_global_numbering;
4856: PetscObjectGetComm((PetscObject)pc,&interface_comm);
4857: /* Setup local adjacency graph */
4858: mat_graph->nvtxs=pcis->n;
4859: if (!mat_graph->xadj) NEUMANNCNT = 1;
4860: PCBDDCSetupLocalAdjacencyGraph(pc);
4862: i = mat_graph->nvtxs;
4863: PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);
4864: PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);
4865: PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));
4866: PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
4867: PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));
4868: PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
4869: PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
4871: /* Setting dofs splitting in mat_graph->which_dof
4872: Use information about dofs' splitting if provided by the user,
4873: otherwise assume a constant block size */
4874: vertex_size=0;
4875: if (!pcbddc->n_ISForDofs) {
4876: MatGetBlockSize(matis->A,&bs);
4877: PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);
4878: for (i=0; i<bs; i++) {
4879: ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);
4880: }
4881: PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);
4882: vertex_size=1;
4883: /* remove my references to IS objects */
4884: for (i=0; i<bs; i++) {
4885: ISDestroy(&custom_ISForDofs[i]);
4886: }
4887: PetscFree(custom_ISForDofs);
4888: }
4889: for (i=0; i<pcbddc->n_ISForDofs; i++) {
4890: ISGetSize(pcbddc->ISForDofs[i],&k);
4891: ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
4892: for (j=0; j<k; j++) mat_graph->which_dof[is_indices[j]]=i;
4893: ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);
4894: }
4895: /* use mat block size as vertex size if it has not been set yet */
4896: if (!vertex_size) {
4897: MatGetBlockSize(matis->A,&vertex_size);
4898: }
4900: /* count number of neigh per node */
4901: total_counts=0;
4902: for (i=1; i<pcis->n_neigh; i++) {
4903: s = pcis->n_shared[i];
4904: total_counts += s;
4905: for (j=0;j<s;j++) mat_graph->count[pcis->shared[i][j]] += 1;
4906: }
4908: /* Take into account Neumann data -> they increment the number of sharing subdomains for nodes lying on the interface */
4909: PCBDDCGetNeumannBoundaries(pc,&used_IS);
4910: VecSet(pcis->vec1_N,0.0);
4911: VecGetArray(pcis->vec1_N,&array);
4912: if (used_IS) {
4913: ISGetSize(used_IS,&neumann_bsize);
4914: ISGetIndices(used_IS,&neumann_nodes);
4915: for (i=0; i<neumann_bsize; i++) {
4916: iindex = neumann_nodes[i];
4917: if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0) {
4918: mat_graph->count[iindex]+=1;
4919: total_counts++;
4920: array[iindex]=array[iindex]+1.0;
4921: } else if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex);
4922: }
4923: }
4924: VecRestoreArray(pcis->vec1_N,&array);
4925: /* allocate space for storing the set of neighbours for each node */
4926: PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);
4927: if (mat_graph->nvtxs) { PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]); }
4928: for (i=1; i<mat_graph->nvtxs; i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1];
4929: PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));
4930: for (i=1; i<pcis->n_neigh; i++) {
4931: s=pcis->n_shared[i];
4932: for (j=0; j<s; j++) {
4933: k=pcis->shared[i][j];
4935: mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];
4937: mat_graph->count[k]+=1;
4938: }
4939: }
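/* Storage sketch for neighbours_set: a single buffer of total_counts entries
   is allocated and neighbours_set[i] = neighbours_set[i-1] + count[i-1], so
   the list of subdomains sharing node i is the contiguous slice
   neighbours_set[i][0..count[i]-1] (a CSR-like layout, sorted below so that
   neighbour sets can be compared entrywise). */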
4940: /* Check consistency of Neumann nodes */
4941: VecSet(pcis->vec1_global,0.0);
4942: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4943: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4944: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4945: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4946: VecGetArray(pcis->vec1_N,&array);
4947: /* set -1 fake neighbour to mimic Neumann boundary */
4948: if (used_IS) {
4949: for (i=0; i<neumann_bsize; i++) {
4950: iindex = neumann_nodes[i];
4951: if (mat_graph->count[iindex] > NEUMANNCNT) {
4952: if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]);
4953: mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1;
4955: mat_graph->count[iindex]+=1;
4956: }
4957: }
4958: ISRestoreIndices(used_IS,&neumann_nodes);
4959: }
4960: VecRestoreArray(pcis->vec1_N,&array);
4961: /* sort set of sharing subdomains */
4962: for (i=0;i<mat_graph->nvtxs;i++) {
4963: PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);
4964: }
4966: /* remove interior nodes and Dirichlet boundary nodes from the next search in the graph */
4967: for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE;
4968: nodes_touched=0;
4970: PCBDDCGetDirichletBoundaries(pc,&used_IS);
4971: VecSet(pcis->vec2_N,0.0);
4972: VecGetArray(pcis->vec1_N,&array);
4973: VecGetArray(pcis->vec2_N,&array2);
4974: if (used_IS) {
4975: ISGetSize(used_IS,&dirichlet_bsize);
4976: if (dirichlet_bsize && matis->pure_neumann) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n");
4977: ISGetIndices(used_IS,&dirichlet_nodes);
4978: for (i=0; i<dirichlet_bsize; i++) {
4979: iindex = dirichlet_nodes[i];
4980: if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) {
4981: if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex);
4982: mat_graph->touched[iindex] = PETSC_TRUE;
4983: mat_graph->where[iindex] = 0;
4984: nodes_touched++;
4985: array2[iindex] = array2[iindex]+1.0;
4986: }
4987: }
4988: ISRestoreIndices(used_IS,&dirichlet_nodes);
4989: }
4990: VecRestoreArray(pcis->vec1_N,&array);
4991: VecRestoreArray(pcis->vec2_N,&array2);
4993: /* Check consistency of Dirichlet nodes */
4994: VecSet(pcis->vec1_N,1.0);
4995: VecSet(pcis->vec1_global,0.0);
4996: VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4997: VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4998: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4999: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
5000: VecSet(pcis->vec1_global,0.0);
5001: VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
5002: VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
5003: VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
5004: VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
5005: VecGetArray(pcis->vec1_N,&array);
5006: VecGetArray(pcis->vec2_N,&array2);
5007: if (used_IS) {
5008: ISGetSize(used_IS,&dirichlet_bsize);
5009: ISGetIndices(used_IS,&dirichlet_nodes);
5010: for (i=0; i<dirichlet_bsize; i++) {
5011: iindex=dirichlet_nodes[i];
5012: if (array[iindex]>1.0 && array[iindex]!=array2[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a dirichlet node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]);
5013: }
5014: ISRestoreIndices(used_IS,&dirichlet_nodes);
5015: }
5016: VecRestoreArray(pcis->vec1_N,&array);
5017: VecRestoreArray(pcis->vec2_N,&array2);
5019: for (i=0; i<mat_graph->nvtxs; i++) {
5020: if (!mat_graph->count[i]) { /* interior nodes */
5021: mat_graph->touched[i] = PETSC_TRUE;
5022: mat_graph->where[i] = 0;
5023: nodes_touched++;
5024: }
5025: }
5026: mat_graph->ncmps = 0;
5028: i=0;
5029: while (nodes_touched<mat_graph->nvtxs) {
5030: /* find first untouched node in local ordering */
5031: while (mat_graph->touched[i]) i++;
5032: mat_graph->touched[i]=PETSC_TRUE;
5033: mat_graph->where[i] =where_values;
5034: nodes_touched++;
5035: /* now find all other nodes having the same set of sharing subdomains */
5036: for (j=i+1; j<mat_graph->nvtxs; j++) {
5037: /* check for same number of sharing subdomains and dof number */
5038: if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j]) {
5039: /* check for same set of sharing subdomains */
5040: same_set=PETSC_TRUE;
5041: for (k=0; k<mat_graph->count[j]; k++) {
5042: if (mat_graph->neighbours_set[i][k] != mat_graph->neighbours_set[j][k]) {
5043: same_set=PETSC_FALSE;
5044: }
5045: }
5046: /* I found a friend of mine */
5047: if (same_set) {
5048: mat_graph->where[j] = where_values;
5049: mat_graph->touched[j] = PETSC_TRUE;
5050: nodes_touched++;
5051: }
5052: }
5053: }
5054: where_values++;
5055: }
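/* The loop above partitions the interface nodes into equivalence classes:
   two nodes share a where value iff they have the same number of sharing
   subdomains, the same dof split and the same (sorted) neighbour set. E.g.
   (illustrative) nodes shared exactly by subdomains {1,2} form one class and
   nodes shared by {1,2,3} another; classes are numbered from 1, with 0
   reserved for interior and Dirichlet nodes excluded from the search. */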
5056: where_values--; if (where_values<0) where_values=0;
5057: PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
5058: /* Find connected components defined on the shared interface */
5059: if (where_values) {
5060: PCBDDCFindConnectedComponents(mat_graph, where_values);
5061: }
5062: PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);
5063: /* check consistency of connected components among neighbouring subdomains -> adapt them if needed */
5064: for (i=0;i<where_values;i++) {
5065: /* We are not sure that, on a given subset of the local interface,
5066: the connected components will be the same among sharing subdomains */
5067: if (mat_graph->where_ncmps[i]>1) {
5068: adapt_interface=1;
5069: break;
5070: }
5071: }
5073: MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);
5074: if (pcbddc->dbg_flag && adapt_interface_reduced) {
5075: PetscViewerASCIIPrintf(viewer,"Adapting interface\n");
5076: PetscViewerFlush(viewer);
5077: }
5078: if (where_values && adapt_interface_reduced) {
5079: PetscInt sum_requests=0,my_rank;
5080: PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send;
5081: PetscInt temp_buffer_size,ins_val,global_where_counter;
5082: PetscInt *cum_recv_counts;
5083: PetscInt *where_to_nodes_indices;
5084: PetscInt *petsc_buffer;
5085: PetscMPIInt *recv_buffer;
5086: PetscMPIInt *recv_buffer_where;
5087: PetscMPIInt *send_buffer;
5088: PetscMPIInt size_of_send;
5089: PetscInt *sizes_of_sends;
5090: MPI_Request *send_requests;
5091: MPI_Request *recv_requests;
5092: PetscInt *where_cc_adapt;
5093: PetscInt **temp_buffer;
5094: PetscInt *nodes_to_temp_buffer_indices;
5095: PetscInt *add_to_where;
5096: PetscInt *aux_new_xadj,*new_xadj,*new_adjncy;
5098: /* Restrict adjacency graph using information from connected components */
5099: PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&aux_new_xadj);
5100: for (i=0; i<mat_graph->nvtxs; i++) aux_new_xadj[i]=1;
5101: for (i=0;i<mat_graph->ncmps;i++) {
5102: k = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5103: for (j=0;j<k;j++) aux_new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]=k;
5104: }
5105: j = 0;
5106: for (i=0;i<mat_graph->nvtxs;i++) j += aux_new_xadj[i];
5108: PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&new_xadj);
5109: PetscMalloc(j*sizeof(PetscInt),&new_adjncy);
5110: new_xadj[0]=0;
5111: for (i=0;i<mat_graph->nvtxs;i++) {
5112: new_xadj[i+1]=new_xadj[i]+aux_new_xadj[i];
5113: if (aux_new_xadj[i]==1) new_adjncy[new_xadj[i]]=i;
5114: }
5115: PetscFree(aux_new_xadj);
5116: for (i=0; i<mat_graph->ncmps; i++) {
5117: k = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5118: for (j=0; j<k; j++) {
5119: PetscMemcpy(&new_adjncy[new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]],&mat_graph->queue[mat_graph->cptr[i]],k*sizeof(PetscInt));
5120: }
5121: }
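/* Worked example of the restricted graph built above (illustrative numbers):
   with nvtxs = 5 and a single connected component queue = [1,3,4] (k = 3),
   aux_new_xadj = [1,3,1,3,3] and new_xadj = [0,1,4,5,8,11]; rows 1, 3 and 4
   of new_adjncy each hold the whole component [1,3,4], while rows 0 and 2
   only hold a self connection. */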
5122: PCBDDCSetLocalAdjacencyGraph(pc,mat_graph->nvtxs,new_xadj,new_adjncy,PETSC_OWN_POINTER);
5123: /* For consistency among neighbouring procs, I need to sort (by global ordering) each connected component */
5124: for (i=0; i<mat_graph->ncmps; i++) {
5125: k = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5126: ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
5127: PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
5128: }
5129: /* allocate some space */
5130: MPI_Comm_rank(interface_comm,&my_rank);
5131: PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);
5132: PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));
5133: PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);
5134: /* first count how many neighbours I will receive from, for each connected component */
5135: cum_recv_counts[0]=0;
5136: for (i=1; i<where_values+1; i++) {
5137: j=0;
5138: while (mat_graph->where[j] != i) j++;
5139: where_to_nodes_indices[i-1]=j;
5140: if (mat_graph->neighbours_set[j][0]!=-1) cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; /* we don't want sends/recvs to/from self, so I don't count myself */
5141: else cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1;
5142: }
5143: PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);
5144: PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);
5145: PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);
5146: for (i=0; i<cum_recv_counts[where_values]; i++) {
5147: send_requests[i]=MPI_REQUEST_NULL;
5148: recv_requests[i]=MPI_REQUEST_NULL;
5149: }
5150: /* exchange with my neighbours the number of my connected components on the shared interface */
5151: for (i=0; i<where_values; i++) {
5152: j = where_to_nodes_indices[i];
5153: k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0);
5154: for (; k<mat_graph->count[j]; k++) {
5155: MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
5156: MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
5157: sum_requests++;
5158: }
5159: }
5160: MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
5161: MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
5162: /* determine the connected components I need to adapt */
5163: PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);
5164: PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));
5165: for (i=0; i<where_values; i++) {
5166: for (j=cum_recv_counts[i]; j<cum_recv_counts[i+1]; j++) {
5167: /* The first condition is natural (i.e. someone has a different number of ccs than me), the second one is just to be safe */
5168: if (mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1) {
5169: where_cc_adapt[i]=PETSC_TRUE;
5170: break;
5171: }
5172: }
5173: }
5174: buffer_size = 0;
5175: for (i=0; i<where_values; i++) {
5176: if (where_cc_adapt[i]) {
5177: for (j=i; j<mat_graph->ncmps; j++) {
5178: if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values go from 1 to where_values inclusive */
5179: buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j];
5180: }
5181: }
5182: }
5183: }
5184: PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);
5185: /* now get the ccs (in global numbering) from neighbours and adapt them (if needed) */
5186: /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */
5187: PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);
5188: PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));
5190: sum_requests = 0;
5191: start_of_send = 0;
5192: start_of_recv = cum_recv_counts[where_values];
5193: for (i=0; i<where_values; i++) {
5194: if (where_cc_adapt[i]) {
5195: size_of_send=0;
5196: for (j=i; j<mat_graph->ncmps; j++) {
5197: if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values go from 1 to where_values inclusive */
5198: send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j];
5199: size_of_send += 1;
5200: for (k=0; k<mat_graph->cptr[j+1]-mat_graph->cptr[j]; k++) {
5201: send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k];
5202: }
5203: size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
5204: }
5205: }
5206: j = where_to_nodes_indices[i];
5207: k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0);
5208: sizes_of_sends[i]=size_of_send;
5209: for (; k<mat_graph->count[j]; k++) {
5210: MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
5211: MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
5212: sum_requests++;
5213: }
5214: start_of_send+=size_of_send;
5215: }
5216: }
5217: MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
5218: MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
5220: buffer_size=0;
5222: for (k=0;k<sum_requests;k++) buffer_size += recv_buffer_where[start_of_recv+k];
5223: PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);
5224: /* now exchange the data */
5225: start_of_recv = 0;
5226: start_of_send = 0;
5227: sum_requests = 0;
5228: for (i=0; i<where_values; i++) {
5229: if (where_cc_adapt[i]) {
5230: size_of_send = sizes_of_sends[i];
5232: j = where_to_nodes_indices[i];
5233: k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0);
5234: for (; k<mat_graph->count[j]; k++) {
5235: MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);
5236: size_of_recv = recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
5237: MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);
5238: start_of_recv+=size_of_recv;
5239: sum_requests++;
5240: }
5241: start_of_send+=size_of_send;
5242: }
5243: }
5244: MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);
5245: MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);
5246: PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);
5247: for (k=0;k<start_of_recv;k++) petsc_buffer[k]=(PetscInt)recv_buffer[k];
5248: for (j=0;j<buffer_size;) {
5249: ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);
5250: k = petsc_buffer[j]+1;
5251: j += k;
5252: }
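/* Layout of the received stream decoded above: each record is
   [len, g_1, ..., g_len] with global node indices; ISGlobalToLocalMappingApply
   converts the len indices to local numbering in place (IS_GTOLM_MASK maps
   nodes not owned locally to -1), and j advances by len+1 to the next record. */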
5253: sum_requests = cum_recv_counts[where_values];
5254: start_of_recv = 0;
5256: PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);
5257: global_where_counter=0;
5258: for (i=0; i<where_values; i++) {
5259: if (where_cc_adapt[i]) {
5260: temp_buffer_size=0;
5261: /* find nodes on the shared interface we need to adapt */
5262: for (j=0; j<mat_graph->nvtxs; j++) {
5263: if (mat_graph->where[j]==i+1) {
5264: nodes_to_temp_buffer_indices[j]=temp_buffer_size;
5265: temp_buffer_size++;
5266: } else {
5267: nodes_to_temp_buffer_indices[j]=-1;
5268: }
5269: }
5271: /* allocate some temporary space */
5272: PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);
5273: PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);
5274: PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));
5275: for (j=1; j<temp_buffer_size; j++) {
5276: temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
5277: }
5278: /* analyze contributions from neighbouring subdomains for i-th conn comp
5279: temp buffer structure:
5280: supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4)
5281: 3 neighs procs with structured connected components:
5282: neigh 0: [0 1 4], [2 3]; (2 connected components)
5283: neigh 1: [0 1], [2 3 4]; (2 connected components)
5284: neigh 2: [0 4], [1], [2 3]; (3 connected components)
5285: tempbuffer (row-oriented) should be filled as:
5286: [ 0, 0, 0;
5287: 0, 0, 1;
5288: 1, 1, 2;
5289: 1, 1, 2;
5290: 0, 1, 0; ];
5291: This way we can simply recover the resulting structure, accounting for possible intersections of ccs among neighs.
5292: The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4];
5293: */
5294: for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
5295: ins_val=0;
5296: size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */
5297: for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */
5298: for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* properly fill temp_buffer using data from a single recv */
5299: temp_buffer[nodes_to_temp_buffer_indices[petsc_buffer[start_of_recv+buffer_size+k]]][j] = ins_val;
5300: }
5301: buffer_size+=k;
5302: ins_val++;
5303: }
5304: start_of_recv+=size_of_recv;
5305: sum_requests++;
5306: }
5307: PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);
5308: PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));
5309: for (j=0; j<temp_buffer_size; j++) {
5310: if (!add_to_where[j]) { /* found a new cc */
5311: global_where_counter++;
5312: add_to_where[j]=global_where_counter;
5313: for (k=j+1; k<temp_buffer_size; k++) { /* check for other nodes in new cc */
5314: same_set=PETSC_TRUE;
5315: for (s=0; s<cum_recv_counts[i+1]-cum_recv_counts[i]; s++) {
5316: if (temp_buffer[j][s]!=temp_buffer[k][s]) {
5317: same_set=PETSC_FALSE;
5318: break;
5319: }
5320: }
5321: if (same_set) add_to_where[k] = global_where_counter;
5322: }
5323: }
5324: }
5325: /* insert new data in where array */
5326: temp_buffer_size=0;
5327: for (j=0;j<mat_graph->nvtxs;j++) {
5328: if (mat_graph->where[j]==i+1) {
5329: mat_graph->where[j]=where_values+add_to_where[temp_buffer_size];
5330: temp_buffer_size++;
5331: }
5332: }
5333: PetscFree(temp_buffer[0]);
5334: PetscFree(temp_buffer);
5335: PetscFree(add_to_where);
5336: }
5337: }
5338: PetscFree(nodes_to_temp_buffer_indices);
5339: PetscFree(sizes_of_sends);
5340: PetscFree(send_requests);
5341: PetscFree(recv_requests);
5342: PetscFree(petsc_buffer);
5343: PetscFree(recv_buffer);
5344: PetscFree(recv_buffer_where);
5345: PetscFree(send_buffer);
5346: PetscFree(cum_recv_counts);
5347: PetscFree(where_to_nodes_indices);
5348: PetscFree(where_cc_adapt);
5350: /* We are ready to evaluate consistent connected components on each part of the shared interface */
5351: if (global_where_counter) {
5352: for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE;
5353: global_where_counter=0;
5354: for (i=0;i<mat_graph->nvtxs;i++) {
5355: if (mat_graph->where[i] && !mat_graph->touched[i]) {
5356: global_where_counter++;
5357: for (j=i+1;j<mat_graph->nvtxs;j++) {
5358: if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
5359: mat_graph->where[j] = global_where_counter;
5360: mat_graph->touched[j] = PETSC_TRUE;
5361: }
5362: }
5363: mat_graph->where[i] = global_where_counter;
5364: mat_graph->touched[i] = PETSC_TRUE;
5365: }
5366: }
5367: where_values=global_where_counter;
5368: }
5369: if (global_where_counter) {
5370: PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));
5371: PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));
5372: PetscFree(mat_graph->where_ncmps);
5373: PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);
5374: PCBDDCFindConnectedComponents(mat_graph, where_values);
5375: }
5376: } /* Finished adapting interface */
5378: /* For consistency among neighbouring procs, I need to sort (by global ordering) each connected component */
5379: for (i=0; i<mat_graph->ncmps; i++) {
5380: k = mat_graph->cptr[i+1]-mat_graph->cptr[i];
5381: ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);
5382: PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);
5383: }
5385: PetscInt nfc = 0;
5386: PetscInt nec = 0;
5387: PetscInt nvc = 0;
5388: PetscBool twodim_flag = PETSC_FALSE;
5389: for (i=0; i<mat_graph->ncmps; i++) {
5390: if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) {
5391: if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) nfc++; /* shared with 1 neigh only (fake Neumann neighbour included) */
5392: else nec++; /* note that nec will be zero in 2d */
5393: } else {
5394: nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i];
5395: }
5396: }
5397: if (!nec) { /* we are in a 2d case -> no faces, only edges */
5398: nec = nfc;
5399: nfc = 0;
5400: twodim_flag = PETSC_TRUE;
5401: }
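/* Classification rule used above (sketch): a connected component larger than
   vertex_size lies on a face when its nodes are shared with exactly one other
   subdomain (count == 1, the fake Neumann neighbour included), on an edge
   otherwise; components of at most vertex_size nodes become vertices. Since
   in 2d no component larger than vertex_size is shared by more than two
   subdomains, nec == 0 detects the 2d case and the detected "faces" are
   relabeled as edges. */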
5402: /* allocate IS arrays for faces, edges. Vertices need a single index set. */
5403: k=0;
5404: for (i=0; i<mat_graph->ncmps; i++) {
5405: j=mat_graph->cptr[i+1]-mat_graph->cptr[i];
5406: if (j > k) k=j;
5408: if (j<=vertex_size) k+=vertex_size;
5409: }
5410: PetscMalloc(k*sizeof(PetscInt),&auxis);
5411: if (!pcbddc->vertices_flag && !pcbddc->edges_flag) {
5412: PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);
5413: use_faces = PETSC_TRUE;
5414: }
5415: if (!pcbddc->vertices_flag && !pcbddc->faces_flag) {
5416: PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);
5417: use_edges = PETSC_TRUE;
5418: }
5419: nfc=0;
5420: nec=0;
5421: for (i=0; i<mat_graph->ncmps; i++) {
5422: if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) {
5423: for (j=0; j<mat_graph->cptr[i+1]-mat_graph->cptr[i]; j++) {
5424: auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j];
5425: }
5426: if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) {
5427: if (twodim_flag) {
5428: if (use_edges) {
5429: ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
5430: nec++;
5431: }
5432: } else {
5433: if (use_faces) {
5434: ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);
5435: nfc++;
5436: }
5437: }
5438: } else {
5439: if (use_edges) {
5440: ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);
5441: nec++;
5442: }
5443: }
5444: }
5445: }
5446: pcbddc->n_ISForFaces = nfc;
5447: pcbddc->n_ISForEdges = nec;
5449: nvc = 0;
5450: if (!pcbddc->constraints_flag) {
5451: for (i=0; i<mat_graph->ncmps; i++) {
5452: if (mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size) {
5453: for (j = mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) {
5454: auxis[nvc]=mat_graph->queue[j];
5455: nvc++;
5456: }
5457: }
5458: }
5459: }
5461: /* sort vertex set (by local ordering) */
5462: PetscSortInt(nvc,auxis);
5463: ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);
5464: if (pcbddc->dbg_flag) {
5465: PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");
5466: PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);
5467: PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);
5468: for (i=0; i<mat_graph->ncmps; i++) {
5469: PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n",
5470: i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);
5471: PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: ");
5472: for (j=0; j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) {
5473: PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]);
5474: }
5475: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
5476: for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) {
5477: PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",mat_graph->queue[j],queue_in_global_numbering[j]);
5478: }
5479: }
5480: PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");
5481: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);
5482: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);
5483: PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);
5484: PetscViewerFlush(viewer);
5485: }
5486: PetscFree(auxis);
5487: PetscFree(queue_in_global_numbering);
5488: return(0);
5489: }
5491: /* -------------------------------------------------------------------------- */
5493: /* The following code has been adapted from the function IsConnectedSubdomain in the
5494: source file contig.c of the METIS library (version 5.0.1).
5495: It finds the connected components of each partition, labeled from 1 to n_dist */
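/* Layout of the results, sketched here for clarity (hypothetical data): components are
   stored back-to-back in graph->queue, CSR-style, so component c occupies
   queue[cptr[c]] .. queue[cptr[c+1]-1]; graph->ncmps is the total count over all
   partitions and graph->where_ncmps[n] the count for partition n+1. E.g. if partition 1
   has one component {3,7} and partition 2 has {1} and {5,9}, then queue = {3,7,1,5,9},
   cptr = {0,2,3,5}, ncmps = 3 and where_ncmps = {1,2}. */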
5499: static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist)
5500: {
5501: PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
5502: PetscInt *xadj, *adjncy, *where, *queue;
5503: PetscInt *cptr;
5504: PetscBool *touched;
5507: nvtxs = graph->nvtxs;
5508: xadj = graph->xadj;
5509: adjncy = graph->adjncy;
5510: where = graph->where;
5511: touched = graph->touched;
5512: queue = graph->queue;
5513: cptr = graph->cptr;
5515: for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE;
5517: cum_queue = 0;
5518: ncmps = 0;
5520: for (n=0; n<n_dist; n++) {
5521: pid = n+1; /* the partition labeled 0 is discarded */
5522: nleft = 0;
5523: for (i=0; i<nvtxs; i++) {
5524: if (where[i] == pid) nleft++;
5525: }
5526: for (i=0; i<nvtxs; i++) {
5527: if (where[i] == pid) break;
5528: }
5529: touched[i] = PETSC_TRUE;
5530: queue[cum_queue] = i;
5531: first = 0; last = 1;
5533: cptr[ncmps] = cum_queue; /* cptr stores offsets into queue */
5534: ncmps_pid = 0;
5536: while (first != nleft) {
5537: if (first == last) { /* Find another starting vertex */
5538: cptr[++ncmps] = first+cum_queue;
5539: ncmps_pid++;
5540: for (i=0; i<nvtxs; i++) {
5541: if (where[i] == pid && !touched[i]) break;
5542: }
5543: queue[cum_queue+last] = i;
5544: last++;
5545: touched[i] = PETSC_TRUE;
5546: }
5547: i = queue[cum_queue+first];
5548: first++;
5549: for (j=xadj[i]; j<xadj[i+1]; j++) {
5550: k = adjncy[j];
5551: if (where[k] == pid && !touched[k]) {
5552: queue[cum_queue+last] = k;
5553: last++;
5554: touched[k] = PETSC_TRUE;
5555: }
5556: }
5557: }
5558: cptr[++ncmps] = first+cum_queue;
5559: ncmps_pid++;
5560: cum_queue = cptr[ncmps];
5561: graph->where_ncmps[n] = ncmps_pid;
5562: }
5563: graph->ncmps = ncmps;
5564: return(0);
5565: }
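/* A minimal standalone sketch of the breadth-first search above, stripped of the
   per-partition bookkeeping. The helper name SketchConnectedComponents is hypothetical
   and illustrative only; it assumes the same CSR graph format (xadj/adjncy) as used
   by PCBDDCFindConnectedComponents. */
#include <stdlib.h>

static int SketchConnectedComponents(int nvtxs, const int *xadj, const int *adjncy, int *comp)
{
  int i, j, v, first, last, ncmps = 0;
  int *queue = (int*)malloc((size_t)nvtxs*sizeof(int));
  if (!queue) return -1;
  for (i=0; i<nvtxs; i++) comp[i] = -1;              /* -1 marks an untouched vertex */
  for (i=0; i<nvtxs; i++) {
    if (comp[i] >= 0) continue;                      /* already assigned to a component */
    comp[i] = ncmps; queue[0] = i; first = 0; last = 1;
    while (first < last) {                           /* standard BFS sweep */
      v = queue[first++];
      for (j=xadj[v]; j<xadj[v+1]; j++) {
        if (comp[adjncy[j]] < 0) {                   /* enqueue unseen neighbours */
          comp[adjncy[j]] = ncmps;
          queue[last++] = adjncy[j];
        }
      }
    }
    ncmps++;
  }
  free(queue);
  return ncmps;                                      /* number of connected components */
}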