Actual source code: asm.c
petsc-3.6.1 2015-08-06
2: /*
3: This file defines an additive Schwarz preconditioner for any Mat implementation.
5: Note that each processor may have any number of subdomains. But in order to
6: deal easily with the VecScatter(), we treat each processor as if it has the
7: same number of subdomains.
9: n - total number of true subdomains on all processors
10: n_local_true - actual number of subdomains on this processor
11: n_local = maximum over all processors of n_local_true
12: */
13: #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
14: #include <petscdm.h>
16: typedef struct {
17: PetscInt n, n_local, n_local_true;
18: PetscInt overlap; /* overlap requested by user */
19: KSP *ksp; /* linear solvers for each block */
20: VecScatter *restriction; /* mapping from global to subregion */
21: VecScatter *localization; /* mapping from overlapping to non-overlapping subregion */
22: VecScatter *prolongation; /* mapping from subregion to global */
23: Vec *x,*y,*y_local; /* work vectors */
24: IS *is; /* index set that defines each overlapping subdomain */
25: IS *is_local; /* index set that defines each non-overlapping subdomain, may be NULL */
26: Mat *mat,*pmat; /* mat is not currently used */
27: PCASMType type; /* use reduced interpolation, restriction or both */
28: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
29: PetscBool same_local_solves; /* flag indicating whether all local solvers are same */
30: PetscBool sort_indices; /* flag to sort subdomain indices */
31: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
32: PCCompositeType loctype; /* the type of composition for local solves */
33: } PC_ASM;
37: static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
38: {
39: PC_ASM *osm = (PC_ASM*)pc->data;
41: PetscMPIInt rank;
42: PetscInt i,bsz;
43: PetscBool iascii,isstring;
44: PetscViewer sviewer;
47: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
48: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
49: if (iascii) {
50: char overlaps[256] = "user-defined overlap",blocks[256] = "total subdomain blocks not yet set";
51: if (osm->overlap >= 0) {PetscSNPrintf(overlaps,sizeof(overlaps),"amount of overlap = %D",osm->overlap);}
52: if (osm->n > 0) {PetscSNPrintf(blocks,sizeof(blocks),"total subdomain blocks = %D",osm->n);}
53: PetscViewerASCIIPrintf(viewer," Additive Schwarz: %s, %s\n",blocks,overlaps);
54: PetscViewerASCIIPrintf(viewer," Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);
55: if (osm->loctype != PC_COMPOSITE_ADDITIVE) {PetscViewerASCIIPrintf(viewer," Additive Schwarz: local solve composition type - %s\n",PCCompositeTypes[osm->loctype]);}
56: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
57: if (osm->same_local_solves) {
58: if (osm->ksp) {
59: PetscViewerASCIIPrintf(viewer," Local solve is same for all blocks, in the following KSP and PC objects:\n");
60: PetscViewerGetSingleton(viewer,&sviewer);
61: if (!rank) {
62: PetscViewerASCIIPushTab(viewer);
63: KSPView(osm->ksp[0],sviewer);
64: PetscViewerASCIIPopTab(viewer);
65: }
66: PetscViewerRestoreSingleton(viewer,&sviewer);
67: }
68: } else {
69: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
70: PetscViewerASCIISynchronizedPrintf(viewer," [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);
71: PetscViewerFlush(viewer);
72: PetscViewerASCIIPrintf(viewer," Local solve info for each block is in the following KSP and PC objects:\n");
73: PetscViewerASCIIPushTab(viewer);
74: PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
75: PetscViewerGetSingleton(viewer,&sviewer);
76: for (i=0; i<osm->n_local_true; i++) {
77: ISGetLocalSize(osm->is[i],&bsz);
78: PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);
79: KSPView(osm->ksp[i],sviewer);
80: PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
81: }
82: PetscViewerRestoreSingleton(viewer,&sviewer);
83: PetscViewerASCIIPopTab(viewer);
84: PetscViewerFlush(viewer);
85: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
86: }
87: } else if (isstring) {
88: PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);
89: PetscViewerGetSingleton(viewer,&sviewer);
90: if (osm->ksp) {KSPView(osm->ksp[0],sviewer);}
91: PetscViewerRestoreSingleton(viewer,&sviewer);
92: }
93: return(0);
94: }
98: static PetscErrorCode PCASMPrintSubdomains(PC pc)
99: {
100: PC_ASM *osm = (PC_ASM*)pc->data;
101: const char *prefix;
102: char fname[PETSC_MAX_PATH_LEN+1];
103: PetscViewer viewer, sviewer;
104: char *s;
105: PetscInt i,j,nidx;
106: const PetscInt *idx;
107: PetscMPIInt rank, size;
111: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
112: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
113: PCGetOptionsPrefix(pc,&prefix);
114: PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,NULL);
115: if (fname[0] == 0) { PetscStrcpy(fname,"stdout"); };
116: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
117: for (i=0; i<osm->n_local; i++) {
118: if (i < osm->n_local_true) {
119: ISGetLocalSize(osm->is[i],&nidx);
120: ISGetIndices(osm->is[i],&idx);
121: /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
122: PetscMalloc1(16*(nidx+1)+512, &s);
123: PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
124: PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D with overlap:\n", rank, size, i);
125: for (j=0; j<nidx; j++) {
126: PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
127: }
128: ISRestoreIndices(osm->is[i],&idx);
129: PetscViewerStringSPrintf(sviewer,"\n");
130: PetscViewerDestroy(&sviewer);
131: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
132: PetscViewerASCIISynchronizedPrintf(viewer, s);
133: PetscViewerFlush(viewer);
134: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
135: PetscFree(s);
136: if (osm->is_local) {
137: /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
138: PetscMalloc1(16*(nidx+1)+512, &s);
139: PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
140: PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D without overlap:\n", rank, size, i);
141: ISGetLocalSize(osm->is_local[i],&nidx);
142: ISGetIndices(osm->is_local[i],&idx);
143: for (j=0; j<nidx; j++) {
144: PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
145: }
146: ISRestoreIndices(osm->is_local[i],&idx);
147: PetscViewerStringSPrintf(sviewer,"\n");
148: PetscViewerDestroy(&sviewer);
149: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
150: PetscViewerASCIISynchronizedPrintf(viewer, s);
151: PetscViewerFlush(viewer);
152: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
153: PetscFree(s);
154: }
155: } else {
156: /* Participate in collective viewer calls. */
157: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
158: PetscViewerFlush(viewer);
159: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
160: /* Assume either all ranks have is_local or none do. */
161: if (osm->is_local) {
162: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
163: PetscViewerFlush(viewer);
164: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
165: }
166: }
167: }
168: PetscViewerFlush(viewer);
169: PetscViewerDestroy(&viewer);
170: return(0);
171: }
175: static PetscErrorCode PCSetUp_ASM(PC pc)
176: {
177: PC_ASM *osm = (PC_ASM*)pc->data;
179: PetscBool symset,flg;
180: PetscInt i,m,m_local;
181: MatReuse scall = MAT_REUSE_MATRIX;
182: IS isl;
183: KSP ksp;
184: PC subpc;
185: const char *prefix,*pprefix;
186: Vec vec;
187: DM *domain_dm = NULL;
190: if (!pc->setupcalled) {
192: if (!osm->type_set) {
193: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
194: if (symset && flg) osm->type = PC_ASM_BASIC;
195: }
197: /* Note: if subdomains have been set either via PCASMSetTotalSubdomains() or via PCASMSetLocalSubdomains(), osm->n_local_true will not be PETSC_DECIDE */
198: if (osm->n_local_true == PETSC_DECIDE) {
199: /* no subdomains given */
200: /* try pc->dm first, if allowed */
201: if (osm->dm_subdomains && pc->dm) {
202: PetscInt num_domains, d;
203: char **domain_names;
204: IS *inner_domain_is, *outer_domain_is;
205: DMCreateDomainDecomposition(pc->dm, &num_domains, &domain_names, &inner_domain_is, &outer_domain_is, &domain_dm);
206: if (num_domains) {
207: PCASMSetLocalSubdomains(pc, num_domains, outer_domain_is, inner_domain_is);
208: }
209: for (d = 0; d < num_domains; ++d) {
210: if (domain_names) {PetscFree(domain_names[d]);}
211: if (inner_domain_is) {ISDestroy(&inner_domain_is[d]);}
212: if (outer_domain_is) {ISDestroy(&outer_domain_is[d]);}
213: }
214: PetscFree(domain_names);
215: PetscFree(inner_domain_is);
216: PetscFree(outer_domain_is);
217: }
218: if (osm->n_local_true == PETSC_DECIDE) {
219: /* still no subdomains; use one subdomain per processor */
220: osm->n_local_true = 1;
221: }
222: }
223: { /* determine the global and max number of subdomains */
224: struct {PetscInt max,sum;} inwork,outwork;
225: inwork.max = osm->n_local_true;
226: inwork.sum = osm->n_local_true;
227: MPI_Allreduce(&inwork,&outwork,1,MPIU_2INT,PetscMaxSum_Op,PetscObjectComm((PetscObject)pc));
228: osm->n_local = outwork.max;
229: osm->n = outwork.sum;
230: }
231: if (!osm->is) { /* create the index sets */
232: PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);
233: }
234: if (osm->n_local_true > 1 && !osm->is_local) {
235: PetscMalloc1(osm->n_local_true,&osm->is_local);
236: for (i=0; i<osm->n_local_true; i++) {
237: if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
238: ISDuplicate(osm->is[i],&osm->is_local[i]);
239: ISCopy(osm->is[i],osm->is_local[i]);
240: } else {
241: PetscObjectReference((PetscObject)osm->is[i]);
242: osm->is_local[i] = osm->is[i];
243: }
244: }
245: }
246: PCGetOptionsPrefix(pc,&prefix);
247: flg = PETSC_FALSE;
248: PetscOptionsGetBool(prefix,"-pc_asm_print_subdomains",&flg,NULL);
249: if (flg) { PCASMPrintSubdomains(pc); }
251: if (osm->overlap > 0) {
252: /* Extend the "overlapping" regions by a number of steps */
253: MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);
254: }
255: if (osm->sort_indices) {
256: for (i=0; i<osm->n_local_true; i++) {
257: ISSort(osm->is[i]);
258: if (osm->is_local) {
259: ISSort(osm->is_local[i]);
260: }
261: }
262: }
263: /* Create the local work vectors and scatter contexts */
264: MatCreateVecs(pc->pmat,&vec,0);
265: PetscMalloc1(osm->n_local,&osm->restriction);
266: if (osm->is_local) {PetscMalloc1(osm->n_local,&osm->localization);}
267: PetscMalloc1(osm->n_local,&osm->prolongation);
268: PetscMalloc1(osm->n_local,&osm->x);
269: PetscMalloc1(osm->n_local,&osm->y);
270: PetscMalloc1(osm->n_local,&osm->y_local);
271: for (i=0; i<osm->n_local_true; ++i) {
272: ISGetLocalSize(osm->is[i],&m);
273: VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);
274: ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);
275: VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);
276: ISDestroy(&isl);
277: VecDuplicate(osm->x[i],&osm->y[i]);
278: if (osm->is_local) {
279: ISLocalToGlobalMapping ltog;
280: IS isll;
281: const PetscInt *idx_local;
282: PetscInt *idx,nout;
284: ISLocalToGlobalMappingCreateIS(osm->is[i],<og);
285: ISGetLocalSize(osm->is_local[i],&m_local);
286: ISGetIndices(osm->is_local[i], &idx_local);
287: PetscMalloc1(m_local,&idx);
288: ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m_local,idx_local,&nout,idx);
289: ISLocalToGlobalMappingDestroy(<og);
290: if (nout != m_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_local not a subset of is");
291: ISRestoreIndices(osm->is_local[i], &idx_local);
292: ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,PETSC_OWN_POINTER,&isll);
293: ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);
294: VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);
295: VecScatterCreate(osm->y[i],isll,osm->y_local[i],isl,&osm->localization[i]);
296: ISDestroy(&isll);
298: VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);
299: ISDestroy(&isl);
300: } else {
301: VecGetLocalSize(vec,&m_local);
303: osm->y_local[i] = osm->y[i];
305: PetscObjectReference((PetscObject) osm->y[i]);
307: osm->prolongation[i] = osm->restriction[i];
309: PetscObjectReference((PetscObject) osm->restriction[i]);
310: }
311: }
312: for (i=osm->n_local_true; i<osm->n_local; i++) {
313: VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);
314: VecDuplicate(osm->x[i],&osm->y[i]);
315: VecDuplicate(osm->x[i],&osm->y_local[i]);
316: ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);
317: VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);
318: if (osm->is_local) {
319: VecScatterCreate(osm->y[i],isl,osm->y_local[i],isl,&osm->localization[i]);
320: VecScatterCreate(vec,isl,osm->x[i],isl,&osm->prolongation[i]);
321: } else {
322: osm->prolongation[i] = osm->restriction[i];
323: PetscObjectReference((PetscObject) osm->restriction[i]);
324: }
325: ISDestroy(&isl);
326: }
327: VecDestroy(&vec);
329: if (!osm->ksp) {
330: /* Create the local solvers */
331: PetscMalloc1(osm->n_local_true,&osm->ksp);
332: if (domain_dm) {
333: PetscInfo(pc,"Setting up ASM subproblems using the embedded DM\n");
334: }
335: for (i=0; i<osm->n_local_true; i++) {
336: KSPCreate(PETSC_COMM_SELF,&ksp);
337: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
338: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
339: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
340: KSPSetType(ksp,KSPPREONLY);
341: KSPGetPC(ksp,&subpc);
342: PCGetOptionsPrefix(pc,&prefix);
343: KSPSetOptionsPrefix(ksp,prefix);
344: KSPAppendOptionsPrefix(ksp,"sub_");
345: if (domain_dm) {
346: KSPSetDM(ksp, domain_dm[i]);
347: KSPSetDMActive(ksp, PETSC_FALSE);
348: DMDestroy(&domain_dm[i]);
349: }
350: osm->ksp[i] = ksp;
351: }
352: if (domain_dm) {
353: PetscFree(domain_dm);
354: }
355: }
356: scall = MAT_INITIAL_MATRIX;
357: } else {
358: /*
359: Destroy the blocks from the previous iteration
360: */
361: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
362: MatDestroyMatrices(osm->n_local_true,&osm->pmat);
363: scall = MAT_INITIAL_MATRIX;
364: }
365: }
367: /*
368: Extract out the submatrices
369: */
370: MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);
371: if (scall == MAT_INITIAL_MATRIX) {
372: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
373: for (i=0; i<osm->n_local_true; i++) {
374: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
375: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
376: }
377: }
379: /* Return control to the user so that the submatrices can be modified (e.g., to apply
380: different boundary conditions for the submatrices than for the global problem) */
381: PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);
383: /*
384: Loop over subdomains putting them into local ksp
385: */
386: for (i=0; i<osm->n_local_true; i++) {
387: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
388: if (!pc->setupcalled) {
389: KSPSetFromOptions(osm->ksp[i]);
390: }
391: }
392: return(0);
393: }
397: static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
398: {
399: PC_ASM *osm = (PC_ASM*)pc->data;
401: PetscInt i;
404: for (i=0; i<osm->n_local_true; i++) {
405: KSPSetUp(osm->ksp[i]);
406: }
407: return(0);
408: }
412: static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
413: {
414: PC_ASM *osm = (PC_ASM*)pc->data;
416: PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true;
417: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
420: /*
421: Support for limiting the restriction or interpolation to only local
422: subdomain values (leaving the other values 0).
423: */
424: if (!(osm->type & PC_ASM_RESTRICT)) {
425: forward = SCATTER_FORWARD_LOCAL;
426: /* have to zero the work RHS since scatter may leave some slots empty */
427: for (i=0; i<n_local_true; i++) {
428: VecZeroEntries(osm->x[i]);
429: }
430: }
431: if (!(osm->type & PC_ASM_INTERPOLATE)) reverse = SCATTER_REVERSE_LOCAL;
433: switch (osm->loctype)
434: {
435: case PC_COMPOSITE_ADDITIVE:
436: for (i=0; i<n_local; i++) {
437: VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
438: }
439: VecZeroEntries(y);
440: /* do the local solves */
441: for (i=0; i<n_local_true; i++) {
442: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
443: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
444: if (osm->localization) {
445: VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
446: VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
447: }
448: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
449: }
450: /* handle the rest of the scatters that do not have local solves */
451: for (i=n_local_true; i<n_local; i++) {
452: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
453: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
454: }
455: for (i=0; i<n_local; i++) {
456: VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
457: }
458: break;
459: case PC_COMPOSITE_MULTIPLICATIVE:
460: VecZeroEntries(y);
461: /* do the local solves */
462: for (i = 0; i < n_local_true; ++i) {
463: if (i > 0) {
464: /* Update initial guess */
465: VecScatterBegin(osm->restriction[i], y, osm->y[i], INSERT_VALUES, forward);
466: VecScatterEnd(osm->restriction[i], y, osm->y[i], INSERT_VALUES, forward);
467: MatMult(osm->pmat[i], osm->y[i], osm->x[i]);
468: VecScale(osm->x[i], -1.0);
469: } else {
470: VecZeroEntries(osm->x[i]);
471: }
472: VecScatterBegin(osm->restriction[i], x, osm->x[i], ADD_VALUES, forward);
473: VecScatterEnd(osm->restriction[i], x, osm->x[i], ADD_VALUES, forward);
474: KSPSolve(osm->ksp[i], osm->x[i], osm->y[i]);
475: if (osm->localization) {
476: VecScatterBegin(osm->localization[i], osm->y[i], osm->y_local[i], INSERT_VALUES, forward);
477: VecScatterEnd(osm->localization[i], osm->y[i], osm->y_local[i], INSERT_VALUES, forward);
478: }
479: VecScatterBegin(osm->prolongation[i], osm->y_local[i], y, ADD_VALUES, reverse);
480: VecScatterEnd(osm->prolongation[i], osm->y_local[i], y, ADD_VALUES, reverse);
481: }
482: /* handle the rest of the scatters that do not have local solves */
483: for (i = n_local_true; i < n_local; ++i) {
484: VecScatterBegin(osm->restriction[i], x, osm->x[i], INSERT_VALUES, forward);
485: VecScatterEnd(osm->restriction[i], x, osm->x[i], INSERT_VALUES, forward);
486: VecScatterBegin(osm->prolongation[i], osm->y_local[i], y, ADD_VALUES, reverse);
487: VecScatterEnd(osm->prolongation[i], osm->y_local[i], y, ADD_VALUES, reverse);
488: }
489: break;
490: default: SETERRQ1(PetscObjectComm((PetscObject) pc), PETSC_ERR_ARG_WRONG, "Invalid local composition type: %s", PCCompositeTypes[osm->loctype]);
491: }
492: return(0);
493: }
497: static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
498: {
499: PC_ASM *osm = (PC_ASM*)pc->data;
501: PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true;
502: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
505: /*
506: Support for limiting the restriction or interpolation to only local
507: subdomain values (leaving the other values 0).
509: Note: these are reversed from the PCApply_ASM() because we are applying the
510: transpose of the three terms
511: */
512: if (!(osm->type & PC_ASM_INTERPOLATE)) {
513: forward = SCATTER_FORWARD_LOCAL;
514: /* have to zero the work RHS since scatter may leave some slots empty */
515: for (i=0; i<n_local_true; i++) {
516: VecZeroEntries(osm->x[i]);
517: }
518: }
519: if (!(osm->type & PC_ASM_RESTRICT)) reverse = SCATTER_REVERSE_LOCAL;
521: for (i=0; i<n_local; i++) {
522: VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
523: }
524: VecZeroEntries(y);
525: /* do the local solves */
526: for (i=0; i<n_local_true; i++) {
527: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
528: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
529: if (osm->localization) {
530: VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
531: VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
532: }
533: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
534: }
535: /* handle the rest of the scatters that do not have local solves */
536: for (i=n_local_true; i<n_local; i++) {
537: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
538: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
539: }
540: for (i=0; i<n_local; i++) {
541: VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
542: }
543: return(0);
544: }
548: static PetscErrorCode PCReset_ASM(PC pc)
549: {
550: PC_ASM *osm = (PC_ASM*)pc->data;
552: PetscInt i;
555: if (osm->ksp) {
556: for (i=0; i<osm->n_local_true; i++) {
557: KSPReset(osm->ksp[i]);
558: }
559: }
560: if (osm->pmat) {
561: if (osm->n_local_true > 0) {
562: MatDestroyMatrices(osm->n_local_true,&osm->pmat);
563: }
564: }
565: if (osm->restriction) {
566: for (i=0; i<osm->n_local; i++) {
567: VecScatterDestroy(&osm->restriction[i]);
568: if (osm->localization) {VecScatterDestroy(&osm->localization[i]);}
569: VecScatterDestroy(&osm->prolongation[i]);
570: VecDestroy(&osm->x[i]);
571: VecDestroy(&osm->y[i]);
572: VecDestroy(&osm->y_local[i]);
573: }
574: PetscFree(osm->restriction);
575: if (osm->localization) {PetscFree(osm->localization);}
576: PetscFree(osm->prolongation);
577: PetscFree(osm->x);
578: PetscFree(osm->y);
579: PetscFree(osm->y_local);
580: }
581: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
583: osm->is = 0;
584: osm->is_local = 0;
585: return(0);
586: }
590: static PetscErrorCode PCDestroy_ASM(PC pc)
591: {
592: PC_ASM *osm = (PC_ASM*)pc->data;
594: PetscInt i;
597: PCReset_ASM(pc);
598: if (osm->ksp) {
599: for (i=0; i<osm->n_local_true; i++) {
600: KSPDestroy(&osm->ksp[i]);
601: }
602: PetscFree(osm->ksp);
603: }
604: PetscFree(pc->data);
605: return(0);
606: }
610: static PetscErrorCode PCSetFromOptions_ASM(PetscOptions *PetscOptionsObject,PC pc)
611: {
612: PC_ASM *osm = (PC_ASM*)pc->data;
614: PetscInt blocks,ovl;
615: PetscBool symset,flg;
616: PCASMType asmtype;
617: PCCompositeType loctype;
620: /* set the type to symmetric if matrix is symmetric */
621: if (!osm->type_set && pc->pmat) {
622: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
623: if (symset && flg) osm->type = PC_ASM_BASIC;
624: }
625: PetscOptionsHead(PetscOptionsObject,"Additive Schwarz options");
626: PetscOptionsBool("-pc_asm_dm_subdomains","Use DMCreateDomainDecomposition() to define subdomains","PCASMSetDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
627: PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);
628: if (flg) {
629: PCASMSetTotalSubdomains(pc,blocks,NULL,NULL);
630: osm->dm_subdomains = PETSC_FALSE;
631: }
632: PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);
633: if (flg) {
634: PCASMSetOverlap(pc,ovl);
635: osm->dm_subdomains = PETSC_FALSE;
636: }
637: flg = PETSC_FALSE;
638: PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);
639: if (flg) {PCASMSetType(pc,asmtype); }
640: flg = PETSC_FALSE;
641: PetscOptionsEnum("-pc_asm_local_type","Type of local solver composition","PCASMSetLocalType",PCCompositeTypes,(PetscEnum)osm->loctype,(PetscEnum*)&loctype,&flg);
642: if (flg) {PCASMSetLocalType(pc,loctype); }
643: PetscOptionsTail();
644: return(0);
645: }
647: /*------------------------------------------------------------------------------------*/
651: static PetscErrorCode PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
652: {
653: PC_ASM *osm = (PC_ASM*)pc->data;
655: PetscInt i;
658: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
659: if (pc->setupcalled && (n != osm->n_local_true || is)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
661: if (!pc->setupcalled) {
662: if (is) {
663: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is[i]);}
664: }
665: if (is_local) {
666: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is_local[i]);}
667: }
668: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
670: osm->n_local_true = n;
671: osm->is = 0;
672: osm->is_local = 0;
673: if (is) {
674: PetscMalloc1(n,&osm->is);
675: for (i=0; i<n; i++) osm->is[i] = is[i];
676: /* Flag indicating that the user has set overlapping subdomains so PCASM should not increase their size. */
677: osm->overlap = -1;
678: }
679: if (is_local) {
680: PetscMalloc1(n,&osm->is_local);
681: for (i=0; i<n; i++) osm->is_local[i] = is_local[i];
682: if (!is) {
683: PetscMalloc1(osm->n_local_true,&osm->is);
684: for (i=0; i<osm->n_local_true; i++) {
685: if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
686: ISDuplicate(osm->is_local[i],&osm->is[i]);
687: ISCopy(osm->is_local[i],osm->is[i]);
688: } else {
689: PetscObjectReference((PetscObject)osm->is_local[i]);
690: osm->is[i] = osm->is_local[i];
691: }
692: }
693: }
694: }
695: }
696: return(0);
697: }
701: static PetscErrorCode PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is,IS *is_local)
702: {
703: PC_ASM *osm = (PC_ASM*)pc->data;
705: PetscMPIInt rank,size;
706: PetscInt n;
709: if (N < 1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
710: if (is || is_local) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");
712: /*
713: Split the subdomains equally among all processors
714: */
715: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
716: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
717: n = N/size + ((N % size) > rank);
718: if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
719: if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
720: if (!pc->setupcalled) {
721: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
723: osm->n_local_true = n;
724: osm->is = 0;
725: osm->is_local = 0;
726: }
727: return(0);
728: }
732: static PetscErrorCode PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
733: {
734: PC_ASM *osm = (PC_ASM*)pc->data;
737: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
738: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
739: if (!pc->setupcalled) osm->overlap = ovl;
740: return(0);
741: }
745: static PetscErrorCode PCASMSetType_ASM(PC pc,PCASMType type)
746: {
747: PC_ASM *osm = (PC_ASM*)pc->data;
750: osm->type = type;
751: osm->type_set = PETSC_TRUE;
752: return(0);
753: }
757: static PetscErrorCode PCASMGetType_ASM(PC pc,PCASMType *type)
758: {
759: PC_ASM *osm = (PC_ASM*)pc->data;
762: *type = osm->type;
763: return(0);
764: }
768: static PetscErrorCode PCASMSetLocalType_ASM(PC pc, PCCompositeType type)
769: {
770: PC_ASM *osm = (PC_ASM *) pc->data;
773: osm->loctype = type;
774: return(0);
775: }
779: static PetscErrorCode PCASMGetLocalType_ASM(PC pc, PCCompositeType *type)
780: {
781: PC_ASM *osm = (PC_ASM *) pc->data;
784: *type = osm->loctype;
785: return(0);
786: }
790: static PetscErrorCode PCASMSetSortIndices_ASM(PC pc,PetscBool doSort)
791: {
792: PC_ASM *osm = (PC_ASM*)pc->data;
795: osm->sort_indices = doSort;
796: return(0);
797: }
801: static PetscErrorCode PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
802: {
803: PC_ASM *osm = (PC_ASM*)pc->data;
807: if (osm->n_local_true < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
809: if (n_local) *n_local = osm->n_local_true;
810: if (first_local) {
811: MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
812: *first_local -= osm->n_local_true;
813: }
814: if (ksp) {
815: /* Assume that local solves are now different; not necessarily
816: true though! This flag is used only for PCView_ASM() */
817: *ksp = osm->ksp;
818: osm->same_local_solves = PETSC_FALSE;
819: }
820: return(0);
821: }
825: /*@C
826: PCASMSetLocalSubdomains - Sets the local subdomains (for this processor only) for the additive Schwarz preconditioner.
828: Collective on PC
830: Input Parameters:
831: + pc - the preconditioner context
832: . n - the number of subdomains for this processor (default value = 1)
833: . is - the index set that defines the subdomains for this processor
834: (or NULL for PETSc to determine subdomains)
835: - is_local - the index sets that define the local part of the subdomains for this processor
836: (or NULL to use the default of 1 subdomain per process)
838: Notes:
839: The IS numbering is in the parallel, global numbering of the vector for both is and is_local
841: By default the ASM preconditioner uses 1 block per processor.
843: Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
845: Level: advanced
847: .keywords: PC, ASM, set, local, subdomains, additive Schwarz
849: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
850: PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
851: @*/
852: PetscErrorCode PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
853: {
858: PetscTryMethod(pc,"PCASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));
859: return(0);
860: }
864: /*@C
865: PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
866: additive Schwarz preconditioner. Either all or no processors in the
867: PC communicator must call this routine, with the same index sets.
869: Collective on PC
871: Input Parameters:
872: + pc - the preconditioner context
873: . N - the number of subdomains for all processors
874: . is - the index sets that define the subdomains for all processors
875: (or NULL to ask PETSc to compe up with subdomains)
876: - is_local - the index sets that define the local part of the subdomains for this processor
877: (or NULL to use the default of 1 subdomain per process)
879: Options Database Key:
880: To set the total number of subdomain blocks rather than specify the
881: index sets, use the option
882: . -pc_asm_blocks <blks> - Sets total blocks
884: Notes:
885: Currently you cannot use this to set the actual subdomains with the argument is.
887: By default the ASM preconditioner uses 1 block per processor.
889: These index sets cannot be destroyed until after completion of the
890: linear solves for which the ASM preconditioner is being used.
892: Use PCASMSetLocalSubdomains() to set local subdomains.
894: The IS numbering is in the parallel, global numbering of the vector for both is and is_local
896: Level: advanced
898: .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
900: .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
901: PCASMCreateSubdomains2D()
902: @*/
903: PetscErrorCode PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
904: {
909: PetscTryMethod(pc,"PCASMSetTotalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,N,is,is_local));
910: return(0);
911: }
915: /*@
916: PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
917: additive Schwarz preconditioner. Either all or no processors in the
918: PC communicator must call this routine.
920: Logically Collective on PC
922: Input Parameters:
923: + pc - the preconditioner context
924: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
926: Options Database Key:
927: . -pc_asm_overlap <ovl> - Sets overlap
929: Notes:
930: By default the ASM preconditioner uses 1 block per processor. To use
931: multiple blocks per perocessor, see PCASMSetTotalSubdomains() and
932: PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
934: The overlap defaults to 1, so if one desires that no additional
935: overlap be computed beyond what may have been set with a call to
936: PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
937: must be set to be 0. In particular, if one does not explicitly set
938: the subdomains an application code, then all overlap would be computed
939: internally by PETSc, and using an overlap of 0 would result in an ASM
940: variant that is equivalent to the block Jacobi preconditioner.
942: Note that one can define initial index sets with any overlap via
943: PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
944: PCASMSetOverlap() merely allows PETSc to extend that overlap further
945: if desired.
947: Level: intermediate
949: .keywords: PC, ASM, set, overlap
951: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
952: PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
953: @*/
954: PetscErrorCode PCASMSetOverlap(PC pc,PetscInt ovl)
955: {
961: PetscTryMethod(pc,"PCASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
962: return(0);
963: }
967: /*@
968: PCASMSetType - Sets the type of restriction and interpolation used
969: for local problems in the additive Schwarz method.
971: Logically Collective on PC
973: Input Parameters:
974: + pc - the preconditioner context
975: - type - variant of ASM, one of
976: .vb
977: PC_ASM_BASIC - full interpolation and restriction
978: PC_ASM_RESTRICT - full restriction, local processor interpolation
979: PC_ASM_INTERPOLATE - full interpolation, local processor restriction
980: PC_ASM_NONE - local processor restriction and interpolation
981: .ve
983: Options Database Key:
984: . -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
986: Level: intermediate
988: .keywords: PC, ASM, set, type
990: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
991: PCASMCreateSubdomains2D()
992: @*/
993: PetscErrorCode PCASMSetType(PC pc,PCASMType type)
994: {
1000: PetscTryMethod(pc,"PCASMSetType_C",(PC,PCASMType),(pc,type));
1001: return(0);
1002: }
1006: /*@
1007: PCASMGetType - Gets the type of restriction and interpolation used
1008: for local problems in the additive Schwarz method.
1010: Logically Collective on PC
1012: Input Parameter:
1013: . pc - the preconditioner context
1015: Output Parameter:
1016: . type - variant of ASM, one of
1018: .vb
1019: PC_ASM_BASIC - full interpolation and restriction
1020: PC_ASM_RESTRICT - full restriction, local processor interpolation
1021: PC_ASM_INTERPOLATE - full interpolation, local processor restriction
1022: PC_ASM_NONE - local processor restriction and interpolation
1023: .ve
1025: Options Database Key:
1026: . -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
1028: Level: intermediate
1030: .keywords: PC, ASM, set, type
1032: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
1033: PCASMCreateSubdomains2D()
1034: @*/
1035: PetscErrorCode PCASMGetType(PC pc,PCASMType *type)
1036: {
1041: PetscUseMethod(pc,"PCASMGetType_C",(PC,PCASMType*),(pc,type));
1042: return(0);
1043: }
1047: /*@
1048: PCASMSetLocalType - Sets the type of composition used for local problems in the additive Schwarz method.
1050: Logically Collective on PC
1052: Input Parameters:
1053: + pc - the preconditioner context
1054: - type - type of composition, one of
1055: .vb
1056: PC_COMPOSITE_ADDITIVE - local additive combination
1057: PC_COMPOSITE_MULTIPLICATIVE - local multiplicative combination
1058: .ve
1060: Options Database Key:
1061: . -pc_asm_local_type [additive,multiplicative] - Sets local solver composition type
1063: Level: intermediate
1065: .seealso: PCASMSetType(), PCASMGetType(), PCASMGetLocalType(), PCASMCreate()
1066: @*/
1067: PetscErrorCode PCASMSetLocalType(PC pc, PCCompositeType type)
1068: {
1074: PetscTryMethod(pc, "PCASMSetLocalType_C", (PC, PCCompositeType), (pc, type));
1075: return(0);
1076: }
1080: /*@
1081: PCASMGetLocalType - Gets the type of composition used for local problems in the additive Schwarz method.
1083: Logically Collective on PC
1085: Input Parameter:
1086: . pc - the preconditioner context
1088: Output Parameter:
1089: . type - type of composition, one of
1090: .vb
1091: PC_COMPOSITE_ADDITIVE - local additive combination
1092: PC_COMPOSITE_MULTIPLICATIVE - local multiplicative combination
1093: .ve
1095: Options Database Key:
1096: . -pc_asm_local_type [additive,multiplicative] - Sets local solver composition type
1098: Level: intermediate
1100: .seealso: PCASMSetType(), PCASMGetType(), PCASMSetLocalType(), PCASMCreate()
1101: @*/
1102: PetscErrorCode PCASMGetLocalType(PC pc, PCCompositeType *type)
1103: {
1109: PetscUseMethod(pc, "PCASMGetLocalType_C", (PC, PCCompositeType *), (pc, type));
1110: return(0);
1111: }
1115: /*@
1116: PCASMSetSortIndices - Determines whether subdomain indices are sorted.
1118: Logically Collective on PC
1120: Input Parameters:
1121: + pc - the preconditioner context
1122: - doSort - sort the subdomain indices
1124: Level: intermediate
1126: .keywords: PC, ASM, set, type
1128: .seealso: PCASMSetLocalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
1129: PCASMCreateSubdomains2D()
1130: @*/
1131: PetscErrorCode PCASMSetSortIndices(PC pc,PetscBool doSort)
1132: {
1138: PetscTryMethod(pc,"PCASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1139: return(0);
1140: }
1144: /*@C
1145: PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
1146: this processor.
1148: Collective on PC iff first_local is requested
1150: Input Parameter:
1151: . pc - the preconditioner context
1153: Output Parameters:
1154: + n_local - the number of blocks on this processor or NULL
1155: . first_local - the global number of the first block on this processor or NULL,
1156: all processors must request or all must pass NULL
1157: - ksp - the array of KSP contexts
1159: Note:
1160: After PCASMGetSubKSP() the array of KSPes is not to be freed.
1162: Currently for some matrix implementations only 1 block per processor
1163: is supported.
1165: You must call KSPSetUp() before calling PCASMGetSubKSP().
1167: Fortran note:
1168: The output argument 'ksp' must be an array of sufficient length or NULL_OBJECT. The latter can be used to learn the necessary length.
1170: Level: advanced
1172: .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
1174: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
1175: PCASMCreateSubdomains2D(),
1176: @*/
1177: PetscErrorCode PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1178: {
1183: PetscUseMethod(pc,"PCASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1184: return(0);
1185: }
1187: /* -------------------------------------------------------------------------------------*/
1188: /*MC
1189: PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1190: its own KSP object.
1192: Options Database Keys:
1193: + -pc_asm_blocks <blks> - Sets total blocks
1194: . -pc_asm_overlap <ovl> - Sets overlap
1195: - -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
1197: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1198: will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
1199: -pc_asm_type basic to use the standard ASM.
1201: Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1202: than one processor. Defaults to one block per processor.
1204: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1205: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1207: To set the options on the solvers separate for each block call PCASMGetSubKSP()
1208: and set the options directly on the resulting KSP object (you can access its PC
1209: with KSPGetPC())
1212: Level: beginner
1214: Concepts: additive Schwarz method
1216: References:
1217: An additive variant of the Schwarz alternating method for the case of many subregions
1218: M Dryja, OB Widlund - Courant Institute, New York University Technical report
1220: Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1221: Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1223: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1224: PCBJACOBI, PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
1225: PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()
1227: M*/
1231: PETSC_EXTERN PetscErrorCode PCCreate_ASM(PC pc)
1232: {
1234: PC_ASM *osm;
1237: PetscNewLog(pc,&osm);
1239: osm->n = PETSC_DECIDE;
1240: osm->n_local = 0;
1241: osm->n_local_true = PETSC_DECIDE;
1242: osm->overlap = 1;
1243: osm->ksp = 0;
1244: osm->restriction = 0;
1245: osm->localization = 0;
1246: osm->prolongation = 0;
1247: osm->x = 0;
1248: osm->y = 0;
1249: osm->y_local = 0;
1250: osm->is = 0;
1251: osm->is_local = 0;
1252: osm->mat = 0;
1253: osm->pmat = 0;
1254: osm->type = PC_ASM_RESTRICT;
1255: osm->loctype = PC_COMPOSITE_ADDITIVE;
1256: osm->same_local_solves = PETSC_TRUE;
1257: osm->sort_indices = PETSC_TRUE;
1258: osm->dm_subdomains = PETSC_FALSE;
1260: pc->data = (void*)osm;
1261: pc->ops->apply = PCApply_ASM;
1262: pc->ops->applytranspose = PCApplyTranspose_ASM;
1263: pc->ops->setup = PCSetUp_ASM;
1264: pc->ops->reset = PCReset_ASM;
1265: pc->ops->destroy = PCDestroy_ASM;
1266: pc->ops->setfromoptions = PCSetFromOptions_ASM;
1267: pc->ops->setuponblocks = PCSetUpOnBlocks_ASM;
1268: pc->ops->view = PCView_ASM;
1269: pc->ops->applyrichardson = 0;
1271: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",PCASMSetLocalSubdomains_ASM);
1272: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",PCASMSetTotalSubdomains_ASM);
1273: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetOverlap_C",PCASMSetOverlap_ASM);
1274: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetType_C",PCASMSetType_ASM);
1275: PetscObjectComposeFunction((PetscObject)pc,"PCASMGetType_C",PCASMGetType_ASM);
1276: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalType_C",PCASMSetLocalType_ASM);
1277: PetscObjectComposeFunction((PetscObject)pc,"PCASMGetLocalType_C",PCASMGetLocalType_ASM);
1278: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSortIndices_C",PCASMSetSortIndices_ASM);
1279: PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubKSP_C",PCASMGetSubKSP_ASM);
1280: return(0);
1281: }
1285: /*@C
1286: PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1287: preconditioner for a any problem on a general grid.
1289: Collective
1291: Input Parameters:
1292: + A - The global matrix operator
1293: - n - the number of local blocks
1295: Output Parameters:
1296: . outis - the array of index sets defining the subdomains
1298: Level: advanced
1300: Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
1301: from these if you use PCASMSetLocalSubdomains()
1303: In the Fortran version you must provide the array outis[] already allocated of length n.
1305: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1307: .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
1308: @*/
1309: PetscErrorCode PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1310: {
1311: MatPartitioning mpart;
1312: const char *prefix;
1313: PetscErrorCode (*f)(Mat,Mat*);
1314: PetscMPIInt size;
1315: PetscInt i,j,rstart,rend,bs;
1316: PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1317: Mat Ad = NULL, adj;
1318: IS ispart,isnumb,*is;
1319: PetscErrorCode ierr;
1324: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1326: /* Get prefix, row distribution, and block size */
1327: MatGetOptionsPrefix(A,&prefix);
1328: MatGetOwnershipRange(A,&rstart,&rend);
1329: MatGetBlockSize(A,&bs);
1330: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1332: /* Get diagonal block from matrix if possible */
1333: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1334: PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",&f);
1335: if (f) {
1336: MatGetDiagonalBlock(A,&Ad);
1337: } else if (size == 1) {
1338: Ad = A;
1339: }
1340: if (Ad) {
1341: PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1342: if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1343: }
1344: if (Ad && n > 1) {
1345: PetscBool match,done;
1346: /* Try to setup a good matrix partitioning if available */
1347: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1348: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1349: MatPartitioningSetFromOptions(mpart);
1350: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1351: if (!match) {
1352: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1353: }
1354: if (!match) { /* assume a "good" partitioner is available */
1355: PetscInt na;
1356: const PetscInt *ia,*ja;
1357: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1358: if (done) {
1359: /* Build adjacency matrix by hand. Unfortunately a call to
1360: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1361: remove the block-aij structure and we cannot expect
1362: MatPartitioning to split vertices as we need */
1363: PetscInt i,j,len,nnz,cnt,*iia=0,*jja=0;
1364: const PetscInt *row;
1365: nnz = 0;
1366: for (i=0; i<na; i++) { /* count number of nonzeros */
1367: len = ia[i+1] - ia[i];
1368: row = ja + ia[i];
1369: for (j=0; j<len; j++) {
1370: if (row[j] == i) { /* don't count diagonal */
1371: len--; break;
1372: }
1373: }
1374: nnz += len;
1375: }
1376: PetscMalloc1(na+1,&iia);
1377: PetscMalloc1(nnz,&jja);
1378: nnz = 0;
1379: iia[0] = 0;
1380: for (i=0; i<na; i++) { /* fill adjacency */
1381: cnt = 0;
1382: len = ia[i+1] - ia[i];
1383: row = ja + ia[i];
1384: for (j=0; j<len; j++) {
1385: if (row[j] != i) { /* if not diagonal */
1386: jja[nnz+cnt++] = row[j];
1387: }
1388: }
1389: nnz += cnt;
1390: iia[i+1] = nnz;
1391: }
1392: /* Partitioning of the adjacency matrix */
1393: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1394: MatPartitioningSetAdjacency(mpart,adj);
1395: MatPartitioningSetNParts(mpart,n);
1396: MatPartitioningApply(mpart,&ispart);
1397: ISPartitioningToNumbering(ispart,&isnumb);
1398: MatDestroy(&adj);
1399: foundpart = PETSC_TRUE;
1400: }
1401: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1402: }
1403: MatPartitioningDestroy(&mpart);
1404: }
1406: PetscMalloc1(n,&is);
1407: *outis = is;
1409: if (!foundpart) {
1411: /* Partitioning by contiguous chunks of rows */
1413: PetscInt mbs = (rend-rstart)/bs;
1414: PetscInt start = rstart;
1415: for (i=0; i<n; i++) {
1416: PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1417: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1418: start += count;
1419: }
1421: } else {
1423: /* Partitioning by adjacency of diagonal block */
1425: const PetscInt *numbering;
1426: PetscInt *count,nidx,*indices,*newidx,start=0;
1427: /* Get node count in each partition */
1428: PetscMalloc1(n,&count);
1429: ISPartitioningCount(ispart,n,count);
1430: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1431: for (i=0; i<n; i++) count[i] *= bs;
1432: }
1433: /* Build indices from node numbering */
1434: ISGetLocalSize(isnumb,&nidx);
1435: PetscMalloc1(nidx,&indices);
1436: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1437: ISGetIndices(isnumb,&numbering);
1438: PetscSortIntWithPermutation(nidx,numbering,indices);
1439: ISRestoreIndices(isnumb,&numbering);
1440: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1441: PetscMalloc1(nidx*bs,&newidx);
1442: for (i=0; i<nidx; i++) {
1443: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1444: }
1445: PetscFree(indices);
1446: nidx *= bs;
1447: indices = newidx;
1448: }
1449: /* Shift to get global indices */
1450: for (i=0; i<nidx; i++) indices[i] += rstart;
1452: /* Build the index sets for each block */
1453: for (i=0; i<n; i++) {
1454: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1455: ISSort(is[i]);
1456: start += count[i];
1457: }
1459: PetscFree(count);
1460: PetscFree(indices);
1461: ISDestroy(&isnumb);
1462: ISDestroy(&ispart);
1464: }
1465: return(0);
1466: }
1470: /*@C
1471: PCASMDestroySubdomains - Destroys the index sets created with
1472: PCASMCreateSubdomains(). Should be called after setting subdomains
1473: with PCASMSetLocalSubdomains().
1475: Collective
1477: Input Parameters:
1478: + n - the number of index sets
1479: . is - the array of index sets
1480: - is_local - the array of local index sets, can be NULL
1482: Level: advanced
1484: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1486: .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1487: @*/
1488: PetscErrorCode PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1489: {
1490: PetscInt i;
1494: if (n <= 0) return(0);
1495: if (is) {
1497: for (i=0; i<n; i++) { ISDestroy(&is[i]); }
1498: PetscFree(is);
1499: }
1500: if (is_local) {
1502: for (i=0; i<n; i++) { ISDestroy(&is_local[i]); }
1503: PetscFree(is_local);
1504: }
1505: return(0);
1506: }
1510: /*@
1511: PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1512: preconditioner for a two-dimensional problem on a regular grid.
1514: Not Collective
1516: Input Parameters:
1517: + m, n - the number of mesh points in the x and y directions
1518: . M, N - the number of subdomains in the x and y directions
1519: . dof - degrees of freedom per node
1520: - overlap - overlap in mesh lines
1522: Output Parameters:
1523: + Nsub - the number of subdomains created
1524: . is - array of index sets defining overlapping (if overlap > 0) subdomains
1525: - is_local - array of index sets defining non-overlapping subdomains
1527: Note:
1528: Presently PCAMSCreateSubdomains2d() is valid only for sequential
1529: preconditioners. More general related routines are
1530: PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1532: Level: advanced
1534: .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1536: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1537: PCASMSetOverlap()
1538: @*/
1539: PetscErrorCode PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is,IS **is_local)
1540: {
1541: PetscInt i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
1543: PetscInt nidx,*idx,loc,ii,jj,count;
1546: if (dof != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," ");
1548: *Nsub = N*M;
1549: PetscMalloc1(*Nsub,is);
1550: PetscMalloc1(*Nsub,is_local);
1551: ystart = 0;
1552: loc_outer = 0;
1553: for (i=0; i<N; i++) {
1554: height = n/N + ((n % N) > i); /* height of subdomain */
1555: if (height < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1556: yleft = ystart - overlap; if (yleft < 0) yleft = 0;
1557: yright = ystart + height + overlap; if (yright > n) yright = n;
1558: xstart = 0;
1559: for (j=0; j<M; j++) {
1560: width = m/M + ((m % M) > j); /* width of subdomain */
1561: if (width < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1562: xleft = xstart - overlap; if (xleft < 0) xleft = 0;
1563: xright = xstart + width + overlap; if (xright > m) xright = m;
1564: nidx = (xright - xleft)*(yright - yleft);
1565: PetscMalloc1(nidx,&idx);
1566: loc = 0;
1567: for (ii=yleft; ii<yright; ii++) {
1568: count = m*ii + xleft;
1569: for (jj=xleft; jj<xright; jj++) idx[loc++] = count++;
1570: }
1571: ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,PETSC_COPY_VALUES,(*is)+loc_outer);
1572: if (overlap == 0) {
1573: PetscObjectReference((PetscObject)(*is)[loc_outer]);
1575: (*is_local)[loc_outer] = (*is)[loc_outer];
1576: } else {
1577: for (loc=0,ii=ystart; ii<ystart+height; ii++) {
1578: for (jj=xstart; jj<xstart+width; jj++) {
1579: idx[loc++] = m*ii + jj;
1580: }
1581: }
1582: ISCreateGeneral(PETSC_COMM_SELF,loc,idx,PETSC_COPY_VALUES,*is_local+loc_outer);
1583: }
1584: PetscFree(idx);
1585: xstart += width;
1586: loc_outer++;
1587: }
1588: ystart += height;
1589: }
1590: for (i=0; i<*Nsub; i++) { ISSort((*is)[i]); }
1591: return(0);
1592: }
1596: /*@C
1597: PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1598: only) for the additive Schwarz preconditioner.
1600: Not Collective
1602: Input Parameter:
1603: . pc - the preconditioner context
1605: Output Parameters:
1606: + n - the number of subdomains for this processor (default value = 1)
1607: . is - the index sets that define the subdomains for this processor
1608: - is_local - the index sets that define the local part of the subdomains for this processor (can be NULL)
1611: Notes:
1612: The IS numbering is in the parallel, global numbering of the vector.
1614: Level: advanced
1616: .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1618: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1619: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1620: @*/
1621: PetscErrorCode PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1622: {
1623: PC_ASM *osm;
1625: PetscBool match;
1631: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1632: if (!match) {
1633: if (n) *n = 0;
1634: if (is) *is = NULL;
1635: } else {
1636: osm = (PC_ASM*)pc->data;
1637: if (n) *n = osm->n_local_true;
1638: if (is) *is = osm->is;
1639: if (is_local) *is_local = osm->is_local;
1640: }
1641: return(0);
1642: }
1646: /*@C
1647: PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1648: only) for the additive Schwarz preconditioner.
1650: Not Collective
1652: Input Parameter:
1653: . pc - the preconditioner context
1655: Output Parameters:
1656: + n - the number of matrices for this processor (default value = 1)
1657: - mat - the matrices
1660: Level: advanced
1662: Notes: Call after PCSetUp() (or KSPSetUp()) but before PCApply() (or KSPApply()) and before PCSetUpOnBlocks())
1664: Usually one would use PCSetModifySubmatrices() to change the submatrices in building the preconditioner.
1666: .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1668: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1669: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains(), PCSetModifySubmatrices()
1670: @*/
1671: PetscErrorCode PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1672: {
1673: PC_ASM *osm;
1675: PetscBool match;
1681: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1682: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1683: if (!match) {
1684: if (n) *n = 0;
1685: if (mat) *mat = NULL;
1686: } else {
1687: osm = (PC_ASM*)pc->data;
1688: if (n) *n = osm->n_local_true;
1689: if (mat) *mat = osm->pmat;
1690: }
1691: return(0);
1692: }
1696: /*@
1697: PCASMSetDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1698: Logically Collective
1700: Input Parameter:
1701: + pc - the preconditioner
1702: - flg - boolean indicating whether to use subdomains defined by the DM
1704: Options Database Key:
1705: . -pc_asm_dm_subdomains
1707: Level: intermediate
1709: Notes:
1710: PCASMSetTotalSubdomains() and PCASMSetOverlap() take precedence over PCASMSetDMSubdomains(),
1711: so setting either of the first two effectively turns the latter off.
1713: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1715: .seealso: PCASMGetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1716: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1717: @*/
1718: PetscErrorCode PCASMSetDMSubdomains(PC pc,PetscBool flg)
1719: {
1720: PC_ASM *osm = (PC_ASM*)pc->data;
1722: PetscBool match;
1727: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1728: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1729: if (match) {
1730: osm->dm_subdomains = flg;
1731: }
1732: return(0);
1733: }
1737: /*@
1738: PCASMGetDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1739: Not Collective
1741: Input Parameter:
1742: . pc - the preconditioner
1744: Output Parameter:
1745: . flg - boolean indicating whether to use subdomains defined by the DM
1747: Level: intermediate
1749: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1751: .seealso: PCASMSetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1752: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1753: @*/
1754: PetscErrorCode PCASMGetDMSubdomains(PC pc,PetscBool* flg)
1755: {
1756: PC_ASM *osm = (PC_ASM*)pc->data;
1758: PetscBool match;
1763: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1764: if (match) {
1765: if (flg) *flg = osm->dm_subdomains;
1766: }
1767: return(0);
1768: }