Actual source code: asm.c
petsc-3.4.5 2014-06-29
2: /*
3: This file defines an additive Schwarz preconditioner for any Mat implementation.
5: Note that each processor may have any number of subdomains. But in order to
6: deal easily with the VecScatter(), we treat each processor as if it has the
7: same number of subdomains.
9: n - total number of true subdomains on all processors
10: n_local_true - actual number of subdomains on this processor
11: n_local = maximum over all processors of n_local_true
12: */
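/*
   A concrete instance of the counting above (illustrative values only): if three
   processes own 2, 1 and 3 true subdomains respectively, then n = 6 on every
   process, n_local_true is 2, 1 or 3 on the owning process, and n_local = 3
   everywhere, so the scatter loops below execute the same number of iterations
   on every process (with empty scatters padding the shorter ones).
*/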
13: #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/
14: #include <petscdm.h>
16: typedef struct {
17: PetscInt n, n_local, n_local_true;
18: PetscInt overlap; /* overlap requested by user */
19: KSP *ksp; /* linear solvers for each block */
20: VecScatter *restriction; /* mapping from global to subregion */
21: VecScatter *localization; /* mapping from overlapping to non-overlapping subregion */
22: VecScatter *prolongation; /* mapping from subregion to global */
23: Vec *x,*y,*y_local; /* work vectors */
24: IS *is; /* index set that defines each overlapping subdomain */
25: IS *is_local; /* index set that defines each non-overlapping subdomain, may be NULL */
26: Mat *mat,*pmat; /* mat is not currently used */
27: PCASMType type; /* use reduced interpolation, restriction or both */
28: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
29: PetscBool same_local_solves; /* flag indicating whether all local solvers are same */
30: PetscBool sort_indices; /* flag to sort subdomain indices */
31: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
32: } PC_ASM;
36: static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
37: {
38: PC_ASM *osm = (PC_ASM*)pc->data;
40: PetscMPIInt rank;
41: PetscInt i,bsz;
42: PetscBool iascii,isstring;
43: PetscViewer sviewer;
46: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
47: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
48: if (iascii) {
49: char overlaps[256] = "user-defined overlap",blocks[256] = "total subdomain blocks not yet set";
50: if (osm->overlap >= 0) {PetscSNPrintf(overlaps,sizeof(overlaps),"amount of overlap = %D",osm->overlap);}
51: if (osm->n > 0) {PetscSNPrintf(blocks,sizeof(blocks),"total subdomain blocks = %D",osm->n);}
52: PetscViewerASCIIPrintf(viewer," Additive Schwarz: %s, %s\n",blocks,overlaps);
53: PetscViewerASCIIPrintf(viewer," Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);
54: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
55: if (osm->same_local_solves) {
56: if (osm->ksp) {
57: PetscViewerASCIIPrintf(viewer," Local solve is same for all blocks, in the following KSP and PC objects:\n");
58: PetscViewerGetSingleton(viewer,&sviewer);
59: if (!rank) {
60: PetscViewerASCIIPushTab(viewer);
61: KSPView(osm->ksp[0],sviewer);
62: PetscViewerASCIIPopTab(viewer);
63: }
64: PetscViewerRestoreSingleton(viewer,&sviewer);
65: }
66: } else {
67: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
68: PetscViewerASCIISynchronizedPrintf(viewer," [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);
69: PetscViewerFlush(viewer);
70: PetscViewerASCIIPrintf(viewer," Local solve info for each block is in the following KSP and PC objects:\n");
71: PetscViewerASCIIPushTab(viewer);
72: PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
73: for (i=0; i<osm->n_local; i++) {
74: PetscViewerGetSingleton(viewer,&sviewer);
75: if (i < osm->n_local_true) {
76: ISGetLocalSize(osm->is[i],&bsz);
77: PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);
78: KSPView(osm->ksp[i],sviewer);
79: PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
80: }
81: PetscViewerRestoreSingleton(viewer,&sviewer);
82: }
83: PetscViewerASCIIPopTab(viewer);
84: PetscViewerFlush(viewer);
85: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
86: }
87: } else if (isstring) {
88: PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);
89: PetscViewerGetSingleton(viewer,&sviewer);
90: if (osm->ksp) {KSPView(osm->ksp[0],sviewer);}
91: PetscViewerRestoreSingleton(viewer,&sviewer);
92: }
93: return(0);
94: }
98: static PetscErrorCode PCASMPrintSubdomains(PC pc)
99: {
100: PC_ASM *osm = (PC_ASM*)pc->data;
101: const char *prefix;
102: char fname[PETSC_MAX_PATH_LEN+1];
103: PetscViewer viewer, sviewer;
104: char *s;
105: PetscInt i,j,nidx;
106: const PetscInt *idx;
107: PetscMPIInt rank, size;
111: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
112: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
113: PCGetOptionsPrefix(pc,&prefix);
114: PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,NULL);
115: if (fname[0] == 0) { PetscStrcpy(fname,"stdout"); };
116: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
117: for (i=0; i<osm->n_local; i++) {
118: if (i < osm->n_local_true) {
119: ISGetLocalSize(osm->is[i],&nidx);
120: ISGetIndices(osm->is[i],&idx);
121: /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
122: PetscMalloc(sizeof(char)*(16*(nidx+1)+512), &s);
123: PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
124: PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D with overlap:\n", rank, size, i);
125: for (j=0; j<nidx; j++) {
126: PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
127: }
128: ISRestoreIndices(osm->is[i],&idx);
129: PetscViewerStringSPrintf(sviewer,"\n");
130: PetscViewerDestroy(&sviewer);
131: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
132: PetscViewerASCIISynchronizedPrintf(viewer, s);
133: PetscViewerFlush(viewer);
134: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
135: PetscFree(s);
136: if (osm->is_local) {
137: /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/
138: PetscMalloc(sizeof(char)*(16*(nidx+1)+512), &s);
139: PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
140: PetscViewerStringSPrintf(sviewer, "[%D:%D] Subdomain %D without overlap:\n", rank, size, i);
141: ISGetLocalSize(osm->is_local[i],&nidx);
142: ISGetIndices(osm->is_local[i],&idx);
143: for (j=0; j<nidx; j++) {
144: PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
145: }
146: ISRestoreIndices(osm->is_local[i],&idx);
147: PetscViewerStringSPrintf(sviewer,"\n");
148: PetscViewerDestroy(&sviewer);
149: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
150: PetscViewerASCIISynchronizedPrintf(viewer, s);
151: PetscViewerFlush(viewer);
152: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
153: PetscFree(s);
154: }
155: } else {
156: /* Participate in collective viewer calls. */
157: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
158: PetscViewerFlush(viewer);
159: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
160: /* Assume either all ranks have is_local or none do. */
161: if (osm->is_local) {
162: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
163: PetscViewerFlush(viewer);
164: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
165: }
166: }
167: }
168: PetscViewerFlush(viewer);
169: PetscViewerDestroy(&viewer);
170: return(0);
171: }
175: static PetscErrorCode PCSetUp_ASM(PC pc)
176: {
177: PC_ASM *osm = (PC_ASM*)pc->data;
179: PetscBool symset,flg;
180: PetscInt i,m,m_local,firstRow,lastRow;
181: MatReuse scall = MAT_REUSE_MATRIX;
182: IS isl;
183: KSP ksp;
184: PC subpc;
185: const char *prefix,*pprefix;
186: Vec vec;
187: DM *domain_dm = NULL;
190: if (!pc->setupcalled) {
192: if (!osm->type_set) {
193: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
194: if (symset && flg) osm->type = PC_ASM_BASIC;
195: }
197: /* Note: if subdomains have been set either via PCASMSetTotalSubdomains() or via PCASMSetLocalSubdomains(), osm->n_local_true will not be PETSC_DECIDE */
198: if (osm->n_local_true == PETSC_DECIDE) {
199: /* no subdomains given */
200: /* try pc->dm first, if allowed */
201: if (osm->dm_subdomains && pc->dm) {
202: PetscInt num_domains, d;
203: char **domain_names;
204: IS *inner_domain_is, *outer_domain_is;
205: DMCreateDomainDecomposition(pc->dm, &num_domains, &domain_names, &inner_domain_is, &outer_domain_is, &domain_dm);
206: if (num_domains) {
207: PCASMSetLocalSubdomains(pc, num_domains, outer_domain_is, inner_domain_is);
208: }
209: for (d = 0; d < num_domains; ++d) {
210: if (domain_names) {PetscFree(domain_names[d]);}
211: if (inner_domain_is) {ISDestroy(&inner_domain_is[d]);}
212: if (outer_domain_is) {ISDestroy(&outer_domain_is[d]);}
213: }
214: PetscFree(domain_names);
215: PetscFree(inner_domain_is);
216: PetscFree(outer_domain_is);
217: }
218: if (osm->n_local_true == PETSC_DECIDE) {
219: /* still no subdomains; use one subdomain per processor */
220: osm->n_local_true = 1;
221: }
222: }
223: { /* determine the global and max number of subdomains */
224: struct {PetscInt max,sum;} inwork,outwork;
225: inwork.max = osm->n_local_true;
226: inwork.sum = osm->n_local_true;
227: MPI_Allreduce(&inwork,&outwork,1,MPIU_2INT,PetscMaxSum_Op,PetscObjectComm((PetscObject)pc));
228: osm->n_local = outwork.max;
229: osm->n = outwork.sum;
230: }
231: if (!osm->is) { /* create the index sets */
232: PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);
233: }
234: if (osm->n_local_true > 1 && !osm->is_local) {
235: PetscMalloc(osm->n_local_true*sizeof(IS),&osm->is_local);
236: for (i=0; i<osm->n_local_true; i++) {
237: if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
238: ISDuplicate(osm->is[i],&osm->is_local[i]);
239: ISCopy(osm->is[i],osm->is_local[i]);
240: } else {
241: PetscObjectReference((PetscObject)osm->is[i]);
242: osm->is_local[i] = osm->is[i];
243: }
244: }
245: }
246: PCGetOptionsPrefix(pc,&prefix);
247: flg = PETSC_FALSE;
248: PetscOptionsGetBool(prefix,"-pc_asm_print_subdomains",&flg,NULL);
249: if (flg) { PCASMPrintSubdomains(pc); }
251: if (osm->overlap > 0) {
252: /* Extend the "overlapping" regions by a number of steps */
253: MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);
254: }
255: if (osm->sort_indices) {
256: for (i=0; i<osm->n_local_true; i++) {
257: ISSort(osm->is[i]);
258: if (osm->is_local) {
259: ISSort(osm->is_local[i]);
260: }
261: }
262: }
263: /* Create the local work vectors and scatter contexts */
264: MatGetVecs(pc->pmat,&vec,0);
265: PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->restriction);
266: if (osm->is_local) {PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->localization);}
267: PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->prolongation);
268: PetscMalloc(osm->n_local*sizeof(Vec),&osm->x);
269: PetscMalloc(osm->n_local*sizeof(Vec),&osm->y);
270: PetscMalloc(osm->n_local*sizeof(Vec),&osm->y_local);
271: VecGetOwnershipRange(vec, &firstRow, &lastRow);
272: for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) {
273: ISGetLocalSize(osm->is[i],&m);
274: VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);
275: ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);
276: VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);
277: ISDestroy(&isl);
278: VecDuplicate(osm->x[i],&osm->y[i]);
279: if (osm->is_local) {
280: ISLocalToGlobalMapping ltog;
281: IS isll;
282: const PetscInt *idx_local;
283: PetscInt *idx,nout;
285:       ISLocalToGlobalMappingCreateIS(osm->is[i],&ltog);
286: ISGetLocalSize(osm->is_local[i],&m_local);
287: ISGetIndices(osm->is_local[i], &idx_local);
288: PetscMalloc(m_local*sizeof(PetscInt),&idx);
289: ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m_local,idx_local,&nout,idx);
290:       ISLocalToGlobalMappingDestroy(&ltog);
291: if (nout != m_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_local not a subset of is");
292: ISRestoreIndices(osm->is_local[i], &idx_local);
293: ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,PETSC_OWN_POINTER,&isll);
294: ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);
295: VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);
296: VecScatterCreate(osm->y[i],isll,osm->y_local[i],isl,&osm->localization[i]);
297: ISDestroy(&isll);
299: VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);
300: ISDestroy(&isl);
301: } else {
302: VecGetLocalSize(vec,&m_local);
304: osm->y_local[i] = osm->y[i];
306: PetscObjectReference((PetscObject) osm->y[i]);
308: osm->prolongation[i] = osm->restriction[i];
310: PetscObjectReference((PetscObject) osm->restriction[i]);
311: }
312: }
313: if (firstRow != lastRow) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %d != %d", firstRow, lastRow);
314: for (i=osm->n_local_true; i<osm->n_local; i++) {
315: VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);
316: VecDuplicate(osm->x[i],&osm->y[i]);
317: VecDuplicate(osm->x[i],&osm->y_local[i]);
318: ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);
319: VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);
320: if (osm->is_local) {
321: VecScatterCreate(osm->y[i],isl,osm->y_local[i],isl,&osm->localization[i]);
322: VecScatterCreate(vec,isl,osm->x[i],isl,&osm->prolongation[i]);
323: } else {
324: osm->prolongation[i] = osm->restriction[i];
325: PetscObjectReference((PetscObject) osm->restriction[i]);
326: }
327: ISDestroy(&isl);
328: }
329: VecDestroy(&vec);
331: if (!osm->ksp) {
332: /* Create the local solvers */
333: PetscMalloc(osm->n_local_true*sizeof(KSP*),&osm->ksp);
334: if (domain_dm) {
335: PetscInfo(pc,"Setting up ASM subproblems using the embedded DM\n");
336: }
337: for (i=0; i<osm->n_local_true; i++) {
338: KSPCreate(PETSC_COMM_SELF,&ksp);
339: PetscLogObjectParent(pc,ksp);
340: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
341: KSPSetType(ksp,KSPPREONLY);
342: KSPGetPC(ksp,&subpc);
343: PCGetOptionsPrefix(pc,&prefix);
344: KSPSetOptionsPrefix(ksp,prefix);
345: KSPAppendOptionsPrefix(ksp,"sub_");
346: if (domain_dm) {
347: KSPSetDM(ksp, domain_dm[i]);
348: KSPSetDMActive(ksp, PETSC_FALSE);
349: DMDestroy(&domain_dm[i]);
350: }
351: osm->ksp[i] = ksp;
352: }
353: if (domain_dm) {
354: PetscFree(domain_dm);
355: }
356: }
357: scall = MAT_INITIAL_MATRIX;
358: } else {
359: /*
360: Destroy the blocks from the previous iteration
361: */
362: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
363: MatDestroyMatrices(osm->n_local_true,&osm->pmat);
364: scall = MAT_INITIAL_MATRIX;
365: }
366: }
368: /*
369: Extract out the submatrices
370: */
371: MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);
372: if (scall == MAT_INITIAL_MATRIX) {
373: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
374: for (i=0; i<osm->n_local_true; i++) {
375: PetscLogObjectParent(pc,osm->pmat[i]);
376: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
377: }
378: }
380: /* Return control to the user so that the submatrices can be modified (e.g., to apply
381: different boundary conditions for the submatrices than for the global problem) */
382: PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);
384: /*
385: Loop over subdomains putting them into local ksp
386: */
387: for (i=0; i<osm->n_local_true; i++) {
388: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);
389: if (!pc->setupcalled) {
390: KSPSetFromOptions(osm->ksp[i]);
391: }
392: }
393: return(0);
394: }
398: static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
399: {
400: PC_ASM *osm = (PC_ASM*)pc->data;
402: PetscInt i;
405: for (i=0; i<osm->n_local_true; i++) {
406: KSPSetUp(osm->ksp[i]);
407: }
408: return(0);
409: }
413: static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
414: {
415: PC_ASM *osm = (PC_ASM*)pc->data;
417: PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true;
418: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
421: /*
422: Support for limiting the restriction or interpolation to only local
423: subdomain values (leaving the other values 0).
424: */
425: if (!(osm->type & PC_ASM_RESTRICT)) {
426: forward = SCATTER_FORWARD_LOCAL;
427: /* have to zero the work RHS since scatter may leave some slots empty */
428: for (i=0; i<n_local_true; i++) {
429: VecZeroEntries(osm->x[i]);
430: }
431: }
432: if (!(osm->type & PC_ASM_INTERPOLATE)) reverse = SCATTER_REVERSE_LOCAL;
434: for (i=0; i<n_local; i++) {
435: VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
436: }
437: VecZeroEntries(y);
438: /* do the local solves */
439: for (i=0; i<n_local_true; i++) {
440: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
441: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
442: if (osm->localization) {
443: VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
444: VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
445: }
446: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
447: }
448: /* handle the rest of the scatters that do not have local solves */
449: for (i=n_local_true; i<n_local; i++) {
450: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
451: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
452: }
453: for (i=0; i<n_local; i++) {
454: VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
455: }
456: return(0);
457: }
461: static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
462: {
463: PC_ASM *osm = (PC_ASM*)pc->data;
465: PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true;
466: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
469: /*
470: Support for limiting the restriction or interpolation to only local
471: subdomain values (leaving the other values 0).
473: Note: these are reversed from the PCApply_ASM() because we are applying the
474: transpose of the three terms
475: */
476: if (!(osm->type & PC_ASM_INTERPOLATE)) {
477: forward = SCATTER_FORWARD_LOCAL;
478: /* have to zero the work RHS since scatter may leave some slots empty */
479: for (i=0; i<n_local_true; i++) {
480: VecZeroEntries(osm->x[i]);
481: }
482: }
483: if (!(osm->type & PC_ASM_RESTRICT)) reverse = SCATTER_REVERSE_LOCAL;
485: for (i=0; i<n_local; i++) {
486: VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
487: }
488: VecZeroEntries(y);
489: /* do the local solves */
490: for (i=0; i<n_local_true; i++) {
491: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
492: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
493: if (osm->localization) {
494: VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
495: VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
496: }
497: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
498: }
499: /* handle the rest of the scatters that do not have local solves */
500: for (i=n_local_true; i<n_local; i++) {
501: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
502: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
503: }
504: for (i=0; i<n_local; i++) {
505: VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
506: }
507: return(0);
508: }
512: static PetscErrorCode PCReset_ASM(PC pc)
513: {
514: PC_ASM *osm = (PC_ASM*)pc->data;
516: PetscInt i;
519: if (osm->ksp) {
520: for (i=0; i<osm->n_local_true; i++) {
521: KSPReset(osm->ksp[i]);
522: }
523: }
524: if (osm->pmat) {
525: if (osm->n_local_true > 0) {
526: MatDestroyMatrices(osm->n_local_true,&osm->pmat);
527: }
528: }
529: if (osm->restriction) {
530: for (i=0; i<osm->n_local; i++) {
531: VecScatterDestroy(&osm->restriction[i]);
532: if (osm->localization) {VecScatterDestroy(&osm->localization[i]);}
533: VecScatterDestroy(&osm->prolongation[i]);
534: VecDestroy(&osm->x[i]);
535: VecDestroy(&osm->y[i]);
536: VecDestroy(&osm->y_local[i]);
537: }
538: PetscFree(osm->restriction);
539: if (osm->localization) {PetscFree(osm->localization);}
540: PetscFree(osm->prolongation);
541: PetscFree(osm->x);
542: PetscFree(osm->y);
543: PetscFree(osm->y_local);
544: }
545: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
547: osm->is = 0;
548: osm->is_local = 0;
549: return(0);
550: }
554: static PetscErrorCode PCDestroy_ASM(PC pc)
555: {
556: PC_ASM *osm = (PC_ASM*)pc->data;
558: PetscInt i;
561: PCReset_ASM(pc);
562: if (osm->ksp) {
563: for (i=0; i<osm->n_local_true; i++) {
564: KSPDestroy(&osm->ksp[i]);
565: }
566: PetscFree(osm->ksp);
567: }
568: PetscFree(pc->data);
569: return(0);
570: }
574: static PetscErrorCode PCSetFromOptions_ASM(PC pc)
575: {
576: PC_ASM *osm = (PC_ASM*)pc->data;
578: PetscInt blocks,ovl;
579: PetscBool symset,flg;
580: PCASMType asmtype;
583: /* set the type to symmetric if matrix is symmetric */
584: if (!osm->type_set && pc->pmat) {
585: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
586: if (symset && flg) osm->type = PC_ASM_BASIC;
587: }
588: PetscOptionsHead("Additive Schwarz options");
589: PetscOptionsBool("-pc_asm_dm_subdomains","Use DMCreateDomainDecomposition() to define subdomains","PCASMSetDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
590: PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);
591: if (flg) {
592: PCASMSetTotalSubdomains(pc,blocks,NULL,NULL);
593: osm->dm_subdomains = PETSC_FALSE;
594: }
595: PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);
596: if (flg) {
597: PCASMSetOverlap(pc,ovl);
598: osm->dm_subdomains = PETSC_FALSE;
599: }
600: flg = PETSC_FALSE;
601: PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);
602: if (flg) {PCASMSetType(pc,asmtype); }
603: PetscOptionsTail();
604: return(0);
605: }
607: /*------------------------------------------------------------------------------------*/
611: static PetscErrorCode PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
612: {
613: PC_ASM *osm = (PC_ASM*)pc->data;
615: PetscInt i;
618: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
619: if (pc->setupcalled && (n != osm->n_local_true || is)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
621: if (!pc->setupcalled) {
622: if (is) {
623: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is[i]);}
624: }
625: if (is_local) {
626: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is_local[i]);}
627: }
628: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
630: osm->n_local_true = n;
631: osm->is = 0;
632: osm->is_local = 0;
633: if (is) {
634: PetscMalloc(n*sizeof(IS),&osm->is);
635: for (i=0; i<n; i++) osm->is[i] = is[i];
636: /* Flag indicating that the user has set overlapping subdomains so PCASM should not increase their size. */
637: osm->overlap = -1;
638: }
639: if (is_local) {
640: PetscMalloc(n*sizeof(IS),&osm->is_local);
641: for (i=0; i<n; i++) osm->is_local[i] = is_local[i];
642: if (!is) {
643: PetscMalloc(osm->n_local_true*sizeof(IS),&osm->is);
644: for (i=0; i<osm->n_local_true; i++) {
645: if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
646: ISDuplicate(osm->is_local[i],&osm->is[i]);
647: ISCopy(osm->is_local[i],osm->is[i]);
648: } else {
649: PetscObjectReference((PetscObject)osm->is_local[i]);
650: osm->is[i] = osm->is_local[i];
651: }
652: }
653: }
654: }
655: }
656: return(0);
657: }
661: static PetscErrorCode PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is,IS *is_local)
662: {
663: PC_ASM *osm = (PC_ASM*)pc->data;
665: PetscMPIInt rank,size;
666: PetscInt n;
669: if (N < 1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
670:   if (is || is_local) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\tthey cannot be set globally yet.");
672: /*
673: Split the subdomains equally among all processors
674: */
675: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
676: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
677: n = N/size + ((N % size) > rank);
678: if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
679: if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
680: if (!pc->setupcalled) {
681: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
683: osm->n_local_true = n;
684: osm->is = 0;
685: osm->is_local = 0;
686: }
687: return(0);
688: }
692: static PetscErrorCode PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
693: {
694: PC_ASM *osm = (PC_ASM*)pc->data;
697: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
698: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
699: if (!pc->setupcalled) osm->overlap = ovl;
700: return(0);
701: }
705: static PetscErrorCode PCASMSetType_ASM(PC pc,PCASMType type)
706: {
707: PC_ASM *osm = (PC_ASM*)pc->data;
710: osm->type = type;
711: osm->type_set = PETSC_TRUE;
712: return(0);
713: }
717: static PetscErrorCode PCASMSetSortIndices_ASM(PC pc,PetscBool doSort)
718: {
719: PC_ASM *osm = (PC_ASM*)pc->data;
722: osm->sort_indices = doSort;
723: return(0);
724: }
728: static PetscErrorCode PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
729: {
730: PC_ASM *osm = (PC_ASM*)pc->data;
734:   if (osm->n_local_true < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
736: if (n_local) *n_local = osm->n_local_true;
737: if (first_local) {
738: MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
739: *first_local -= osm->n_local_true;
740: }
741: if (ksp) {
742: /* Assume that local solves are now different; not necessarily
743: true though! This flag is used only for PCView_ASM() */
744: *ksp = osm->ksp;
745: osm->same_local_solves = PETSC_FALSE;
746: }
747: return(0);
748: }
752: /*@C
753: PCASMSetLocalSubdomains - Sets the local subdomains (for this processor only) for the additive Schwarz preconditioner.
755: Collective on PC
757: Input Parameters:
758: + pc - the preconditioner context
759: . n - the number of subdomains for this processor (default value = 1)
760: . is - the index set that defines the subdomains for this processor
761: (or NULL for PETSc to determine subdomains)
762: - is_local - the index sets that define the local part of the subdomains for this processor
763: (or NULL to use the default of 1 subdomain per process)
765: Notes:
766: The IS numbering is in the parallel, global numbering of the vector for both is and is_local
768: By default the ASM preconditioner uses 1 block per processor.
770: Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
772: Level: advanced
774: .keywords: PC, ASM, set, local, subdomains, additive Schwarz
776: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
777: PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
778: @*/
779: PetscErrorCode PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
780: {
785: PetscTryMethod(pc,"PCASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));
786: return(0);
787: }
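/*
   A minimal usage sketch (hypothetical application code, not part of asm.c): split
   this process's rows of an assembled parallel matrix into two ASM blocks and hand
   them to a PC that has already been set to PCASM.  The names SetTwoLocalBlocks,
   pc and A are assumptions made for illustration.
*/
static PetscErrorCode SetTwoLocalBlocks(PC pc,Mat A)
{
  PetscInt rstart,rend,nrows,half;
  IS       blocks[2];

  MatGetOwnershipRange(A,&rstart,&rend);
  nrows = rend - rstart;
  half  = nrows/2;
  ISCreateStride(PETSC_COMM_SELF,half,rstart,1,&blocks[0]);            /* first half of the local rows */
  ISCreateStride(PETSC_COMM_SELF,nrows-half,rstart+half,1,&blocks[1]); /* remaining local rows */
  PCASMSetLocalSubdomains(pc,2,blocks,NULL);
  ISDestroy(&blocks[0]);  /* safe: PCASM has taken its own references above */
  ISDestroy(&blocks[1]);
  return(0);
}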
791: /*@C
792: PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
793: additive Schwarz preconditioner. Either all or no processors in the
794: PC communicator must call this routine, with the same index sets.
796: Collective on PC
798: Input Parameters:
799: + pc - the preconditioner context
800: . N - the number of subdomains for all processors
801: . is - the index sets that define the subdomains for all processors
802:          (or NULL to ask PETSc to come up with subdomains)
803: - is_local - the index sets that define the local part of the subdomains for this processor
804: (or NULL to use the default of 1 subdomain per process)
806: Options Database Key:
807: To set the total number of subdomain blocks rather than specify the
808: index sets, use the option
809: . -pc_asm_blocks <blks> - Sets total blocks
811: Notes:
812: Currently you cannot use this to set the actual subdomains with the argument is.
814: By default the ASM preconditioner uses 1 block per processor.
816: These index sets cannot be destroyed until after completion of the
817: linear solves for which the ASM preconditioner is being used.
819: Use PCASMSetLocalSubdomains() to set local subdomains.
821: The IS numbering is in the parallel, global numbering of the vector for both is and is_local
823: Level: advanced
825: .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
827: .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
828: PCASMCreateSubdomains2D()
829: @*/
830: PetscErrorCode PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
831: {
836: PetscTryMethod(pc,"PCASMSetTotalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,N,is,is_local));
837: return(0);
838: }
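/*
   A brief sketch (hypothetical application code; pc is assumed to be a PCASM
   preconditioner that has not yet been set up): request 16 subdomain blocks in
   total and let PETSc split them evenly across the processes.
*/
PCASMSetTotalSubdomains(pc,16,NULL,NULL);  /* same effect as the option -pc_asm_blocks 16 */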
842: /*@
843: PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
844: additive Schwarz preconditioner. Either all or no processors in the
845: PC communicator must call this routine.
847: Logically Collective on PC
849: Input Parameters:
850: + pc - the preconditioner context
851: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
853: Options Database Key:
854: . -pc_asm_overlap <ovl> - Sets overlap
856: Notes:
857: By default the ASM preconditioner uses 1 block per processor. To use
858:     multiple blocks per processor, see PCASMSetTotalSubdomains() and
859: PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
861: The overlap defaults to 1, so if one desires that no additional
862: overlap be computed beyond what may have been set with a call to
863: PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
864:     must be set to 0. In particular, if one does not explicitly set
865:     the subdomains in an application code, then all overlap would be computed
866: internally by PETSc, and using an overlap of 0 would result in an ASM
867: variant that is equivalent to the block Jacobi preconditioner.
869: Note that one can define initial index sets with any overlap via
870: PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
871: PCASMSetOverlap() merely allows PETSc to extend that overlap further
872: if desired.
874: Level: intermediate
876: .keywords: PC, ASM, set, overlap
878: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
879: PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
880: @*/
881: PetscErrorCode PCASMSetOverlap(PC pc,PetscInt ovl)
882: {
888: PetscTryMethod(pc,"PCASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
889: return(0);
890: }
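/*
   A brief sketch (hypothetical application code; pc is assumed to be a PCASM
   preconditioner that has not yet been set up): extend each PETSc-generated
   subdomain by two layers of overlap.
*/
PCASMSetOverlap(pc,2);  /* same effect as -pc_asm_overlap 2; use 0 for block-Jacobi-like behaviour */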
894: /*@
895: PCASMSetType - Sets the type of restriction and interpolation used
896: for local problems in the additive Schwarz method.
898: Logically Collective on PC
900: Input Parameters:
901: + pc - the preconditioner context
902: - type - variant of ASM, one of
903: .vb
904: PC_ASM_BASIC - full interpolation and restriction
905: PC_ASM_RESTRICT - full restriction, local processor interpolation
906: PC_ASM_INTERPOLATE - full interpolation, local processor restriction
907: PC_ASM_NONE - local processor restriction and interpolation
908: .ve
910: Options Database Key:
911: . -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
913: Level: intermediate
915: .keywords: PC, ASM, set, type
917: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
918: PCASMCreateSubdomains2D()
919: @*/
920: PetscErrorCode PCASMSetType(PC pc,PCASMType type)
921: {
927: PetscTryMethod(pc,"PCASMSetType_C",(PC,PCASMType),(pc,type));
928: return(0);
929: }
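/*
   A brief sketch (hypothetical application code; pc is assumed to be a PCASM
   preconditioner): select the standard additive Schwarz variant instead of the
   default restricted form.
*/
PCASMSetType(pc,PC_ASM_BASIC);  /* same effect as -pc_asm_type basic */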
933: /*@
934: PCASMSetSortIndices - Determines whether subdomain indices are sorted.
936: Logically Collective on PC
938: Input Parameters:
939: + pc - the preconditioner context
940: - doSort - sort the subdomain indices
942: Level: intermediate
944: .keywords: PC, ASM, set, type
946: .seealso: PCASMSetLocalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
947: PCASMCreateSubdomains2D()
948: @*/
949: PetscErrorCode PCASMSetSortIndices(PC pc,PetscBool doSort)
950: {
956: PetscTryMethod(pc,"PCASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
957: return(0);
958: }
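/*
   A brief sketch (hypothetical application code; pc is assumed to be a PCASM
   preconditioner whose subdomains were supplied by the application): keep the
   subdomain indices in the order the application provided them.
*/
PCASMSetSortIndices(pc,PETSC_FALSE);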
962: /*@C
963: PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
964: this processor.
966: Collective on PC iff first_local is requested
968: Input Parameter:
969: . pc - the preconditioner context
971: Output Parameters:
972: + n_local - the number of blocks on this processor or NULL
973: . first_local - the global number of the first block on this processor or NULL,
974: all processors must request or all must pass NULL
975: - ksp - the array of KSP contexts
977: Note:
978:     After PCASMGetSubKSP() the array of KSP contexts is not to be freed.
980: Currently for some matrix implementations only 1 block per processor
981: is supported.
983: You must call KSPSetUp() before calling PCASMGetSubKSP().
985: Fortran note:
986: The output argument 'ksp' must be an array of sufficient length or NULL_OBJECT. The latter can be used to learn the necessary length.
988: Level: advanced
990: .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
992: .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
993: PCASMCreateSubdomains2D(),
994: @*/
995: PetscErrorCode PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
996: {
1001: PetscUseMethod(pc,"PCASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1002: return(0);
1003: }
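/*
   A usage sketch (hypothetical application code, not part of asm.c): once the outer
   KSP has been set up, fetch the block solvers and configure each one directly.  The
   names ConfigureBlockSolvers and ksp are assumptions; GMRES/ILU on the blocks is
   only an example choice.
*/
static PetscErrorCode ConfigureBlockSolvers(KSP ksp)
{
  PC       pc,subpc;
  KSP      *subksp;
  PetscInt i,nlocal,first;

  KSPSetUp(ksp);                              /* must precede PCASMGetSubKSP() */
  KSPGetPC(ksp,&pc);
  PCASMGetSubKSP(pc,&nlocal,&first,&subksp);  /* subksp must not be freed by the caller */
  for (i=0; i<nlocal; i++) {
    KSPSetType(subksp[i],KSPGMRES);
    KSPGetPC(subksp[i],&subpc);
    PCSetType(subpc,PCILU);
  }
  return(0);
}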
1005: /* -------------------------------------------------------------------------------------*/
1006: /*MC
1007: PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1008: its own KSP object.
1010: Options Database Keys:
1011: + -pc_asm_blocks <blks> - Sets total blocks
1012: . -pc_asm_overlap <ovl> - Sets overlap
1013: - -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
1015: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1016: will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
1017: -pc_asm_type basic to use the standard ASM.
1019: Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1020: than one processor. Defaults to one block per processor.
1022: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1023: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1025:     To set the options on the solvers separately for each block, call PCASMGetSubKSP()
1026: and set the options directly on the resulting KSP object (you can access its PC
1027: with KSPGetPC())
1030: Level: beginner
1032: Concepts: additive Schwarz method
1034: References:
1035: An additive variant of the Schwarz alternating method for the case of many subregions
1036: M Dryja, OB Widlund - Courant Institute, New York University Technical report
1038: Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1039: Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1041: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1042: PCBJACOBI, PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
1043: PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()
1045: M*/
1049: PETSC_EXTERN PetscErrorCode PCCreate_ASM(PC pc)
1050: {
1052: PC_ASM *osm;
1055: PetscNewLog(pc,PC_ASM,&osm);
1057: osm->n = PETSC_DECIDE;
1058: osm->n_local = 0;
1059: osm->n_local_true = PETSC_DECIDE;
1060: osm->overlap = 1;
1061: osm->ksp = 0;
1062: osm->restriction = 0;
1063: osm->localization = 0;
1064: osm->prolongation = 0;
1065: osm->x = 0;
1066: osm->y = 0;
1067: osm->y_local = 0;
1068: osm->is = 0;
1069: osm->is_local = 0;
1070: osm->mat = 0;
1071: osm->pmat = 0;
1072: osm->type = PC_ASM_RESTRICT;
1073: osm->same_local_solves = PETSC_TRUE;
1074: osm->sort_indices = PETSC_TRUE;
1075: osm->dm_subdomains = PETSC_FALSE;
1077: pc->data = (void*)osm;
1078: pc->ops->apply = PCApply_ASM;
1079: pc->ops->applytranspose = PCApplyTranspose_ASM;
1080: pc->ops->setup = PCSetUp_ASM;
1081: pc->ops->reset = PCReset_ASM;
1082: pc->ops->destroy = PCDestroy_ASM;
1083: pc->ops->setfromoptions = PCSetFromOptions_ASM;
1084: pc->ops->setuponblocks = PCSetUpOnBlocks_ASM;
1085: pc->ops->view = PCView_ASM;
1086: pc->ops->applyrichardson = 0;
1088: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",PCASMSetLocalSubdomains_ASM);
1089: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",PCASMSetTotalSubdomains_ASM);
1090: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetOverlap_C",PCASMSetOverlap_ASM);
1091: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetType_C",PCASMSetType_ASM);
1092: PetscObjectComposeFunction((PetscObject)pc,"PCASMSetSortIndices_C",PCASMSetSortIndices_ASM);
1093: PetscObjectComposeFunction((PetscObject)pc,"PCASMGetSubKSP_C",PCASMGetSubKSP_ASM);
1094: return(0);
1095: }
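/*
   An end-to-end sketch of using PCASM from application code (hypothetical, not part
   of asm.c; assumes petscksp.h is included and that A, b and x are an assembled
   parallel Mat and conforming Vecs):
*/
static PetscErrorCode SolveWithASM(Mat A,Vec b,Vec x)
{
  KSP ksp;
  PC  pc;

  KSPCreate(PetscObjectComm((PetscObject)A),&ksp);
  KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCASM);     /* additive Schwarz; default type is -pc_asm_type restrict */
  PCASMSetOverlap(pc,1);   /* the default overlap, shown for clarity */
  KSPSetFromOptions(ksp);  /* honour -pc_asm_blocks, -pc_asm_type, -sub_pc_type, ... */
  KSPSolve(ksp,b,x);
  KSPDestroy(&ksp);
  return(0);
}
/* run-time example: -sub_ksp_type preonly -sub_pc_type ilu -pc_asm_type basic */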
1099: /*@C
1100: PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1101:    preconditioner for any problem on a general grid.
1103: Collective
1105: Input Parameters:
1106: + A - The global matrix operator
1107: - n - the number of local blocks
1109: Output Parameters:
1110: . outis - the array of index sets defining the subdomains
1112: Level: advanced
1114: Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
1115: from these if you use PCASMSetLocalSubdomains()
1117:    In the Fortran version you must provide the array outis[] already allocated with length n.
1119: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1121: .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
1122: @*/
1123: PetscErrorCode PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1124: {
1125: MatPartitioning mpart;
1126: const char *prefix;
1127: PetscErrorCode (*f)(Mat,Mat*);
1128: PetscMPIInt size;
1129: PetscInt i,j,rstart,rend,bs;
1130: PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1131: Mat Ad = NULL, adj;
1132: IS ispart,isnumb,*is;
1133: PetscErrorCode ierr;
1138: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1140: /* Get prefix, row distribution, and block size */
1141: MatGetOptionsPrefix(A,&prefix);
1142: MatGetOwnershipRange(A,&rstart,&rend);
1143: MatGetBlockSize(A,&bs);
1144: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1146: /* Get diagonal block from matrix if possible */
1147: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1148: PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",&f);
1149: if (f) {
1150: MatGetDiagonalBlock(A,&Ad);
1151: } else if (size == 1) {
1152: Ad = A;
1153: }
1154: if (Ad) {
1155: PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1156: if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1157: }
1158: if (Ad && n > 1) {
1159: PetscBool match,done;
1160: /* Try to setup a good matrix partitioning if available */
1161: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1162: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1163: MatPartitioningSetFromOptions(mpart);
1164: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1165: if (!match) {
1166: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1167: }
1168: if (!match) { /* assume a "good" partitioner is available */
1169: PetscInt na;
1170: const PetscInt *ia,*ja;
1171: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1172: if (done) {
1173: /* Build adjacency matrix by hand. Unfortunately a call to
1174: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1175: remove the block-aij structure and we cannot expect
1176: MatPartitioning to split vertices as we need */
1177: PetscInt i,j,len,nnz,cnt,*iia=0,*jja=0;
1178: const PetscInt *row;
1179: nnz = 0;
1180: for (i=0; i<na; i++) { /* count number of nonzeros */
1181: len = ia[i+1] - ia[i];
1182: row = ja + ia[i];
1183: for (j=0; j<len; j++) {
1184: if (row[j] == i) { /* don't count diagonal */
1185: len--; break;
1186: }
1187: }
1188: nnz += len;
1189: }
1190: PetscMalloc((na+1)*sizeof(PetscInt),&iia);
1191: PetscMalloc((nnz)*sizeof(PetscInt),&jja);
1192: nnz = 0;
1193: iia[0] = 0;
1194: for (i=0; i<na; i++) { /* fill adjacency */
1195: cnt = 0;
1196: len = ia[i+1] - ia[i];
1197: row = ja + ia[i];
1198: for (j=0; j<len; j++) {
1199: if (row[j] != i) { /* if not diagonal */
1200: jja[nnz+cnt++] = row[j];
1201: }
1202: }
1203: nnz += cnt;
1204: iia[i+1] = nnz;
1205: }
1206: /* Partitioning of the adjacency matrix */
1207: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1208: MatPartitioningSetAdjacency(mpart,adj);
1209: MatPartitioningSetNParts(mpart,n);
1210: MatPartitioningApply(mpart,&ispart);
1211: ISPartitioningToNumbering(ispart,&isnumb);
1212: MatDestroy(&adj);
1213: foundpart = PETSC_TRUE;
1214: }
1215: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1216: }
1217: MatPartitioningDestroy(&mpart);
1218: }
1220: PetscMalloc(n*sizeof(IS),&is);
1221: *outis = is;
1223: if (!foundpart) {
1225: /* Partitioning by contiguous chunks of rows */
1227: PetscInt mbs = (rend-rstart)/bs;
1228: PetscInt start = rstart;
1229: for (i=0; i<n; i++) {
1230: PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1231: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1232: start += count;
1233: }
1235: } else {
1237: /* Partitioning by adjacency of diagonal block */
1239: const PetscInt *numbering;
1240: PetscInt *count,nidx,*indices,*newidx,start=0;
1241: /* Get node count in each partition */
1242: PetscMalloc(n*sizeof(PetscInt),&count);
1243: ISPartitioningCount(ispart,n,count);
1244: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1245: for (i=0; i<n; i++) count[i] *= bs;
1246: }
1247: /* Build indices from node numbering */
1248: ISGetLocalSize(isnumb,&nidx);
1249: PetscMalloc(nidx*sizeof(PetscInt),&indices);
1250: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1251: ISGetIndices(isnumb,&numbering);
1252: PetscSortIntWithPermutation(nidx,numbering,indices);
1253: ISRestoreIndices(isnumb,&numbering);
1254: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1255: PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);
1256: for (i=0; i<nidx; i++) {
1257: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1258: }
1259: PetscFree(indices);
1260: nidx *= bs;
1261: indices = newidx;
1262: }
1263: /* Shift to get global indices */
1264: for (i=0; i<nidx; i++) indices[i] += rstart;
1266: /* Build the index sets for each block */
1267: for (i=0; i<n; i++) {
1268: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1269: ISSort(is[i]);
1270: start += count[i];
1271: }
1273: PetscFree(count);
1274: PetscFree(indices);
1275: ISDestroy(&isnumb);
1276: ISDestroy(&ispart);
1278: }
1279: return(0);
1280: }
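/*
   A usage sketch (hypothetical application code, not part of asm.c): create four
   nonoverlapping local subdomains from the matrix, hand them to PCASM (which will
   add the overlap), and release the application's references.  The names
   UseCreatedSubdomains, pc and A are assumptions.
*/
static PetscErrorCode UseCreatedSubdomains(PC pc,Mat A)
{
  IS       *is;
  PetscInt nblocks = 4;

  PCASMCreateSubdomains(A,nblocks,&is);
  PCASMSetLocalSubdomains(pc,nblocks,is,NULL);
  PCASMDestroySubdomains(nblocks,is,NULL);  /* safe: PCASM has taken its own references */
  return(0);
}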
1284: /*@C
1285: PCASMDestroySubdomains - Destroys the index sets created with
1286: PCASMCreateSubdomains(). Should be called after setting subdomains
1287: with PCASMSetLocalSubdomains().
1289: Collective
1291: Input Parameters:
1292: + n - the number of index sets
1293: . is - the array of index sets
1294: - is_local - the array of local index sets, can be NULL
1296: Level: advanced
1298: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1300: .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1301: @*/
1302: PetscErrorCode PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1303: {
1304: PetscInt i;
1308: if (n <= 0) return(0);
1309: if (is) {
1311: for (i=0; i<n; i++) { ISDestroy(&is[i]); }
1312: PetscFree(is);
1313: }
1314: if (is_local) {
1316: for (i=0; i<n; i++) { ISDestroy(&is_local[i]); }
1317: PetscFree(is_local);
1318: }
1319: return(0);
1320: }
1324: /*@
1325: PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1326: preconditioner for a two-dimensional problem on a regular grid.
1328: Not Collective
1330: Input Parameters:
1331: + m, n - the number of mesh points in the x and y directions
1332: . M, N - the number of subdomains in the x and y directions
1333: . dof - degrees of freedom per node
1334: - overlap - overlap in mesh lines
1336: Output Parameters:
1337: + Nsub - the number of subdomains created
1338: . is - array of index sets defining overlapping (if overlap > 0) subdomains
1339: - is_local - array of index sets defining non-overlapping subdomains
1341: Note:
1342:    Presently PCASMCreateSubdomains2D() is valid only for sequential
1343: preconditioners. More general related routines are
1344: PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1346: Level: advanced
1348: .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1350: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1351: PCASMSetOverlap()
1352: @*/
1353: PetscErrorCode PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is,IS **is_local)
1354: {
1355: PetscInt i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
1357: PetscInt nidx,*idx,loc,ii,jj,count;
1360:   if (dof != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only supports one degree of freedom per node, dof = 1");
1362: *Nsub = N*M;
1363: PetscMalloc((*Nsub)*sizeof(IS*),is);
1364: PetscMalloc((*Nsub)*sizeof(IS*),is_local);
1365: ystart = 0;
1366: loc_outer = 0;
1367: for (i=0; i<N; i++) {
1368: height = n/N + ((n % N) > i); /* height of subdomain */
1369: if (height < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1370: yleft = ystart - overlap; if (yleft < 0) yleft = 0;
1371: yright = ystart + height + overlap; if (yright > n) yright = n;
1372: xstart = 0;
1373: for (j=0; j<M; j++) {
1374: width = m/M + ((m % M) > j); /* width of subdomain */
1375: if (width < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1376: xleft = xstart - overlap; if (xleft < 0) xleft = 0;
1377: xright = xstart + width + overlap; if (xright > m) xright = m;
1378: nidx = (xright - xleft)*(yright - yleft);
1379: PetscMalloc(nidx*sizeof(PetscInt),&idx);
1380: loc = 0;
1381: for (ii=yleft; ii<yright; ii++) {
1382: count = m*ii + xleft;
1383: for (jj=xleft; jj<xright; jj++) idx[loc++] = count++;
1384: }
1385: ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,PETSC_COPY_VALUES,(*is)+loc_outer);
1386: if (overlap == 0) {
1387: PetscObjectReference((PetscObject)(*is)[loc_outer]);
1389: (*is_local)[loc_outer] = (*is)[loc_outer];
1390: } else {
1391: for (loc=0,ii=ystart; ii<ystart+height; ii++) {
1392: for (jj=xstart; jj<xstart+width; jj++) {
1393: idx[loc++] = m*ii + jj;
1394: }
1395: }
1396: ISCreateGeneral(PETSC_COMM_SELF,loc,idx,PETSC_COPY_VALUES,*is_local+loc_outer);
1397: }
1398: PetscFree(idx);
1399: xstart += width;
1400: loc_outer++;
1401: }
1402: ystart += height;
1403: }
1404: for (i=0; i<*Nsub; i++) { ISSort((*is)[i]); }
1405: return(0);
1406: }
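/*
   A sequential usage sketch (hypothetical application code, per the note above):
   decompose a 64 x 64 grid with one degree of freedom per node into a 4 x 4 array
   of subdomains with one mesh line of overlap, then pass the index sets to PCASM.
   The names Decompose2D and pc are assumptions.
*/
static PetscErrorCode Decompose2D(PC pc)
{
  PetscInt Nsub;
  IS       *is,*is_local;

  PCASMCreateSubdomains2D(64,64,4,4,1,1,&Nsub,&is,&is_local);
  PCASMSetLocalSubdomains(pc,Nsub,is,is_local);
  PCASMDestroySubdomains(Nsub,is,is_local);  /* PCASM keeps its own references */
  return(0);
}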
1410: /*@C
1411: PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1412: only) for the additive Schwarz preconditioner.
1414: Not Collective
1416: Input Parameter:
1417: . pc - the preconditioner context
1419: Output Parameters:
1420: + n - the number of subdomains for this processor (default value = 1)
1421: . is - the index sets that define the subdomains for this processor
1422: - is_local - the index sets that define the local part of the subdomains for this processor (can be NULL)
1425: Notes:
1426: The IS numbering is in the parallel, global numbering of the vector.
1428: Level: advanced
1430: .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1432: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1433: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1434: @*/
1435: PetscErrorCode PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1436: {
1437: PC_ASM *osm;
1439: PetscBool match;
1445: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1446: if (!match) {
1447: if (n) *n = 0;
1448: if (is) *is = NULL;
1449: } else {
1450: osm = (PC_ASM*)pc->data;
1451: if (n) *n = osm->n_local_true;
1452: if (is) *is = osm->is;
1453: if (is_local) *is_local = osm->is_local;
1454: }
1455: return(0);
1456: }
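/*
   A brief sketch (hypothetical application code; pc is assumed to be a set-up PCASM
   preconditioner): inspect the subdomains PCASM is actually using.  The returned
   index sets belong to the PC and must not be destroyed by the caller.
*/
PetscInt n;
IS       *is,*is_local;
PCASMGetLocalSubdomains(pc,&n,&is,&is_local);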
1460: /*@C
1461: PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1462: only) for the additive Schwarz preconditioner.
1464: Not Collective
1466: Input Parameter:
1467: . pc - the preconditioner context
1469: Output Parameters:
1470: + n - the number of matrices for this processor (default value = 1)
1471: - mat - the matrices
1474: Level: advanced
1476:    Notes: Call after PCSetUp() (or KSPSetUp()) but before PCApply() (or KSPSolve()) and before PCSetUpOnBlocks().
1478: Usually one would use PCSetModifySubmatrices() to change the submatrices in building the preconditioner.
1480: .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1482: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1483: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains(), PCSetModifySubmatrices()
1484: @*/
1485: PetscErrorCode PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1486: {
1487: PC_ASM *osm;
1489: PetscBool match;
1495:   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1496: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1497: if (!match) {
1498: if (n) *n = 0;
1499: if (mat) *mat = NULL;
1500: } else {
1501: osm = (PC_ASM*)pc->data;
1502: if (n) *n = osm->n_local_true;
1503: if (mat) *mat = osm->pmat;
1504: }
1505: return(0);
1506: }
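/*
   A brief sketch (hypothetical application code; pc is assumed to be a PCASM
   preconditioner after KSPSetUp()/PCSetUp()): look at the local submatrices,
   here only to query their sizes.
*/
PetscInt n,i,rows,cols;
Mat      *mats;
PCASMGetLocalSubmatrices(pc,&n,&mats);
for (i=0; i<n; i++) {
  MatGetSize(mats[i],&rows,&cols);  /* each mats[i] is the sequential matrix for one block */
}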
1510: /*@
1511: PCASMSetDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1512: Logically Collective
1514: Input Parameter:
1515: + pc - the preconditioner
1516: - flg - boolean indicating whether to use subdomains defined by the DM
1518: Options Database Key:
1519: . -pc_asm_dm_subdomains
1521: Level: intermediate
1523: Notes:
1524: PCASMSetTotalSubdomains() and PCASMSetOverlap() take precedence over PCASMSetDMSubdomains(),
1525: so setting either of the first two effectively turns the latter off.
1527: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1529: .seealso: PCASMGetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1530: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1531: @*/
1532: PetscErrorCode PCASMSetDMSubdomains(PC pc,PetscBool flg)
1533: {
1534: PC_ASM *osm = (PC_ASM*)pc->data;
1536: PetscBool match;
1541: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1542: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1543: if (match) {
1544: osm->dm_subdomains = flg;
1545: }
1546: return(0);
1547: }
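/*
   A brief sketch (hypothetical application code; ksp and dm are assumed to be an
   existing KSP and a DM that provides DMCreateDomainDecomposition()): let the DM
   attached to the solver define the ASM subdomains.
*/
PC pc;
KSPSetDM(ksp,dm);
KSPGetPC(ksp,&pc);
PCSetType(pc,PCASM);
PCASMSetDMSubdomains(pc,PETSC_TRUE);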
1551: /*@
1552: PCASMGetDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1553: Not Collective
1555: Input Parameter:
1556: . pc - the preconditioner
1558: Output Parameter:
1559: . flg - boolean indicating whether to use subdomains defined by the DM
1561: Level: intermediate
1563: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1565: .seealso: PCASMSetDMSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap()
1566: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1567: @*/
1568: PetscErrorCode PCASMGetDMSubdomains(PC pc,PetscBool* flg)
1569: {
1570: PC_ASM *osm = (PC_ASM*)pc->data;
1572: PetscBool match;
1577: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1578: if (match) {
1579: if (flg) *flg = osm->dm_subdomains;
1580: }
1581: return(0);
1582: }