Actual source code: gasm.c
petsc-3.7.7 2017-09-25
1: /*
2: This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3: In this version each processor may intersect multiple subdomains and any subdomain may
4: intersect multiple processors. Intersections of subdomains with processors are called *local
5: subdomains*.
7: N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8: n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9: nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10: */
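/*
   Illustration of the counts above (values are made up): with 4 MPI processes and N = 2
   subdomains, each spanning 2 processes, every process intersects exactly one subdomain,
   so n = 1 on every process and nmax = 1.
*/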
11: #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
12: #include <petscdm.h>
14: typedef struct {
15: PetscInt N,n,nmax;
16: PetscInt overlap; /* overlap requested by user */
17: PCGASMType type; /* use reduced interpolation, restriction or both */
18: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19: PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */
20: PetscBool sort_indices; /* flag to sort subdomain indices */
21: PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23: PetscBool hierarchicalpartitioning;
24: IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25: IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26: KSP *ksp; /* linear solvers for each subdomain */
27: Mat *pmat; /* subdomain block matrices */
28: Vec gx,gy; /* Merged work vectors */
29: Vec *x,*y; /* Split work vectors; their storage aliases pieces of the merged vectors above. */
30: VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31: VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32: VecScatter pctoouter;
33: IS permutationIS;
34: Mat permutationP;
35: Mat pcmat;
36: Vec pcx,pcy;
37: } PC_GASM;
41: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
42: {
43: PC_GASM *osm = (PC_GASM*)pc->data;
44: PetscInt i;
48: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
49: PetscMalloc2(osm->n,numbering,osm->n,permutation);
50: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
51: for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
52: PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
53: return(0);
54: }
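/*
   Clarifying note (small example with made-up values): PetscObjectsListGetGlobalNumbering()
   assigns each distinct subdomain a global number, and PetscSortIntWithPermutation() leaves
   *numbering untouched while reordering *permutation so that (*numbering)[(*permutation)[l]]
   is nondecreasing in l. For instance, with osm->n = 2 and *numbering = {7,3}, the call
   produces *permutation = {1,0}, so walking d = (*permutation)[l] visits the local subdomains
   in increasing global order, as done in PCGASMPrintSubdomains() and PCView_GASM().
*/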
58: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
59: {
60: PC_GASM *osm = (PC_GASM*)pc->data;
61: PetscInt j,nidx;
62: const PetscInt *idx;
63: PetscViewer sviewer;
64: char *cidx;
68: if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
69: /* Inner subdomains. */
70: ISGetLocalSize(osm->iis[i], &nidx);
71: /*
72: No more than 15 characters per index plus a space.
73: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
74: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
75: For nidx == 0, the whole string is 16 '\0' characters.
76: */
77: PetscMalloc1(16*(nidx+1)+1, &cidx);
78: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
79: ISGetIndices(osm->iis[i], &idx);
80: for (j = 0; j < nidx; ++j) {
81: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
82: }
83: ISRestoreIndices(osm->iis[i],&idx);
84: PetscViewerDestroy(&sviewer);
85: PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
86: PetscViewerFlush(viewer);
87: PetscViewerASCIIPushSynchronized(viewer);
88: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
89: PetscViewerFlush(viewer);
90: PetscViewerASCIIPopSynchronized(viewer);
91: PetscViewerASCIIPrintf(viewer, "\n");
92: PetscViewerFlush(viewer);
93: PetscFree(cidx);
94: /* Outer subdomains. */
95: ISGetLocalSize(osm->ois[i], &nidx);
96: /*
97: No more than 15 characters per index plus a space.
98: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
99: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
100: For nidx == 0, the whole string is 16 '\0' characters.
101: */
102: PetscMalloc1(16*(nidx+1)+1, &cidx);
103: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
104: ISGetIndices(osm->ois[i], &idx);
105: for (j = 0; j < nidx; ++j) {
106: PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
107: }
108: PetscViewerDestroy(&sviewer);
109: ISRestoreIndices(osm->ois[i],&idx);
110: PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
111: PetscViewerFlush(viewer);
112: PetscViewerASCIIPushSynchronized(viewer);
113: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
114: PetscViewerFlush(viewer);
115: PetscViewerASCIIPopSynchronized(viewer);
116: PetscViewerASCIIPrintf(viewer, "\n");
117: PetscViewerFlush(viewer);
118: PetscFree(cidx);
119: return(0);
120: }
124: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
125: {
126: PC_GASM *osm = (PC_GASM*)pc->data;
127: const char *prefix;
128: char fname[PETSC_MAX_PATH_LEN+1];
129: PetscInt l, d, count;
130: PetscBool doprint,found;
131: PetscViewer viewer, sviewer = NULL;
132: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
136: PCGetOptionsPrefix(pc,&prefix);
137: doprint = PETSC_FALSE;
138: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);
139: if (!doprint) return(0);
140: PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);
141: if (!found) { PetscStrcpy(fname,"stdout"); };
142: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
143: /*
144: Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
145: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
146: */
147: PetscObjectName((PetscObject)viewer);
148: l = 0;
149: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
150: for (count = 0; count < osm->N; ++count) {
151: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
152: if (l<osm->n) {
153: d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
154: if (numbering[d] == count) {
155: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
156: PCGASMSubdomainView_Private(pc,d,sviewer);
157: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
158: ++l;
159: }
160: }
161: MPI_Barrier(PetscObjectComm((PetscObject)pc));
162: }
163: PetscFree2(numbering,permutation);
164: PetscViewerDestroy(&viewer);
165: return(0);
166: }
171: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
172: {
173: PC_GASM *osm = (PC_GASM*)pc->data;
174: const char *prefix;
176: PetscMPIInt rank, size;
177: PetscInt bsz;
178: PetscBool iascii,view_subdomains=PETSC_FALSE;
179: PetscViewer sviewer;
180: PetscInt count, l;
181: char overlap[256] = "user-defined overlap";
182: char gsubdomains[256] = "unknown total number of subdomains";
183: char msubdomains[256] = "unknown max number of local subdomains";
184: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
187: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
188: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
190: if (osm->overlap >= 0) {
191: PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
192: }
193: if (osm->N != PETSC_DETERMINE) {
194: PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
195: }
196: if (osm->nmax != PETSC_DETERMINE) {
197: PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
198: }
200: PCGetOptionsPrefix(pc,&prefix);
201: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);
203: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
204: if (iascii) {
205: /*
206: Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
207: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
208: collectively on the comm.
209: */
210: PetscObjectName((PetscObject)viewer);
211: PetscViewerASCIIPrintf(viewer,"Generalized additive Schwarz:\n");
212: PetscViewerASCIIPrintf(viewer,"Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
213: PetscViewerASCIIPrintf(viewer,"%s\n",overlap);
214: PetscViewerASCIIPrintf(viewer,"%s\n",gsubdomains);
215: PetscViewerASCIIPrintf(viewer,"%s\n",msubdomains);
216: PetscViewerASCIIPushSynchronized(viewer);
217: PetscViewerASCIISynchronizedPrintf(viewer,"[%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
218: PetscViewerFlush(viewer);
219: PetscViewerASCIIPopSynchronized(viewer);
220: /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
221: PetscViewerASCIIPrintf(viewer,"Subdomain solver info is as follows:\n");
222: PetscViewerASCIIPushTab(viewer);
223: PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
224: /* Make sure that everybody waits for the banner to be printed. */
225: MPI_Barrier(PetscObjectComm((PetscObject)viewer));
226: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
227: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
228: l = 0;
229: for (count = 0; count < osm->N; ++count) {
230: PetscMPIInt srank, ssize;
231: if (l<osm->n) {
232: PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
233: if (numbering[d] == count) {
234: MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
235: MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
236: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
237: ISGetLocalSize(osm->ois[d],&bsz);
238: PetscViewerASCIISynchronizedPrintf(sviewer,"[%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
239: PetscViewerFlush(sviewer);
240: if (view_subdomains) {
241: PCGASMSubdomainView_Private(pc,d,sviewer);
242: }
243: if (!pc->setupcalled) {
244: PetscViewerASCIIPrintf(sviewer, "Solver not set up yet: PCSetUp() not yet called\n");
245: } else {
246: KSPView(osm->ksp[d],sviewer);
247: }
248: PetscViewerASCIIPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
249: PetscViewerFlush(sviewer);
250: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
251: ++l;
252: }
253: }
254: MPI_Barrier(PetscObjectComm((PetscObject)pc));
255: }
256: PetscFree2(numbering,permutation);
257: PetscViewerASCIIPopTab(viewer);
258: }
259: PetscViewerFlush(viewer);
260: PetscViewerASCIIPopSynchronized(viewer);
261: return(0);
262: }
264: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
270: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
271: {
272: PC_GASM *osm = (PC_GASM*)pc->data;
273: MatPartitioning part;
274: MPI_Comm comm;
275: PetscMPIInt size;
276: PetscInt nlocalsubdomains,fromrows_localsize;
277: IS partitioning,fromrows,isn;
278: Vec outervec;
279: PetscErrorCode ierr;
282: PetscObjectGetComm((PetscObject)pc,&comm);
283: MPI_Comm_size(comm,&size);
284: /* We do not need hierarchical partitioning when the total number of subdomains
285:  * is consistent with the number of MPI tasks; skip it in the following cases. */
289: if (osm->N == PETSC_DETERMINE || osm->N >= size || osm->N == 1) {
290: return(0);
291: }
292: if (size % osm->N != 0) {
293: SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of processors %D",osm->N,size);
294: }
295: nlocalsubdomains = size/osm->N;
296: osm->n = 1;
297: MatPartitioningCreate(comm,&part);
298: MatPartitioningSetAdjacency(part,pc->pmat);
299: MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
300: MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
301: MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
302: MatPartitioningSetFromOptions(part);
303: /* get new processor owner number of each vertex */
304: MatPartitioningApply(part,&partitioning);
305: ISBuildTwoSided(partitioning,NULL,&fromrows);
306: ISPartitioningToNumbering(partitioning,&isn);
307: ISDestroy(&isn);
308: ISGetLocalSize(fromrows,&fromrows_localsize);
309: MatPartitioningDestroy(&part);
310: MatCreateVecs(pc->pmat,&outervec,NULL);
311: VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
312: VecDuplicate(osm->pcx,&(osm->pcy));
313: VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
314: MatGetSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
315: PetscObjectReference((PetscObject)fromrows);
316: osm->permutationIS = fromrows;
317: osm->pcmat = pc->pmat;
318: PetscObjectReference((PetscObject)osm->permutationP);
319: pc->pmat = osm->permutationP;
320: VecDestroy(&outervec);
321: ISDestroy(&fromrows);
322: ISDestroy(&partitioning);
323: osm->n = PETSC_DETERMINE;
324: return(0);
325: }
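/*
   Hedged usage sketch (numbers are illustrative only): with 8 MPI processes and the options
      -pc_gasm_total_subdomains 2 -pc_gasm_use_hierachical_partitioning
   the divisibility check above passes (8 % 2 == 0), the hierarchical partitioner is asked for
   osm->N = 2 coarse parts with size/osm->N = 4 fine parts each, and pc->pmat is replaced by a
   permuted copy (osm->permutationP) in which the rows assigned to each subdomain's group of
   4 processes are gathered together; osm->pctoouter maps between the two orderings.
*/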
331: static PetscErrorCode PCSetUp_GASM(PC pc)
332: {
333: PC_GASM *osm = (PC_GASM*)pc->data;
335: PetscBool symset,flg;
336: PetscInt i,nInnerIndices,nTotalInnerIndices;
337: PetscMPIInt rank, size;
338: MatReuse scall = MAT_REUSE_MATRIX;
339: KSP ksp;
340: PC subpc;
341: const char *prefix,*pprefix;
342: Vec x,y;
343: PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
344: const PetscInt *oidxi; /* Indices of the i-th local outer subdomain. */
345: PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
346: PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
347: IS gois; /* Disjoint union of the global indices of the outer subdomains. */
348: IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
349: PetscScalar *gxarray, *gyarray;
350: PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
351: PetscInt num_subdomains = 0;
352: DM *subdomain_dm = NULL;
353: char **subdomain_names = NULL;
354: PetscInt *numbering;
358: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
359: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
360: if (!pc->setupcalled) {
361: /* use a hierarchical partitioning */
362: if (osm->hierarchicalpartitioning) {
363: PCGASMSetHierarchicalPartitioning(pc);
364: }
365: if (!osm->type_set) {
366: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
367: if (symset && flg) osm->type = PC_GASM_BASIC;
368: }
370: if (osm->n == PETSC_DETERMINE) {
371: if (osm->N != PETSC_DETERMINE) {
372: /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
373: PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
374: } else if (osm->dm_subdomains && pc->dm) {
375: /* try pc->dm next, if allowed */
376: PetscInt d;
377: IS *inner_subdomain_is, *outer_subdomain_is;
378: DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
379: if (num_subdomains) {
380: PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
381: }
382: for (d = 0; d < num_subdomains; ++d) {
383: if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
384: if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
385: }
386: PetscFree(inner_subdomain_is);
387: PetscFree(outer_subdomain_is);
388: } else {
389: /* still no subdomains; use one per processor */
390: osm->nmax = osm->n = 1;
391: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
392: osm->N = size;
393: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
394: }
395: }
396: if (!osm->iis) {
397: /*
398: osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
399: We create the requisite number of local inner subdomains and then expand them into
400: outer subdomains, if necessary.
401: */
402: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
403: }
404: if (!osm->ois) {
405: /*
406: Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
407: has been requested, copy the inner subdomains over so they can be modified.
408: */
409: PetscMalloc1(osm->n,&osm->ois);
410: for (i=0; i<osm->n; ++i) {
411: if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
412: ISDuplicate(osm->iis[i],(osm->ois)+i);
413: ISCopy(osm->iis[i],osm->ois[i]);
414: } else {
415: PetscObjectReference((PetscObject)((osm->iis)[i]));
416: osm->ois[i] = osm->iis[i];
417: }
418: }
419: if (osm->overlap>0 && osm->N>1) {
420: /* Extend the "overlapping" regions by a number of steps */
421: MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
422: }
423: }
425: /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
426: if (osm->nmax == PETSC_DETERMINE) {
427: PetscMPIInt inwork,outwork;
428: /* determine global number of subdomains and the max number of local subdomains */
429: inwork = osm->n;
430: MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
431: osm->nmax = outwork;
432: }
433: if (osm->N == PETSC_DETERMINE) {
434: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
435: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
436: }
439: if (osm->sort_indices) {
440: for (i=0; i<osm->n; i++) {
441: ISSort(osm->ois[i]);
442: ISSort(osm->iis[i]);
443: }
444: }
445: PCGetOptionsPrefix(pc,&prefix);
446: PCGASMPrintSubdomains(pc);
448: /*
449: Merge the ISs, create merged vectors and restrictions.
450: */
451: /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
452: on = 0;
453: for (i=0; i<osm->n; i++) {
454: ISGetLocalSize(osm->ois[i],&oni);
455: on += oni;
456: }
457: PetscMalloc1(on, &oidx);
458: on = 0;
459: /* Merge local indices together */
460: for (i=0; i<osm->n; i++) {
461: ISGetLocalSize(osm->ois[i],&oni);
462: ISGetIndices(osm->ois[i],&oidxi);
463: PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);
464: ISRestoreIndices(osm->ois[i],&oidxi);
465: on += oni;
466: }
467: ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
468: nTotalInnerIndices = 0;
469: for (i=0; i<osm->n; i++) {
470: ISGetLocalSize(osm->iis[i],&nInnerIndices);
471: nTotalInnerIndices += nInnerIndices;
472: }
473: VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
474: VecDuplicate(x,&y);
476: VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
477: VecDuplicate(osm->gx,&osm->gy);
478: VecGetOwnershipRange(osm->gx, &gostart, NULL);
479: ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
480: /* gois may contain indices that are not owned locally */
481: VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
482: PetscMalloc1(osm->n,&numbering);
483: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
484: VecDestroy(&x);
485: ISDestroy(&gois);
487: /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
488: {
489: PetscInt ini; /* Number of indices in the i-th local inner subdomain. */
490: PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
491: PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
492: PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
493: IS giis; /* IS for the disjoint union of inner subdomains. */
494: IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
495: PetscScalar *array;
496: const PetscInt *indices;
497: PetscInt k;
498: on = 0;
499: for (i=0; i<osm->n; i++) {
500: ISGetLocalSize(osm->ois[i],&oni);
501: on += oni;
502: }
503: PetscMalloc1(on, &iidx);
504: PetscMalloc1(on, &ioidx);
505: VecGetArray(y,&array);
506: /* mark each inner-subdomain entry with its global subdomain number so the overlap can be identified below */
507: in = 0;
508: for (i=0; i<osm->n; i++) {
509: ISGetLocalSize(osm->iis[i],&ini);
510: for (k = 0; k < ini; ++k){
511: array[in+k] = numbering[i];
512: }
513: in += ini;
514: }
515: VecRestoreArray(y,&array);
516: VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
517: VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
518: VecGetOwnershipRange(osm->gy,&gostart, NULL);
519: VecGetArray(osm->gy,&array);
520: on = 0;
521: in = 0;
522: for (i=0; i<osm->n; i++) {
523: ISGetLocalSize(osm->ois[i],&oni);
524: ISGetIndices(osm->ois[i],&indices);
525: for (k=0; k<oni; k++) {
526: /* skip overlapping indices to get inner domain */
527: if (PetscRealPart(array[on+k]) != numbering[i]) continue;
528: iidx[in] = indices[k];
529: ioidx[in++] = gostart+on+k;
530: }
531: ISRestoreIndices(osm->ois[i], &indices);
532: on += oni;
533: }
534: VecRestoreArray(osm->gy,&array);
535: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
536: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
537: VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
538: VecDestroy(&y);
539: ISDestroy(&giis);
540: ISDestroy(&giois);
541: }
542: ISDestroy(&goid);
543: PetscFree(numbering);
545: /* Create the subdomain work vectors. */
546: PetscMalloc1(osm->n,&osm->x);
547: PetscMalloc1(osm->n,&osm->y);
548: VecGetArray(osm->gx, &gxarray);
549: VecGetArray(osm->gy, &gyarray);
550: for (i=0, on=0; i<osm->n; ++i, on += oni) {
551: PetscInt oNi;
552: ISGetLocalSize(osm->ois[i],&oni);
553: /* on a sub communicator */
554: ISGetSize(osm->ois[i],&oNi);
555: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
556: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
557: }
558: VecRestoreArray(osm->gx, &gxarray);
559: VecRestoreArray(osm->gy, &gyarray);
560: /* Create the subdomain solvers */
561: PetscMalloc1(osm->n,&osm->ksp);
562: for (i=0; i<osm->n; i++) {
563: char subprefix[PETSC_MAX_PATH_LEN+1];
564: KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
565: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
566: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
567: KSPSetType(ksp,KSPPREONLY);
568: KSPGetPC(ksp,&subpc); /* Why do we need this here? */
569: if (subdomain_dm) {
570: KSPSetDM(ksp,subdomain_dm[i]);
571: DMDestroy(subdomain_dm+i);
572: }
573: PCGetOptionsPrefix(pc,&prefix);
574: KSPSetOptionsPrefix(ksp,prefix);
575: if (subdomain_names && subdomain_names[i]) {
576: PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
577: KSPAppendOptionsPrefix(ksp,subprefix);
578: PetscFree(subdomain_names[i]);
579: }
580: KSPAppendOptionsPrefix(ksp,"sub_");
581: osm->ksp[i] = ksp;
582: }
583: PetscFree(subdomain_dm);
584: PetscFree(subdomain_names);
585: scall = MAT_INITIAL_MATRIX;
587: } else { /* if (pc->setupcalled) */
588: /*
589: Destroy the submatrices from the previous iteration
590: */
591: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
592: MatDestroyMatrices(osm->n,&osm->pmat);
593: scall = MAT_INITIAL_MATRIX;
594: }
595: if (osm->permutationIS) {
596: MatGetSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
597: PetscObjectReference((PetscObject)osm->permutationP);
598: osm->pcmat = pc->pmat;
599: pc->pmat = osm->permutationP;
600: }
602: }
605: /*
606: Extract out the submatrices.
607: */
608: if (size > 1) {
609: MatGetSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
610: } else {
611: MatGetSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
612: }
613: if (scall == MAT_INITIAL_MATRIX) {
614: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
615: for (i=0; i<osm->n; i++) {
616: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
617: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
618: }
619: }
621: /* Return control to the user so that the submatrices can be modified (e.g., to apply
622: different boundary conditions for the submatrices than for the global problem) */
623: PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);
625: /*
626: Loop over submatrices putting them into local ksps
627: */
628: for (i=0; i<osm->n; i++) {
629: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
630: if (!pc->setupcalled) {
631: KSPSetFromOptions(osm->ksp[i]);
632: }
633: }
634: if (osm->pcmat) {
635: MatDestroy(&pc->pmat);
636: pc->pmat = osm->pcmat;
637: osm->pcmat = 0;
638: }
639: return(0);
640: }
644: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
645: {
646: PC_GASM *osm = (PC_GASM*)pc->data;
648: PetscInt i;
651: for (i=0; i<osm->n; i++) {
652: KSPSetUp(osm->ksp[i]);
653: }
654: return(0);
655: }
659: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
660: {
661: PC_GASM *osm = (PC_GASM*)pc->data;
663: PetscInt i;
664: Vec x,y;
665: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
668: if (osm->pctoouter) {
669: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
670: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
671: x = osm->pcx;
672: y = osm->pcy;
673: } else {
674: x = xin;
675: y = yout;
676: }
677: /*
678: Support for limiting the restriction or interpolation only to the inner
679: subdomain values (leaving the other values 0).
680: */
681: if (!(osm->type & PC_GASM_RESTRICT)) {
682: /* have to zero the work RHS since scatter may leave some slots empty */
683: VecZeroEntries(osm->gx);
684: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
685: } else {
686: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
687: }
688: VecZeroEntries(osm->gy);
689: if (!(osm->type & PC_GASM_RESTRICT)) {
690: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
691: } else {
692: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
693: }
694: /* do the subdomain solves */
695: for (i=0; i<osm->n; ++i) {
696: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
697: }
698: /* Do we need to zero y ?? */
699: VecZeroEntries(y);
700: if (!(osm->type & PC_GASM_INTERPOLATE)) {
701: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
702: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
703: } else {
704: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
705: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
706: }
707: if (osm->pctoouter) {
708: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
709: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
710: }
711: return(0);
712: }
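/*
   Rough summary of the action of PCApply_GASM() above: let R_i denote the restriction onto
   the i-th outer subdomain and Rt_i the restriction onto the i-th inner subdomain. With the
   default PC_GASM_RESTRICT type the preconditioner computes, approximately,
        y = sum_i Rt_i^T inv(A_i) R_i x,
   where A_i is the submatrix over the i-th outer subdomain and inv(A_i) is applied (possibly
   inexactly) by osm->ksp[i]. PC_GASM_BASIC uses R_i^T and R_i, PC_GASM_INTERPOLATE uses
   R_i^T and Rt_i, and PC_GASM_NONE uses Rt_i^T and Rt_i.
*/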
716: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
717: {
718: PC_GASM *osm = (PC_GASM*)pc->data;
720: PetscInt i;
721: Vec x,y;
722: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
725: if (osm->pctoouter) {
726: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
727: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
728: x = osm->pcx;
729: y = osm->pcy;
730: } else {
731: x = xin;
732: y = yout;
733: }
734: /*
735: Support for limiting the restriction or interpolation to only local
736: subdomain values (leaving the other values 0).
738: Note: these are reversed from the PCApply_GASM() because we are applying the
739: transpose of the three terms
740: */
741: if (!(osm->type & PC_GASM_INTERPOLATE)) {
742: /* have to zero the work RHS since scatter may leave some slots empty */
743: VecZeroEntries(osm->gx);
744: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
745: } else {
746: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
747: }
748: VecZeroEntries(osm->gy);
749: if (!(osm->type & PC_GASM_INTERPOLATE)) {
750: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
751: } else {
752: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
753: }
754: /* do the local solves */
755: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
756: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
757: }
758: VecZeroEntries(y);
759: if (!(osm->type & PC_GASM_RESTRICT)) {
760: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
761: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
762: } else {
763: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
764: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
765: }
766: if (osm->pctoouter) {
767: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
768: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
769: }
770: return(0);
771: }
775: static PetscErrorCode PCReset_GASM(PC pc)
776: {
777: PC_GASM *osm = (PC_GASM*)pc->data;
779: PetscInt i;
782: if (osm->ksp) {
783: for (i=0; i<osm->n; i++) {
784: KSPReset(osm->ksp[i]);
785: }
786: }
787: if (osm->pmat) {
788: if (osm->n > 0) {
789: MatDestroyMatrices(osm->n,&osm->pmat);
790: }
791: }
792: if (osm->x) {
793: for (i=0; i<osm->n; i++) {
794: VecDestroy(&osm->x[i]);
795: VecDestroy(&osm->y[i]);
796: }
797: }
798: VecDestroy(&osm->gx);
799: VecDestroy(&osm->gy);
801: VecScatterDestroy(&osm->gorestriction);
802: VecScatterDestroy(&osm->girestriction);
803: if (!osm->user_subdomains) {
804: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
805: osm->N = PETSC_DETERMINE;
806: osm->nmax = PETSC_DETERMINE;
807: }
808: if (osm->pctoouter) {
809: VecScatterDestroy(&(osm->pctoouter));
810: }
811: if (osm->permutationIS) {
812: ISDestroy(&(osm->permutationIS));
813: }
814: if (osm->pcx) {
815: VecDestroy(&(osm->pcx));
816: }
817: if (osm->pcy) {
818: VecDestroy(&(osm->pcy));
819: }
820: if (osm->permutationP) {
821: MatDestroy(&(osm->permutationP));
822: }
823: if (osm->pcmat) {
824: MatDestroy(&osm->pcmat);
825: }
826: return(0);
827: }
831: static PetscErrorCode PCDestroy_GASM(PC pc)
832: {
833: PC_GASM *osm = (PC_GASM*)pc->data;
835: PetscInt i;
838: PCReset_GASM(pc);
839: /* PCReset will not destroy subdomains, if user_subdomains is true. */
840: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
841: if (osm->ksp) {
842: for (i=0; i<osm->n; i++) {
843: KSPDestroy(&osm->ksp[i]);
844: }
845: PetscFree(osm->ksp);
846: }
847: PetscFree(osm->x);
848: PetscFree(osm->y);
849: PetscFree(pc->data);
850: return(0);
851: }
855: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
856: {
857: PC_GASM *osm = (PC_GASM*)pc->data;
859: PetscInt blocks,ovl;
860: PetscBool symset,flg;
861: PCGASMType gasmtype;
864: /* set the type to symmetric if matrix is symmetric */
865: if (!osm->type_set && pc->pmat) {
866: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
867: if (symset && flg) osm->type = PC_GASM_BASIC;
868: }
869: PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
870: PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
871: PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
872: if (flg) {
873: PCGASMSetTotalSubdomains(pc,blocks);
874: }
875: PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
876: if (flg) {
877: PCGASMSetOverlap(pc,ovl);
878: osm->dm_subdomains = PETSC_FALSE;
879: }
880: flg = PETSC_FALSE;
881: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
882: if (flg) {PCGASMSetType(pc,gasmtype);}
883: PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
884: PetscOptionsTail();
885: return(0);
886: }
888: /*------------------------------------------------------------------------------------*/
892: /*@
893: PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
894: communicator.
895: Logically collective on pc
897: Input Parameters:
898: + pc - the preconditioner
899: - N - total number of subdomains
902: Level: beginner
904: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
906: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
907: PCGASMCreateSubdomains2D()
908: @*/
909: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
910: {
911: PC_GASM *osm = (PC_GASM*)pc->data;
912: PetscMPIInt size,rank;
916: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
917: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
919: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
920: osm->ois = osm->iis = NULL;
922: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
923: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
924: osm->N = N;
925: osm->n = PETSC_DETERMINE;
926: osm->nmax = PETSC_DETERMINE;
927: osm->dm_subdomains = PETSC_FALSE;
928: return(0);
929: }
933: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
934: {
935: PC_GASM *osm = (PC_GASM*)pc->data;
937: PetscInt i;
940: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
941: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
943: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
944: osm->iis = osm->ois = NULL;
945: osm->n = n;
946: osm->N = PETSC_DETERMINE;
947: osm->nmax = PETSC_DETERMINE;
948: if (ois) {
949: PetscMalloc1(n,&osm->ois);
950: for (i=0; i<n; i++) {
951: PetscObjectReference((PetscObject)ois[i]);
952: osm->ois[i] = ois[i];
953: }
954: /*
955: Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
956: it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
957: */
958: osm->overlap = -1;
959: if (!iis) {
960: PetscMalloc1(n,&osm->iis);
961: for (i=0; i<n; i++) {
963: PetscObjectReference((PetscObject)ois[i]);
964: osm->iis[i] = ois[i];
965: }
967: }
968: }
969: if (iis) {
970: PetscMalloc1(n,&osm->iis);
971: for (i=0; i<n; i++) {
972: PetscObjectReference((PetscObject)iis[i]);
973: osm->iis[i] = iis[i];
974: }
975: if (!ois) {
976: PetscMalloc1(n,&osm->ois);
977: for (i=0; i<n; i++) {
979: PetscObjectReference((PetscObject)iis[i]);
980: osm->ois[i] = iis[i];
981: }
983: /* If necessary, osm->ois[i] will be expanded using the requested overlap size in PCSetUp_GASM(). */
984: }
985: }
986: if (iis) osm->user_subdomains = PETSC_TRUE;
987: return(0);
988: }
993: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
994: {
995: PC_GASM *osm = (PC_GASM*)pc->data;
998: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
999: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1000: if (!pc->setupcalled) osm->overlap = ovl;
1001: return(0);
1002: }
1006: static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
1007: {
1008: PC_GASM *osm = (PC_GASM*)pc->data;
1011: osm->type = type;
1012: osm->type_set = PETSC_TRUE;
1013: return(0);
1014: }
1018: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1019: {
1020: PC_GASM *osm = (PC_GASM*)pc->data;
1023: osm->sort_indices = doSort;
1024: return(0);
1025: }
1029: /*
1030: FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1031: In particular, it would upset the global subdomain number calculation.
1032: */
1033: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1034: {
1035: PC_GASM *osm = (PC_GASM*)pc->data;
1039: if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1041: if (n) *n = osm->n;
1042: if (first) {
1043: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1044: *first -= osm->n;
1045: }
1046: if (ksp) {
1047: /* Assume that local solves are now different; not necessarily
1048: true, though! This flag is used only for PCView_GASM() */
1049: *ksp = osm->ksp;
1050: osm->same_subdomain_solvers = PETSC_FALSE;
1051: }
1052: return(0);
1053: } /* PCGASMGetSubKSP_GASM() */
1057: /*@C
1058: PCGASMSetSubdomains - Sets the subdomains for this processor
1059: for the additive Schwarz preconditioner.
1061: Collective on pc
1063: Input Parameters:
1064: + pc - the preconditioner object
1065: . n - the number of subdomains for this processor
1066: . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1067: - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1069: Notes:
1070: The IS indices use the parallel, global numbering of the vector entries.
1071: Inner subdomains are those where the correction is applied.
1072: Outer subdomains are those where the residual necessary to obtain the
1073: corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1074: Both inner and outer subdomains can extend over several processors.
1075: This processor's portion of a subdomain is known as a local subdomain.
1077: By default the GASM preconditioner uses 1 (local) subdomain per processor.
1080: Level: advanced
1082: .keywords: PC, GASM, set, subdomains, additive Schwarz
1084: .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1085: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1086: @*/
1087: PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1088: {
1089: PC_GASM *osm = (PC_GASM*)pc->data;
1094: PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1095: osm->dm_subdomains = PETSC_FALSE;
1096: return(0);
1097: }
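/*
   Hedged usage sketch (sizes and variable names are illustrative, not part of PETSc): on a
   single process, split the m rows of the operator into two inner subdomains and let GASM
   build the outer subdomains by expanding them with the requested overlap:

      IS iis[2];
      ISCreateStride(PETSC_COMM_SELF,m/2,0,1,&iis[0]);
      ISCreateStride(PETSC_COMM_SELF,m-m/2,m/2,1,&iis[1]);
      PCGASMSetSubdomains(pc,2,iis,NULL);
      PCGASMSetOverlap(pc,1);
      ISDestroy(&iis[0]);
      ISDestroy(&iis[1]);

   The PC keeps its own references to the index sets, so the caller may destroy them.
*/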
1102: /*@
1103: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1104: additive Schwarz preconditioner. Either all or no processors in the
1105: pc communicator must call this routine.
1107: Logically Collective on pc
1109: Input Parameters:
1110: + pc - the preconditioner context
1111: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1113: Options Database Key:
1114: . -pc_gasm_overlap <overlap> - Sets overlap
1116: Notes:
1117: By default the GASM preconditioner uses 1 subdomain per processor. To use
1118: multiple subdomains per processor or "straddling" subdomains that intersect
1119: multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1121: The overlap defaults to 0, so if one desires that no additional
1122: overlap be computed beyond what may have been set with a call to
1123: PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1124: not explicitly set the subdomains in application code, then all overlap would be computed
1125: internally by PETSc, and using an overlap of 0 would result in a GASM
1126: variant that is equivalent to the block Jacobi preconditioner.
1128: Note that one can define initial index sets with any overlap via
1129: PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1130: PETSc to extend that overlap further, if desired.
1132: Level: intermediate
1134: .keywords: PC, GASM, set, overlap
1136: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1137: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1138: @*/
1139: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1140: {
1142: PC_GASM *osm = (PC_GASM*)pc->data;
1147: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1148: osm->dm_subdomains = PETSC_FALSE;
1149: return(0);
1150: }
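/*
   Hedged usage sketch: two equivalent ways to request 4 subdomains with overlap 2, either
   from the options database,
      -pc_type gasm -pc_gasm_total_subdomains 4 -pc_gasm_overlap 2
   or programmatically, before PCSetUp() is called:
      PCSetType(pc,PCGASM);
      PCGASMSetTotalSubdomains(pc,4);
      PCGASMSetOverlap(pc,2);
*/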
1154: /*@
1155: PCGASMSetType - Sets the type of restriction and interpolation used
1156: for local problems in the additive Schwarz method.
1158: Logically Collective on PC
1160: Input Parameters:
1161: + pc - the preconditioner context
1162: - type - variant of GASM, one of
1163: .vb
1164: PC_GASM_BASIC - full interpolation and restriction
1165: PC_GASM_RESTRICT - full restriction, local processor interpolation
1166: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1167: PC_GASM_NONE - local processor restriction and interpolation
1168: .ve
1170: Options Database Key:
1171: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1173: Level: intermediate
1175: .keywords: PC, GASM, set, type
1177: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1178: PCGASMCreateSubdomains2D()
1179: @*/
1180: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1181: {
1187: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1188: return(0);
1189: }
1193: /*@
1194: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1196: Logically Collective on PC
1198: Input Parameters:
1199: + pc - the preconditioner context
1200: - doSort - sort the subdomain indices
1202: Level: intermediate
1204: .keywords: PC, GASM, set, type
1206: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1207: PCGASMCreateSubdomains2D()
1208: @*/
1209: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1210: {
1216: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1217: return(0);
1218: }
1222: /*@C
1223: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1224: this processor.
1226: Collective on PC iff first_local is requested
1228: Input Parameter:
1229: . pc - the preconditioner context
1231: Output Parameters:
1232: + n_local - the number of blocks on this processor or NULL
1233: . first_local - the global number of the first block on this processor or NULL,
1234: all processors must request or all must pass NULL
1235: - ksp - the array of KSP contexts
1237: Note:
1238: After PCGASMGetSubKSP() the array of KSPs is not to be freed
1240: Currently for some matrix implementations only 1 block per processor
1241: is supported.
1243: You must call KSPSetUp() before calling PCGASMGetSubKSP().
1245: Level: advanced
1247: .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1249: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1250: PCGASMCreateSubdomains2D(),
1251: @*/
1252: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1253: {
1258: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1259: return(0);
1260: }
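/*
   Hedged usage sketch (variable names are illustrative): after the outer solver is set up,
   retrieve the block solvers and change the preconditioner used on every local block; note
   that this clears osm->same_subdomain_solvers, as mentioned above.

      KSP     *subksp;
      PetscInt i,nlocal;
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&nlocal,NULL,&subksp);
      for (i=0; i<nlocal; i++) {
        PC subpc;
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCLU);
      }
*/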
1262: /* -------------------------------------------------------------------------------------*/
1263: /*MC
1264: PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1265: its own KSP object.
1267: Options Database Keys:
1268: + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors
1269: . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1270: . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1271: . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1272: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1274: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1275: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1276: -pc_gasm_type basic to use the standard GASM.
1278: Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1279: than one processor. Defaults to one block per processor.
1281: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1282: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1284: To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1285: and set the options directly on the resulting KSP object (you can access its PC
1286: with KSPGetPC())
1289: Level: beginner
1291: Concepts: additive Schwarz method
1293: References:
1294: + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1295: Courant Institute, New York University Technical report
1296: - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1297: Cambridge University Press.
1299: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1300: PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1301: PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1303: M*/
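/*
   Example invocation (hedged; the executable name is hypothetical):
      mpiexec -n 4 ./my_app -ksp_type gmres -pc_type gasm -pc_gasm_total_subdomains 2 \
                            -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type ilu
*/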
1307: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1308: {
1310: PC_GASM *osm;
1313: PetscNewLog(pc,&osm);
1315: osm->N = PETSC_DETERMINE;
1316: osm->n = PETSC_DECIDE;
1317: osm->nmax = PETSC_DETERMINE;
1318: osm->overlap = 0;
1319: osm->ksp = 0;
1320: osm->gorestriction = 0;
1321: osm->girestriction = 0;
1322: osm->pctoouter = 0;
1323: osm->gx = 0;
1324: osm->gy = 0;
1325: osm->x = 0;
1326: osm->y = 0;
1327: osm->pcx = 0;
1328: osm->pcy = 0;
1329: osm->permutationIS = 0;
1330: osm->permutationP = 0;
1331: osm->pcmat = 0;
1332: osm->ois = 0;
1333: osm->iis = 0;
1334: osm->pmat = 0;
1335: osm->type = PC_GASM_RESTRICT;
1336: osm->same_subdomain_solvers = PETSC_TRUE;
1337: osm->sort_indices = PETSC_TRUE;
1338: osm->dm_subdomains = PETSC_FALSE;
1339: osm->hierarchicalpartitioning = PETSC_FALSE;
1341: pc->data = (void*)osm;
1342: pc->ops->apply = PCApply_GASM;
1343: pc->ops->applytranspose = PCApplyTranspose_GASM;
1344: pc->ops->setup = PCSetUp_GASM;
1345: pc->ops->reset = PCReset_GASM;
1346: pc->ops->destroy = PCDestroy_GASM;
1347: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1348: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1349: pc->ops->view = PCView_GASM;
1350: pc->ops->applyrichardson = 0;
1352: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1353: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1354: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1355: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1356: PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1357: return(0);
1358: }
1363: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1364: {
1365: MatPartitioning mpart;
1366: const char *prefix;
1367: PetscErrorCode (*f)(Mat,MatReuse,Mat*);
1368: PetscMPIInt size;
1369: PetscInt i,j,rstart,rend,bs;
1370: PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1371: Mat Ad = NULL, adj;
1372: IS ispart,isnumb,*is;
1373: PetscErrorCode ierr;
1376: if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must be > 0, got nloc = %D",nloc);
1378: /* Get prefix, row distribution, and block size */
1379: MatGetOptionsPrefix(A,&prefix);
1380: MatGetOwnershipRange(A,&rstart,&rend);
1381: MatGetBlockSize(A,&bs);
1382: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1384: /* Get diagonal block from matrix if possible */
1385: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1386: PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",&f);
1387: if (f) {
1388: MatGetDiagonalBlock(A,&Ad);
1389: } else if (size == 1) {
1390: Ad = A;
1391: }
1392: if (Ad) {
1393: PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1394: if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1395: }
1396: if (Ad && nloc > 1) {
1397: PetscBool match,done;
1398: /* Try to setup a good matrix partitioning if available */
1399: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1400: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1401: MatPartitioningSetFromOptions(mpart);
1402: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1403: if (!match) {
1404: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1405: }
1406: if (!match) { /* assume a "good" partitioner is available */
1407: PetscInt na;
1408: const PetscInt *ia,*ja;
1409: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1410: if (done) {
1411: /* Build adjacency matrix by hand. Unfortunately a call to
1412: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1413: remove the block-aij structure and we cannot expect
1414: MatPartitioning to split vertices as we need */
1415: PetscInt i,j,len,nnz,cnt,*iia=0,*jja=0;
1416: const PetscInt *row;
1417: nnz = 0;
1418: for (i=0; i<na; i++) { /* count number of nonzeros */
1419: len = ia[i+1] - ia[i];
1420: row = ja + ia[i];
1421: for (j=0; j<len; j++) {
1422: if (row[j] == i) { /* don't count diagonal */
1423: len--; break;
1424: }
1425: }
1426: nnz += len;
1427: }
1428: PetscMalloc1(na+1,&iia);
1429: PetscMalloc1(nnz,&jja);
1430: nnz = 0;
1431: iia[0] = 0;
1432: for (i=0; i<na; i++) { /* fill adjacency */
1433: cnt = 0;
1434: len = ia[i+1] - ia[i];
1435: row = ja + ia[i];
1436: for (j=0; j<len; j++) {
1437: if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1438: }
1439: nnz += cnt;
1440: iia[i+1] = nnz;
1441: }
1442: /* Partitioning of the adjacency matrix */
1443: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1444: MatPartitioningSetAdjacency(mpart,adj);
1445: MatPartitioningSetNParts(mpart,nloc);
1446: MatPartitioningApply(mpart,&ispart);
1447: ISPartitioningToNumbering(ispart,&isnumb);
1448: MatDestroy(&adj);
1449: foundpart = PETSC_TRUE;
1450: }
1451: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1452: }
1453: MatPartitioningDestroy(&mpart);
1454: }
1455: PetscMalloc1(nloc,&is);
1456: if (!foundpart) {
1458: /* Partitioning by contiguous chunks of rows */
1460: PetscInt mbs = (rend-rstart)/bs;
1461: PetscInt start = rstart;
1462: for (i=0; i<nloc; i++) {
1463: PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1464: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1465: start += count;
1466: }
1468: } else {
1470: /* Partitioning by adjacency of diagonal block */
1472: const PetscInt *numbering;
1473: PetscInt *count,nidx,*indices,*newidx,start=0;
1474: /* Get node count in each partition */
1475: PetscMalloc1(nloc,&count);
1476: ISPartitioningCount(ispart,nloc,count);
1477: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1478: for (i=0; i<nloc; i++) count[i] *= bs;
1479: }
1480: /* Build indices from node numbering */
1481: ISGetLocalSize(isnumb,&nidx);
1482: PetscMalloc1(nidx,&indices);
1483: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1484: ISGetIndices(isnumb,&numbering);
1485: PetscSortIntWithPermutation(nidx,numbering,indices);
1486: ISRestoreIndices(isnumb,&numbering);
1487: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1488: PetscMalloc1(nidx*bs,&newidx);
1489: for (i=0; i<nidx; i++) {
1490: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1491: }
1492: PetscFree(indices);
1493: nidx *= bs;
1494: indices = newidx;
1495: }
1496: /* Shift to get global indices */
1497: for (i=0; i<nidx; i++) indices[i] += rstart;
1499: /* Build the index sets for each block */
1500: for (i=0; i<nloc; i++) {
1501: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1502: ISSort(is[i]);
1503: start += count[i];
1504: }
1506: PetscFree(count);
1507: PetscFree(indices);
1508: ISDestroy(&isnumb);
1509: ISDestroy(&ispart);
1510: }
1511: *iis = is;
1512: return(0);
1513: }
1517: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1518: {
1519: PetscErrorCode ierr;
1522: MatSubdomainsCreateCoalesce(A,N,n,iis);
1523: return(0);
1524: }
1530: /*@C
1531: PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1532: Schwarz preconditioner for any problem based on its matrix.
1534: Collective
1536: Input Parameters:
1537: + A - The global matrix operator
1538: - N - the number of global subdomains requested
1540: Output Parameters:
1541: + n - the number of subdomains created on this processor
1542: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1544: Level: advanced
1546: Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1547: When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
1548: The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL). The overlapping
1549: outer subdomains will be automatically generated from these according to the requested amount of
1550: overlap; this is currently supported only with local subdomains.
1553: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1555: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1556: @*/
1557: PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1558: {
1559: PetscMPIInt size;
1560: PetscErrorCode ierr;
1566: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1567: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1568: if (N >= size) {
1569: *n = N/size + (N%size);
1570: PCGASMCreateLocalSubdomains(A,*n,iis);
1571: } else {
1572: PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1573: }
1574: return(0);
1575: }
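/*
   Hedged usage sketch: create N global subdomains from the operator, hand them to the
   preconditioner, and destroy the local list afterwards (the PC keeps its own references):

      IS      *iis;
      PetscInt n;
      PCGASMCreateSubdomains(A,N,&n,&iis);
      PCGASMSetSubdomains(pc,n,iis,NULL);
      PCGASMDestroySubdomains(n,&iis,NULL);
*/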
1579: /*@C
1580: PCGASMDestroySubdomains - Destroys the index sets created with
1581: PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1582: called after setting subdomains with PCGASMSetSubdomains().
1584: Collective
1586: Input Parameters:
1587: + n - the number of index sets
1588: . iis - the array of inner subdomains,
1589: - ois - the array of outer subdomains, can be NULL
1591: Level: intermediate
1593: Notes: this is merely a convenience subroutine that walks each list,
1594: destroys each IS on the list, and then frees the list. At the end the
1595: list pointers are set to NULL.
1597: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1599: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1600: @*/
1601: PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1602: {
1603: PetscInt i;
1607: if (n <= 0) return(0);
1608: if (ois) {
1610: if (*ois) {
1612: for (i=0; i<n; i++) {
1613: ISDestroy(&(*ois)[i]);
1614: }
1615: PetscFree((*ois));
1616: }
1617: }
1618: if (iis) {
1620: if (*iis) {
1622: for (i=0; i<n; i++) {
1623: ISDestroy(&(*iis)[i]);
1624: }
1625: PetscFree((*iis));
1626: }
1627: }
1628: return(0);
1629: }
1632: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1633: { \
1634: PetscInt first_row = first/M, last_row = last/M+1; \
1635: /* \
1636: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1637: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1638: subdomain). \
1639: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1640: of the intersection. \
1641: */ \
1642: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1643: *ylow_loc = PetscMax(first_row,ylow); \
1644: /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1645: *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1646: /* yhigh_loc is the grid row above the last local subdomain element */ \
1647: *yhigh_loc = PetscMin(last_row,yhigh); \
1648: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1649: *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1650: /* Now compute the size of the local subdomain n. */ \
1651: *n = 0; \
1652: if (*ylow_loc < *yhigh_loc) { \
1653: PetscInt width = xright-xleft; \
1654: *n += width*(*yhigh_loc-*ylow_loc-1); \
1655: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1656: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1657: } \
1658: }
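/*
   A worked example of the macro above (illustrative values, dof = 1): take M = N = 10,
   an ownership range [first,last) = [30,70), i.e. grid rows 3..6 owned in full, and a
   subdomain box xleft = 2, xright = 6, ylow = 5, yhigh = 9. Then first_row = 3 and
   last_row = 8, and the macro yields ylow_loc = 5, xleft_loc = 2, yhigh_loc = 8,
   xright_loc = 0 and n = 4*(8-5-1) + 0 - 0 = 8, which matches the intersection:
   the 2 owned rows (5 and 6) times the 4 columns (2..5). The index loops further below
   run jj = ylow_loc,...,yhigh_loc-1 and so also visit row 7, but there the upper column
   bound xright_loc = 0 keeps any indices from being generated.
*/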
1664: /*@
1665: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1666: preconditioner for a two-dimensional problem on a regular grid.
1668: Collective
1670: Input Parameters:
+ pc - the preconditioner context
1671: . M, N - the global number of grid points in the x and y directions
1672: . Mdomains, Ndomains - the global number of subdomains in the x and y directions
1673: . dof - degrees of freedom per node
1674: - overlap - overlap in mesh lines
1676: Output Parameters:
1677: + nsub - the number of local subdomains created
1678: . iis - array of index sets defining inner (nonoverlapping) subdomains
1679: - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1682: Level: advanced
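A usage sketch (illustrative only; pc is assumed to be a PC of type PCGASM whose matrix discretizes an M x N grid with one degree of freedom per node, split here into a 2 x 2 array of subdomains with an overlap of one mesh line):
.vb
   PetscInt nsub;
   IS       *iis,*ois;
   PCGASMCreateSubdomains2D(pc,M,N,2,2,1,1,&nsub,&iis,&ois);
   PCGASMSetSubdomains(pc,nsub,iis,ois);
.ve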
1684: .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1686: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1687: @*/
1688: PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1689: {
1691: PetscMPIInt size, rank;
1692: PetscInt i, j;
1693: PetscInt maxheight, maxwidth;
1694: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1695: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1696: PetscInt x[2][2], y[2][2], n[2];
1697: PetscInt first, last;
1698: PetscInt nidx, *idx;
1699: PetscInt ii,jj,s,q,d;
1700: PetscInt k,kk;
1701: PetscMPIInt color;
1702: MPI_Comm comm, subcomm;
1703: IS **xis = 0, **is = ois, **is_local = iis;
1706: PetscObjectGetComm((PetscObject)pc, &comm);
1707: MPI_Comm_size(comm, &size);
1708: MPI_Comm_rank(comm, &rank);
1709: MatGetOwnershipRange(pc->pmat, &first, &last);
1710: if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1711: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1713: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1714: s = 0;
1715: ystart = 0;
1716: for (j=0; j<Ndomains; ++j) {
1717: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1718: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1719: /* Vertical domain limits with an overlap. */
1720: ylow = PetscMax(ystart - overlap,0);
1721: yhigh = PetscMin(ystart + maxheight + overlap,N);
1722: xstart = 0;
1723: for (i=0; i<Mdomains; ++i) {
1724: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1725: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1726: /* Horizontal domain limits with an overlap. */
1727: xleft = PetscMax(xstart - overlap,0);
1728: xright = PetscMin(xstart + maxwidth + overlap,M);
1729: /*
1730: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1731: */
1732: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1733: if (nidx) ++s;
1734: xstart += maxwidth;
1735: } /* for (i = 0; i < Mdomains; ++i) */
1736: ystart += maxheight;
1737: } /* for (j = 0; j < Ndomains; ++j) */
1739: /* Now we can allocate the necessary number of ISs. */
1740: *nsub = s;
1741: PetscMalloc1(*nsub,is);
1742: PetscMalloc1(*nsub,is_local);
1743: s = 0;
1744: ystart = 0;
1745: for (j=0; j<Ndomains; ++j) {
1746: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1747: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1748: /* Vertical domain limits with an overlap. */
1749: y[0][0] = PetscMax(ystart - overlap,0);
1750: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1751: /* Vertical domain limits without an overlap. */
1752: y[1][0] = ystart;
1753: y[1][1] = PetscMin(ystart + maxheight,N);
1754: xstart = 0;
1755: for (i=0; i<Mdomains; ++i) {
1756: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1757: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1758: /* Horizontal domain limits with an overlap. */
1759: x[0][0] = PetscMax(xstart - overlap,0);
1760: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1761: /* Horizontal domain limits without an overlap. */
1762: x[1][0] = xstart;
1763: x[1][1] = PetscMin(xstart+maxwidth,M);
1764: /*
1765: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1766: Do this twice: first for the domains with an overlap, then for the domains without.
1767: During the first pass create the subcommunicators, and use them on the second pass as well.
1768: */
1769: for (q = 0; q < 2; ++q) {
1770: PetscBool split = PETSC_FALSE;
1771: /*
1772: The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1773: according to whether the domain with or without an overlap is considered.
1774: */
1775: xleft = x[q][0]; xright = x[q][1];
1776: ylow = y[q][0]; yhigh = y[q][1];
1777: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1778: nidx *= dof;
1779: n[q] = nidx;
1780: /*
1781: Based on the counted number of indices in the local domain *with an overlap*,
1782: construct a subcommunicator of all the processors supporting this domain.
1783: Observe that a domain with an overlap might have nontrivial local support,
1784: while the domain without an overlap might not. Hence, the decision to participate
1785: in the subcommunicator must be based on the domain with an overlap.
1786: */
1787: if (q == 0) {
1788: if (nidx) color = 1;
1789: else color = MPI_UNDEFINED;
1790: MPI_Comm_split(comm, color, rank, &subcomm);
1791: split = PETSC_TRUE;
1792: }
1793: /*
1794: Proceed only if the number of local indices *with an overlap* is nonzero.
1795: */
1796: if (n[0]) {
1797: if (q == 0) xis = is;
1798: if (q == 1) {
1799: /*
1800: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1801: Moreover, if the overlap is zero, the two ISs are identical.
1802: */
1803: if (overlap == 0) {
1804: (*is_local)[s] = (*is)[s];
1805: PetscObjectReference((PetscObject)(*is)[s]);
1806: continue;
1807: } else {
1808: xis = is_local;
1809: subcomm = ((PetscObject)(*is)[s])->comm;
1810: }
1811: } /* if (q == 1) */
1812: idx = NULL;
1813: PetscMalloc1(nidx,&idx);
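/* Generate the global (natural ordering) matrix indices of the locally owned part of this subdomain: each owned grid row jj contributes the points x0,...,x1-1, and each grid point expands into dof consecutive matrix indices starting at dof*(M*jj + x0). */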
1814: if (nidx) {
1815: k = 0;
1816: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1817: PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1818: PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1819: kk = dof*(M*jj + x0);
1820: for (ii=x0; ii<x1; ++ii) {
1821: for (d = 0; d < dof; ++d) {
1822: idx[k++] = kk++;
1823: }
1824: }
1825: }
1826: }
1827: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1828: if (split) {
1829: MPI_Comm_free(&subcomm);
1830: }
1831: }/* if (n[0]) */
1832: }/* for (q = 0; q < 2; ++q) */
1833: if (n[0]) ++s;
1834: xstart += maxwidth;
1835: } /* for (i = 0; i < Mdomains; ++i) */
1836: ystart += maxheight;
1837: } /* for (j = 0; j < Ndomains; ++j) */
1838: return(0);
1839: }
1843: /*@C
1844: PCGASMGetSubdomains - Gets the subdomains supported on this processor
1845: for the additive Schwarz preconditioner.
1847: Not Collective
1849: Input Parameter:
1850: . pc - the preconditioner context
1852: Output Parameters:
1853: + n - the number of subdomains for this processor (default value = 1)
1854: . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1855: - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1858: Notes:
1859: The user is responsible for destroying the ISs and freeing the returned arrays.
1860: The IS numbering is in the parallel, global numbering of the vector.
1862: Level: advanced
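A minimal usage sketch (illustrative only; see the Notes above for the caller's responsibilities):
.vb
   PetscInt n;
   IS       *iis,*ois;
   PCGASMGetSubdomains(pc,&n,&iis,&ois);
   PetscFree(iis);
   PetscFree(ois);
.ve
The arrays iis and ois are allocated by this routine and are released here with PetscFree() once the index sets have been examined.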
1864: .keywords: PC, GASM, get, subdomains, additive Schwarz
1866: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1867: PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1868: @*/
1869: PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1870: {
1871: PC_GASM *osm;
1873: PetscBool match;
1874: PetscInt i;
1878: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1879: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1880: osm = (PC_GASM*)pc->data;
1881: if (n) *n = osm->n;
1882: if (iis) {
1883: PetscMalloc1(osm->n, iis);
1884: }
1885: if (ois) {
1886: PetscMalloc1(osm->n, ois);
1887: }
1888: if (iis || ois) {
1889: for (i = 0; i < osm->n; ++i) {
1890: if (iis) (*iis)[i] = osm->iis[i];
1891: if (ois) (*ois)[i] = osm->ois[i];
1892: }
1893: }
1894: return(0);
1895: }
1899: /*@C
1900: PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1901: only) for the additive Schwarz preconditioner.
1903: Not Collective
1905: Input Parameter:
1906: . pc - the preconditioner context
1908: Output Parameters:
1909: + n - the number of matrices for this processor (default value = 1)
1910: - mat - the matrices
1912: Notes: matrices returned by this routine have the same communicators as the index sets (IS)
1913: used to define the subdomains in PCGASMSetSubdomains().
1914: Level: advanced
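A minimal usage sketch (illustrative only; the returned array points to the PC's internal storage and should not be freed by the caller):
.vb
   PetscInt n,i;
   Mat      *mat;
   PCGASMGetSubmatrices(pc,&n,&mat);
   for (i=0; i<n; i++) {
     MatView(mat[i],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)mat[i])));
   }
.ve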
1916: .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1918: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1919: PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1920: @*/
1921: PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1922: {
1923: PC_GASM *osm;
1925: PetscBool match;
1931: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must be called after KSPSetUp() or PCSetUp().");
1932: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1933: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1934: osm = (PC_GASM*)pc->data;
1935: if (n) *n = osm->n;
1936: if (mat) *mat = osm->pmat;
1937: return(0);
1938: }
1942: /*@
1943: PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1944: Logically Collective
1946: Input Parameter:
1947: + pc - the preconditioner
1948: - flg - boolean indicating whether to use subdomains defined by the DM
1950: Options Database Key:
1951: . -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM; overridden by -pc_gasm_total_subdomains and -pc_gasm_overlap
1953: Level: intermediate
1955: Notes:
1956: PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1957: so calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1958: automatically turns the use of DM-defined subdomains off.
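A usage sketch (illustrative only; assumes ksp is a KSP to which a DM has already been attached with KSPSetDM(), and note that the call must precede PCSetUp()/KSPSetUp()):
.vb
   KSPGetPC(ksp,&pc);
   PCSetType(pc,PCGASM);
   PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
.ve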
1960: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1962: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1963: PCGASMCreateSubdomains2D()
1964: @*/
1965: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1966: {
1967: PC_GASM *osm = (PC_GASM*)pc->data;
1969: PetscBool match;
1974: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1975: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1976: if (match) {
1977: if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1978: osm->dm_subdomains = flg;
1979: }
1980: }
1981: return(0);
1982: }
1986: /*@
1987: PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1988: Not Collective
1990: Input Parameter:
1991: . pc - the preconditioner
1993: Output Parameter:
1994: . flg - boolean indicating whether to use subdomains defined by the DM
1996: Level: intermediate
1998: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
2000: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
2001: PCGASMCreateSubdomains2D()
2002: @*/
2003: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
2004: {
2005: PC_GASM *osm = (PC_GASM*)pc->data;
2007: PetscBool match;
2012: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
2013: if (match) {
2014: if (flg) *flg = osm->dm_subdomains;
2015: }
2016: return(0);
2017: }