Actual source code: gasm.c
petsc-3.12.5 2020-03-29
1: /*
2: This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3: In this version each processor may intersect multiple subdomains and any subdomain may
4: intersect multiple processors. Intersections of subdomains with processors are called *local
5: subdomains*.
7: N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8: n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9: nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10: */
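/*
   Illustrative example (editor's note, not part of the original source): with 4 MPI ranks and
   N = 2 subdomains, each straddling 2 ranks, every rank intersects exactly one subdomain, so
   n = 1 and nmax = 1 on every rank while N = 2 globally.
*/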
11: #include <petsc/private/pcimpl.h>
12: #include <petscdm.h>
14: typedef struct {
15: PetscInt N,n,nmax;
16: PetscInt overlap; /* overlap requested by user */
17: PCGASMType type; /* use reduced interpolation, restriction or both */
18: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19: PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are the same */
20: PetscBool sort_indices; /* flag to sort subdomain indices */
21: PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23: PetscBool hierarchicalpartitioning;
24: IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25: IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26: KSP *ksp; /* linear solvers for each subdomain */
27: Mat *pmat; /* subdomain block matrices */
28: Vec gx,gy; /* Merged work vectors */
29: Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30: VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31: VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32: VecScatter pctoouter;
33: IS permutationIS;
34: Mat permutationP;
35: Mat pcmat;
36: Vec pcx,pcy;
37: } PC_GASM;
39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40: {
41: PC_GASM *osm = (PC_GASM*)pc->data;
42: PetscInt i;
46: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47: PetscMalloc2(osm->n,numbering,osm->n,permutation);
48: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
49: for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50: PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
51: return(0);
52: }
54: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55: {
56: PC_GASM *osm = (PC_GASM*)pc->data;
57: PetscInt j,nidx;
58: const PetscInt *idx;
59: PetscViewer sviewer;
60: char *cidx;
64: if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
65: /* Inner subdomains. */
66: ISGetLocalSize(osm->iis[i], &nidx);
67: /*
68: No more than 15 characters per index plus a space.
69: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71: For nidx == 0, the whole string consists of 16 '\0' characters.
72: */
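/* For example (illustrative arithmetic only): nidx = 3 gives len = 16*(3+1)+1 = 65 bytes. */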
73: #define len 16*(nidx+1)+1
74: PetscMalloc1(len, &cidx);
75: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
76: #undef len
77: ISGetIndices(osm->iis[i], &idx);
78: for (j = 0; j < nidx; ++j) {
79: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
80: }
81: ISRestoreIndices(osm->iis[i],&idx);
82: PetscViewerDestroy(&sviewer);
83: PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
84: PetscViewerFlush(viewer);
85: PetscViewerASCIIPushSynchronized(viewer);
86: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
87: PetscViewerFlush(viewer);
88: PetscViewerASCIIPopSynchronized(viewer);
89: PetscViewerASCIIPrintf(viewer, "\n");
90: PetscViewerFlush(viewer);
91: PetscFree(cidx);
92: /* Outer subdomains. */
93: ISGetLocalSize(osm->ois[i], &nidx);
94: /*
95: No more than 15 characters per index plus a space.
96: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98: For nidx == 0, the whole string consists of 16 '\0' characters.
99: */
100: #define len 16*(nidx+1)+1
101: PetscMalloc1(len, &cidx);
102: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
103: #undef len
104: ISGetIndices(osm->ois[i], &idx);
105: for (j = 0; j < nidx; ++j) {
106: PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
107: }
108: PetscViewerDestroy(&sviewer);
109: ISRestoreIndices(osm->ois[i],&idx);
110: PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
111: PetscViewerFlush(viewer);
112: PetscViewerASCIIPushSynchronized(viewer);
113: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
114: PetscViewerFlush(viewer);
115: PetscViewerASCIIPopSynchronized(viewer);
116: PetscViewerASCIIPrintf(viewer, "\n");
117: PetscViewerFlush(viewer);
118: PetscFree(cidx);
119: return(0);
120: }
122: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
123: {
124: PC_GASM *osm = (PC_GASM*)pc->data;
125: const char *prefix;
126: char fname[PETSC_MAX_PATH_LEN+1];
127: PetscInt l, d, count;
128: PetscBool doprint,found;
129: PetscViewer viewer, sviewer = NULL;
130: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
134: PCGetOptionsPrefix(pc,&prefix);
135: doprint = PETSC_FALSE;
136: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);
137: if (!doprint) return(0);
138: PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);
139: if (!found) { PetscStrcpy(fname,"stdout"); }
140: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
141: /*
142: Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
143: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
144: */
145: PetscObjectName((PetscObject)viewer);
146: l = 0;
147: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
148: for (count = 0; count < osm->N; ++count) {
149: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
150: if (l<osm->n) {
151: d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
152: if (numbering[d] == count) {
153: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
154: PCGASMSubdomainView_Private(pc,d,sviewer);
155: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
156: ++l;
157: }
158: }
159: MPI_Barrier(PetscObjectComm((PetscObject)pc));
160: }
161: PetscFree2(numbering,permutation);
162: PetscViewerDestroy(&viewer);
163: return(0);
164: }
167: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
168: {
169: PC_GASM *osm = (PC_GASM*)pc->data;
170: const char *prefix;
172: PetscMPIInt rank, size;
173: PetscInt bsz;
174: PetscBool iascii,view_subdomains=PETSC_FALSE;
175: PetscViewer sviewer;
176: PetscInt count, l;
177: char overlap[256] = "user-defined overlap";
178: char gsubdomains[256] = "unknown total number of subdomains";
179: char msubdomains[256] = "unknown max number of local subdomains";
180: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
183: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
184: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
186: if (osm->overlap >= 0) {
187: PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
188: }
189: if (osm->N != PETSC_DETERMINE) {
190: PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
191: }
192: if (osm->nmax != PETSC_DETERMINE) {
193: PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
194: }
196: PCGetOptionsPrefix(pc,&prefix);
197: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);
199: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
200: if (iascii) {
201: /*
202: Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
203: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
204: collectively on the comm.
205: */
206: PetscObjectName((PetscObject)viewer);
207: PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
208: PetscViewerASCIIPrintf(viewer," %s\n",overlap);
209: PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);
210: PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);
211: PetscViewerASCIIPushSynchronized(viewer);
212: PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
213: PetscViewerFlush(viewer);
214: PetscViewerASCIIPopSynchronized(viewer);
215: /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
216: PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");
217: PetscViewerASCIIPushTab(viewer);
218: PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");
219: /* Make sure that everybody waits for the banner to be printed. */
220: MPI_Barrier(PetscObjectComm((PetscObject)viewer));
221: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
222: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
223: l = 0;
224: for (count = 0; count < osm->N; ++count) {
225: PetscMPIInt srank, ssize;
226: if (l<osm->n) {
227: PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
228: if (numbering[d] == count) {
229: MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
230: MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
231: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
232: ISGetLocalSize(osm->ois[d],&bsz);
233: PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
234: PetscViewerFlush(sviewer);
235: if (view_subdomains) {
236: PCGASMSubdomainView_Private(pc,d,sviewer);
237: }
238: if (!pc->setupcalled) {
239: PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");
240: } else {
241: KSPView(osm->ksp[d],sviewer);
242: }
243: PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");
244: PetscViewerFlush(sviewer);
245: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
246: ++l;
247: }
248: }
249: MPI_Barrier(PetscObjectComm((PetscObject)pc));
250: }
251: PetscFree2(numbering,permutation);
252: PetscViewerASCIIPopTab(viewer);
253: }
254: PetscViewerFlush(viewer);
255: PetscViewerASCIIPopSynchronized(viewer);
256: return(0);
257: }
259: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
263: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
264: {
265: PC_GASM *osm = (PC_GASM*)pc->data;
266: MatPartitioning part;
267: MPI_Comm comm;
268: PetscMPIInt size;
269: PetscInt nlocalsubdomains,fromrows_localsize;
270: IS partitioning,fromrows,isn;
271: Vec outervec;
272: PetscErrorCode ierr;
275: PetscObjectGetComm((PetscObject)pc,&comm);
276: MPI_Comm_size(comm,&size);
277: /* We do not need hierarchical partitioning when the total number of
278: * subdomains is compatible with the number of MPI tasks; in the
279: * following cases (N undetermined, N >= number of processes, or N == 1)
280: * hierarchical partitioning is skipped.
281: * */
282: if(osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return(0);
283: if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of MPI processes %d",osm->N,size);
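/* Illustrative example (editor's note): with size = 8 processes and osm->N = 2 requested subdomains,
   each of the 2 coarse parts produced below is split into nlocalsubdomains = 8/2 = 4 fine parts. */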
284: nlocalsubdomains = size/osm->N;
285: osm->n = 1;
286: MatPartitioningCreate(comm,&part);
287: MatPartitioningSetAdjacency(part,pc->pmat);
288: MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
289: MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
290: MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
291: MatPartitioningSetFromOptions(part);
292: /* get the new owning process of each vertex */
293: MatPartitioningApply(part,&partitioning);
294: ISBuildTwoSided(partitioning,NULL,&fromrows);
295: ISPartitioningToNumbering(partitioning,&isn);
296: ISDestroy(&isn);
297: ISGetLocalSize(fromrows,&fromrows_localsize);
298: MatPartitioningDestroy(&part);
299: MatCreateVecs(pc->pmat,&outervec,NULL);
300: VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
301: VecDuplicate(osm->pcx,&(osm->pcy));
302: VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
303: MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
304: PetscObjectReference((PetscObject)fromrows);
305: osm->permutationIS = fromrows;
306: osm->pcmat = pc->pmat;
307: PetscObjectReference((PetscObject)osm->permutationP);
308: pc->pmat = osm->permutationP;
309: VecDestroy(&outervec);
310: ISDestroy(&fromrows);
311: ISDestroy(&partitioning);
312: osm->n = PETSC_DETERMINE;
313: return(0);
314: }
318: static PetscErrorCode PCSetUp_GASM(PC pc)
319: {
320: PC_GASM *osm = (PC_GASM*)pc->data;
322: PetscBool symset,flg;
323: PetscInt i,nInnerIndices,nTotalInnerIndices;
324: PetscMPIInt rank, size;
325: MatReuse scall = MAT_REUSE_MATRIX;
326: KSP ksp;
327: PC subpc;
328: const char *prefix,*pprefix;
329: Vec x,y;
330: PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
331: const PetscInt *oidxi; /* Indices of the i-th local outer subdomain. */
332: PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
333: PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
334: IS gois; /* Disjoint union of the global indices of the outer subdomains. */
335: IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
336: PetscScalar *gxarray, *gyarray;
337: PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
338: PetscInt num_subdomains = 0;
339: DM *subdomain_dm = NULL;
340: char **subdomain_names = NULL;
341: PetscInt *numbering;
345: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
346: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
347: if (!pc->setupcalled) {
348: /* use a hierarchical partitioning */
349: if(osm->hierarchicalpartitioning){
350: PCGASMSetHierarchicalPartitioning(pc);
351: }
352: if (!osm->type_set) {
353: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
354: if (symset && flg) osm->type = PC_GASM_BASIC;
355: }
357: if (osm->n == PETSC_DETERMINE) {
358: if (osm->N != PETSC_DETERMINE) {
359: /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
360: PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
361: } else if (osm->dm_subdomains && pc->dm) {
362: /* try pc->dm next, if allowed */
363: PetscInt d;
364: IS *inner_subdomain_is, *outer_subdomain_is;
365: DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
366: if (num_subdomains) {
367: PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
368: }
369: for (d = 0; d < num_subdomains; ++d) {
370: if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
371: if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
372: }
373: PetscFree(inner_subdomain_is);
374: PetscFree(outer_subdomain_is);
375: } else {
376: /* still no subdomains; use one per processor */
377: osm->nmax = osm->n = 1;
378: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
379: osm->N = size;
380: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
381: }
382: }
383: if (!osm->iis) {
384: /*
385: osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
386: We create the requisite number of local inner subdomains and then expand them into
387: outer subdomains, if necessary.
388: */
389: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
390: }
391: if (!osm->ois) {
392: /*
393: Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
394: has been requested, copy the inner subdomains over so they can be modified.
395: */
396: PetscMalloc1(osm->n,&osm->ois);
397: for (i=0; i<osm->n; ++i) {
398: if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
399: ISDuplicate(osm->iis[i],(osm->ois)+i);
400: ISCopy(osm->iis[i],osm->ois[i]);
401: } else {
402: PetscObjectReference((PetscObject)((osm->iis)[i]));
403: osm->ois[i] = osm->iis[i];
404: }
405: }
406: if (osm->overlap>0 && osm->N>1) {
407: /* Extend the "overlapping" regions by a number of steps */
408: MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
409: }
410: }
412: /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
413: if (osm->nmax == PETSC_DETERMINE) {
414: PetscMPIInt inwork,outwork;
415: /* determine the maximum number of local subdomains over all processes */
416: inwork = osm->n;
417: MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
418: osm->nmax = outwork;
419: }
420: if (osm->N == PETSC_DETERMINE) {
421: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
422: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
423: }
426: if (osm->sort_indices) {
427: for (i=0; i<osm->n; i++) {
428: ISSort(osm->ois[i]);
429: ISSort(osm->iis[i]);
430: }
431: }
432: PCGetOptionsPrefix(pc,&prefix);
433: PCGASMPrintSubdomains(pc);
435: /*
436: Merge the ISs, create merged vectors and restrictions.
437: */
438: /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
439: on = 0;
440: for (i=0; i<osm->n; i++) {
441: ISGetLocalSize(osm->ois[i],&oni);
442: on += oni;
443: }
444: PetscMalloc1(on, &oidx);
445: on = 0;
446: /* Merge local indices together */
447: for (i=0; i<osm->n; i++) {
448: ISGetLocalSize(osm->ois[i],&oni);
449: ISGetIndices(osm->ois[i],&oidxi);
450: PetscArraycpy(oidx+on,oidxi,oni);
451: ISRestoreIndices(osm->ois[i],&oidxi);
452: on += oni;
453: }
454: ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
455: nTotalInnerIndices = 0;
456: for(i=0; i<osm->n; i++){
457: ISGetLocalSize(osm->iis[i],&nInnerIndices);
458: nTotalInnerIndices += nInnerIndices;
459: }
460: VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
461: VecDuplicate(x,&y);
463: VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
464: VecDuplicate(osm->gx,&osm->gy);
465: VecGetOwnershipRange(osm->gx, &gostart, NULL);
466: ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
467: /* gois may contain indices that are not locally owned */
468: VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
469: PetscMalloc1(osm->n,&numbering);
470: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
471: VecDestroy(&x);
472: ISDestroy(&gois);
474: /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
475: {
476: PetscInt ini; /* Number of indices in the i-th local inner subdomain. */
477: PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
478: PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
479: PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
480: IS giis; /* IS for the disjoint union of inner subdomains. */
481: IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
482: PetscScalar *array;
483: const PetscInt *indices;
484: PetscInt k;
485: on = 0;
486: for (i=0; i<osm->n; i++) {
487: ISGetLocalSize(osm->ois[i],&oni);
488: on += oni;
489: }
490: PetscMalloc1(on, &iidx);
491: PetscMalloc1(on, &ioidx);
492: VecGetArray(y,&array);
493: /* mark the entries of each inner subdomain with its global subdomain number so the overlap can be identified after the scatter below */
494: in = 0;
495: for (i=0; i<osm->n; i++) {
496: ISGetLocalSize(osm->iis[i],&ini);
497: for (k = 0; k < ini; ++k){
498: array[in+k] = numbering[i];
499: }
500: in += ini;
501: }
502: VecRestoreArray(y,&array);
503: VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
504: VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
505: VecGetOwnershipRange(osm->gy,&gostart, NULL);
506: VecGetArray(osm->gy,&array);
507: on = 0;
508: in = 0;
509: for (i=0; i<osm->n; i++) {
510: ISGetLocalSize(osm->ois[i],&oni);
511: ISGetIndices(osm->ois[i],&indices);
512: for (k=0; k<oni; k++) {
513: /* skip overlapping indices to get inner domain */
514: if(PetscRealPart(array[on+k]) != numbering[i]) continue;
515: iidx[in] = indices[k];
516: ioidx[in++] = gostart+on+k;
517: }
518: ISRestoreIndices(osm->ois[i], &indices);
519: on += oni;
520: }
521: VecRestoreArray(osm->gy,&array);
522: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
523: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
524: VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
525: VecDestroy(&y);
526: ISDestroy(&giis);
527: ISDestroy(&giois);
528: }
529: ISDestroy(&goid);
530: PetscFree(numbering);
532: /* Create the subdomain work vectors. */
533: PetscMalloc1(osm->n,&osm->x);
534: PetscMalloc1(osm->n,&osm->y);
535: VecGetArray(osm->gx, &gxarray);
536: VecGetArray(osm->gy, &gyarray);
537: for (i=0, on=0; i<osm->n; ++i, on += oni) {
538: PetscInt oNi;
539: ISGetLocalSize(osm->ois[i],&oni);
540: /* on a sub communicator */
541: ISGetSize(osm->ois[i],&oNi);
542: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
543: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
544: }
545: VecRestoreArray(osm->gx, &gxarray);
546: VecRestoreArray(osm->gy, &gyarray);
547: /* Create the subdomain solvers */
548: PetscMalloc1(osm->n,&osm->ksp);
549: for (i=0; i<osm->n; i++) {
550: char subprefix[PETSC_MAX_PATH_LEN+1];
551: KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
552: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
553: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
554: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
555: KSPSetType(ksp,KSPPREONLY);
556: KSPGetPC(ksp,&subpc); /* Why do we need this here? */
557: if (subdomain_dm) {
558: KSPSetDM(ksp,subdomain_dm[i]);
559: DMDestroy(subdomain_dm+i);
560: }
561: PCGetOptionsPrefix(pc,&prefix);
562: KSPSetOptionsPrefix(ksp,prefix);
563: if (subdomain_names && subdomain_names[i]) {
564: PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
565: KSPAppendOptionsPrefix(ksp,subprefix);
566: PetscFree(subdomain_names[i]);
567: }
568: KSPAppendOptionsPrefix(ksp,"sub_");
569: osm->ksp[i] = ksp;
570: }
571: PetscFree(subdomain_dm);
572: PetscFree(subdomain_names);
573: scall = MAT_INITIAL_MATRIX;
575: } else { /* if (pc->setupcalled) */
576: /*
577: Destroy the submatrices from the previous iteration
578: */
579: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
580: MatDestroyMatrices(osm->n,&osm->pmat);
581: scall = MAT_INITIAL_MATRIX;
582: }
583: if(osm->permutationIS){
584: MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
585: PetscObjectReference((PetscObject)osm->permutationP);
586: osm->pcmat = pc->pmat;
587: pc->pmat = osm->permutationP;
588: }
590: }
593: /*
594: Extract out the submatrices.
595: */
596: if (size > 1) {
597: MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
598: } else {
599: MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
600: }
601: if (scall == MAT_INITIAL_MATRIX) {
602: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
603: for (i=0; i<osm->n; i++) {
604: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
605: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
606: }
607: }
609: /* Return control to the user so that the submatrices can be modified (e.g., to apply
610: different boundary conditions for the submatrices than for the global problem) */
611: PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);
613: /*
614: Loop over submatrices putting them into local ksps
615: */
616: for (i=0; i<osm->n; i++) {
617: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
618: if (!pc->setupcalled) {
619: KSPSetFromOptions(osm->ksp[i]);
620: }
621: }
622: if(osm->pcmat){
623: MatDestroy(&pc->pmat);
624: pc->pmat = osm->pcmat;
625: osm->pcmat = 0;
626: }
627: return(0);
628: }
630: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
631: {
632: PC_GASM *osm = (PC_GASM*)pc->data;
634: PetscInt i;
637: for (i=0; i<osm->n; i++) {
638: KSPSetUp(osm->ksp[i]);
639: }
640: return(0);
641: }
643: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
644: {
645: PC_GASM *osm = (PC_GASM*)pc->data;
647: PetscInt i;
648: Vec x,y;
649: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
652: if(osm->pctoouter){
653: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
654: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
655: x = osm->pcx;
656: y = osm->pcy;
657: }else{
658: x = xin;
659: y = yout;
660: }
661: /*
662: Support for limiting the restriction or interpolation only to the inner
663: subdomain values (leaving the other values 0).
664: */
665: if (!(osm->type & PC_GASM_RESTRICT)) {
666: /* have to zero the work RHS since scatter may leave some slots empty */
667: VecZeroEntries(osm->gx);
668: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
669: } else {
670: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
671: }
672: VecZeroEntries(osm->gy);
673: if (!(osm->type & PC_GASM_RESTRICT)) {
674: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
675: } else {
676: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
677: }
678: /* do the subdomain solves */
679: for (i=0; i<osm->n; ++i) {
680: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
681: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
682: }
683: /* Do we need to zero y ?? */
684: VecZeroEntries(y);
685: if (!(osm->type & PC_GASM_INTERPOLATE)) {
686: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
687: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
688: } else {
689: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
690: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
691: }
692: if(osm->pctoouter){
693: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
694: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
695: }
696: return(0);
697: }
699: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
700: {
701: PC_GASM *osm = (PC_GASM*)pc->data;
703: PetscInt i;
704: Vec x,y;
705: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
708: if(osm->pctoouter){
709: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
710: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
711: x = osm->pcx;
712: y = osm->pcy;
713: }else{
714: x = xin;
715: y = yout;
716: }
717: /*
718: Support for limiting the restriction or interpolation to only local
719: subdomain values (leaving the other values 0).
721: Note: these are reversed from the PCApply_GASM() because we are applying the
722: transpose of the three terms
723: */
724: if (!(osm->type & PC_GASM_INTERPOLATE)) {
725: /* have to zero the work RHS since scatter may leave some slots empty */
726: VecZeroEntries(osm->gx);
727: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
728: } else {
729: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
730: }
731: VecZeroEntries(osm->gy);
732: if (!(osm->type & PC_GASM_INTERPOLATE)) {
733: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
734: } else {
735: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
736: }
737: /* do the local solves */
738: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
739: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
740: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
741: }
742: VecZeroEntries(y);
743: if (!(osm->type & PC_GASM_RESTRICT)) {
744: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
745: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
746: } else {
747: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
748: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
749: }
750: if(osm->pctoouter){
751: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
752: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
753: }
754: return(0);
755: }
757: static PetscErrorCode PCReset_GASM(PC pc)
758: {
759: PC_GASM *osm = (PC_GASM*)pc->data;
761: PetscInt i;
764: if (osm->ksp) {
765: for (i=0; i<osm->n; i++) {
766: KSPReset(osm->ksp[i]);
767: }
768: }
769: if (osm->pmat) {
770: if (osm->n > 0) {
771: PetscMPIInt size;
772: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
773: if (size > 1) {
774: /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
775: MatDestroyMatrices(osm->n,&osm->pmat);
776: } else {
777: MatDestroySubMatrices(osm->n,&osm->pmat);
778: }
779: }
780: }
781: if (osm->x) {
782: for (i=0; i<osm->n; i++) {
783: VecDestroy(&osm->x[i]);
784: VecDestroy(&osm->y[i]);
785: }
786: }
787: VecDestroy(&osm->gx);
788: VecDestroy(&osm->gy);
790: VecScatterDestroy(&osm->gorestriction);
791: VecScatterDestroy(&osm->girestriction);
792: if (!osm->user_subdomains) {
793: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
794: osm->N = PETSC_DETERMINE;
795: osm->nmax = PETSC_DETERMINE;
796: }
797: if(osm->pctoouter){
798: VecScatterDestroy(&(osm->pctoouter));
799: }
800: if(osm->permutationIS){
801: ISDestroy(&(osm->permutationIS));
802: }
803: if(osm->pcx){
804: VecDestroy(&(osm->pcx));
805: }
806: if(osm->pcy){
807: VecDestroy(&(osm->pcy));
808: }
809: if(osm->permutationP){
810: MatDestroy(&(osm->permutationP));
811: }
812: if(osm->pcmat){
813: MatDestroy(&osm->pcmat);
814: }
815: return(0);
816: }
818: static PetscErrorCode PCDestroy_GASM(PC pc)
819: {
820: PC_GASM *osm = (PC_GASM*)pc->data;
822: PetscInt i;
825: PCReset_GASM(pc);
826: /* PCReset will not destroy subdomains, if user_subdomains is true. */
827: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
828: if (osm->ksp) {
829: for (i=0; i<osm->n; i++) {
830: KSPDestroy(&osm->ksp[i]);
831: }
832: PetscFree(osm->ksp);
833: }
834: PetscFree(osm->x);
835: PetscFree(osm->y);
836: PetscFree(pc->data);
837: return(0);
838: }
840: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
841: {
842: PC_GASM *osm = (PC_GASM*)pc->data;
844: PetscInt blocks,ovl;
845: PetscBool symset,flg;
846: PCGASMType gasmtype;
849: /* default to the basic (symmetric) type if the matrix is known to be symmetric */
850: if (!osm->type_set && pc->pmat) {
851: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
852: if (symset && flg) osm->type = PC_GASM_BASIC;
853: }
854: PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
855: PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
856: PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
857: if (flg) {
858: PCGASMSetTotalSubdomains(pc,blocks);
859: }
860: PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
861: if (flg) {
862: PCGASMSetOverlap(pc,ovl);
863: osm->dm_subdomains = PETSC_FALSE;
864: }
865: flg = PETSC_FALSE;
866: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
867: if (flg) {PCGASMSetType(pc,gasmtype);}
868: PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
869: PetscOptionsTail();
870: return(0);
871: }
873: /*------------------------------------------------------------------------------------*/
875: /*@
876: PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
877: communicator.
878: Logically collective on pc
880: Input Parameters:
881: + pc - the preconditioner
882: - N - total number of subdomains
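   Example usage (an editor's illustrative sketch, not part of the original manual page; A, b, x and
   comm are assumed to exist already):
.vb
      KSP ksp;
      PC  pc;
      KSPCreate(comm,&ksp);
      KSPSetOperators(ksp,A,A);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetTotalSubdomains(pc,4);   /* let PETSc construct 4 subdomains across the communicator */
      KSPSetFromOptions(ksp);
      KSPSolve(ksp,b,x);
.ve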
885: Level: beginner
887: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
888: PCGASMCreateSubdomains2D()
889: @*/
890: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
891: {
892: PC_GASM *osm = (PC_GASM*)pc->data;
893: PetscMPIInt size,rank;
897: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
898: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
900: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
901: osm->ois = osm->iis = NULL;
903: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
904: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
905: osm->N = N;
906: osm->n = PETSC_DETERMINE;
907: osm->nmax = PETSC_DETERMINE;
908: osm->dm_subdomains = PETSC_FALSE;
909: return(0);
910: }
913: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
914: {
915: PC_GASM *osm = (PC_GASM*)pc->data;
916: PetscErrorCode ierr;
917: PetscInt i;
920: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
921: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
923: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
924: osm->iis = osm->ois = NULL;
925: osm->n = n;
926: osm->N = PETSC_DETERMINE;
927: osm->nmax = PETSC_DETERMINE;
928: if (ois) {
929: PetscMalloc1(n,&osm->ois);
930: for (i=0; i<n; i++) {
931: PetscObjectReference((PetscObject)ois[i]);
932: osm->ois[i] = ois[i];
933: }
934: /*
935: Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
936: it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
937: */
938: osm->overlap = -1;
939: /* inner subdomains must be provided */
940: if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner index sets have to be provided when outer index sets are given");
941: }/* end if */
942: if (iis) {
943: PetscMalloc1(n,&osm->iis);
944: for (i=0; i<n; i++) {
945: PetscObjectReference((PetscObject)iis[i]);
946: osm->iis[i] = iis[i];
947: }
948: if (!ois) {
949: osm->ois = NULL;
950: /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */
951: }
952: }
953: #if defined(PETSC_USE_DEBUG)
954: {
955: PetscInt j,rstart,rend,*covered,lsize;
956: const PetscInt *indices;
957: /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
958: MatGetOwnershipRange(pc->pmat,&rstart,&rend);
959: PetscCalloc1(rend-rstart,&covered);
960: /* check if the current processor owns indices from others */
961: for (i=0; i<n; i++) {
962: ISGetIndices(osm->iis[i],&indices);
963: ISGetLocalSize(osm->iis[i],&lsize);
964: for (j=0; j<lsize; j++) {
965: if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot own an index %D from other processors", indices[j]);
966: else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot have an overlapping index %D",indices[j]);
967: else covered[indices[j]-rstart] = 1;
968: }
969: ISRestoreIndices(osm->iis[i],&indices);
970: }
971: /* check if we miss any indices */
972: for (i=rstart; i<rend; i++) {
973: if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %D was not covered by inner subdomains",i);
974: }
975: PetscFree(covered);
976: }
977: #endif
978: if (iis) osm->user_subdomains = PETSC_TRUE;
979: return(0);
980: }
983: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
984: {
985: PC_GASM *osm = (PC_GASM*)pc->data;
988: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
989: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
990: if (!pc->setupcalled) osm->overlap = ovl;
991: return(0);
992: }
994: static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
995: {
996: PC_GASM *osm = (PC_GASM*)pc->data;
999: osm->type = type;
1000: osm->type_set = PETSC_TRUE;
1001: return(0);
1002: }
1004: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1005: {
1006: PC_GASM *osm = (PC_GASM*)pc->data;
1009: osm->sort_indices = doSort;
1010: return(0);
1011: }
1013: /*
1014: FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1015: In particular, it would upset the global subdomain number calculation.
1016: */
1017: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1018: {
1019: PC_GASM *osm = (PC_GASM*)pc->data;
1023: if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1025: if (n) *n = osm->n;
1026: if (first) {
1027: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1028: *first -= osm->n;
1029: }
1030: if (ksp) {
1031: /* Assume that local solves are now different; not necessarily
1032: true, though! This flag is used only for PCView_GASM() */
1033: *ksp = osm->ksp;
1034: osm->same_subdomain_solvers = PETSC_FALSE;
1035: }
1036: return(0);
1037: } /* PCGASMGetSubKSP_GASM() */
1039: /*@C
1040: PCGASMSetSubdomains - Sets the subdomains for this processor
1041: for the additive Schwarz preconditioner.
1043: Collective on pc
1045: Input Parameters:
1046: + pc - the preconditioner object
1047: . n - the number of subdomains for this processor
1048: . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1049: - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1051: Notes:
1052: The IS indices use the parallel, global numbering of the vector entries.
1053: Inner subdomains are those where the correction is applied.
1054: Outer subdomains are those where the residual necessary to obtain the
1055: corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1056: Both inner and outer subdomains can extend over several processors.
1057: This processor's portion of a subdomain is known as a local subdomain.
1059: Inner subdomains cannot overlap with each other, do not have any entities from remote processors,
1060: and have to cover the entire local portion of the matrix owned by the current processor. The index sets on each
1061: process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1062: on another MPI process.
1064: By default the GASM preconditioner uses 1 (local) subdomain per processor.
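   Example usage (an editor's illustrative sketch, not part of the original manual page; A is the
   matrix later passed to the solver and N is a placeholder subdomain count):
.vb
      PetscInt n;
      IS       *iis;
      PCGASMCreateSubdomains(A,N,&n,&iis);   /* N nonoverlapping inner subdomains */
      PCGASMSetSubdomains(pc,n,iis,NULL);    /* outer subdomains come from the overlap setting */
      PCGASMDestroySubdomains(n,&iis,NULL);
.ve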
1067: Level: advanced
1069: .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1070: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1071: @*/
1072: PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1073: {
1074: PC_GASM *osm = (PC_GASM*)pc->data;
1079: PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1080: osm->dm_subdomains = PETSC_FALSE;
1081: return(0);
1082: }
1085: /*@
1086: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1087: additive Schwarz preconditioner. Either all or no processors in the
1088: pc communicator must call this routine.
1090: Logically Collective on pc
1092: Input Parameters:
1093: + pc - the preconditioner context
1094: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1096: Options Database Key:
1097: . -pc_gasm_overlap <overlap> - Sets overlap
1099: Notes:
1100: By default the GASM preconditioner uses 1 subdomain per processor. To use
1101: multiple subdomains per processor or "straddling" subdomains that intersect
1102: multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1104: The overlap defaults to 0, so if one desires that no additional
1105: overlap be computed beyond what may have been set with a call to
1106: PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1107: not explicitly set the subdomains in application code, then all overlap would be computed
1108: internally by PETSc, and using an overlap of 0 would result in a GASM
1109: variant that is equivalent to the block Jacobi preconditioner.
1111: Note that one can define initial index sets with any overlap via
1112: PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1113: PETSc to extend that overlap further, if desired.
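   Example usage (an editor's illustrative sketch, not part of the original manual page):
.vb
      PCSetType(pc,PCGASM);
      PCGASMSetOverlap(pc,2);   /* request 2 levels of additional overlap */
.ve
   or, equivalently, from the command line: -pc_type gasm -pc_gasm_overlap 2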
1115: Level: intermediate
1117: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1118: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1119: @*/
1120: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1121: {
1123: PC_GASM *osm = (PC_GASM*)pc->data;
1128: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1129: osm->dm_subdomains = PETSC_FALSE;
1130: return(0);
1131: }
1133: /*@
1134: PCGASMSetType - Sets the type of restriction and interpolation used
1135: for local problems in the additive Schwarz method.
1137: Logically Collective on PC
1139: Input Parameters:
1140: + pc - the preconditioner context
1141: - type - variant of GASM, one of
1142: .vb
1143: PC_GASM_BASIC - full interpolation and restriction
1144: PC_GASM_RESTRICT - full restriction, local processor interpolation
1145: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1146: PC_GASM_NONE - local processor restriction and interpolation
1147: .ve
1149: Options Database Key:
1150: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1152: Level: intermediate
1154: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1155: PCGASMCreateSubdomains2D()
1156: @*/
1157: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1158: {
1164: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1165: return(0);
1166: }
1168: /*@
1169: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1171: Logically Collective on PC
1173: Input Parameters:
1174: + pc - the preconditioner context
1175: - doSort - sort the subdomain indices
1177: Level: intermediate
1179: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1180: PCGASMCreateSubdomains2D()
1181: @*/
1182: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1183: {
1189: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1190: return(0);
1191: }
1193: /*@C
1194: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1195: this processor.
1197: Collective on PC iff first_local is requested
1199: Input Parameter:
1200: . pc - the preconditioner context
1202: Output Parameters:
1203: + n_local - the number of blocks on this processor or NULL
1204: . first_local - the global number of the first block on this processor or NULL,
1205: all processors must request or all must pass NULL
1206: - ksp - the array of KSP contexts
1208: Note:
1209: After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
1211: Currently for some matrix implementations only 1 block per processor
1212: is supported.
1214: You must call KSPSetUp() before calling PCGASMGetSubKSP().
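   Example usage (an editor's illustrative sketch, not part of the original manual page; ksp is the
   outer solver whose PC has type PCGASM):
.vb
      KSP      *subksp;
      PetscInt nlocal,first,i;
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPSetTolerances(subksp[i],1.e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
      }
.ve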
1216: Level: advanced
1218: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1219: PCGASMCreateSubdomains2D(),
1220: @*/
1221: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1222: {
1227: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1228: return(0);
1229: }
1231: /* -------------------------------------------------------------------------------------*/
1232: /*MC
1233: PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1234: its own KSP object.
1236: Options Database Keys:
1237: + -pc_gasm_total_subdomains <n> - Sets the total number of subdomains to be distributed among the processors
1238: . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1239: . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1240: . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1241: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1243: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1244: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1245: -pc_gasm_type basic to use the standard GASM.
1247: Notes:
1248: Blocks can be shared by multiple processes.
1250: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1251: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1253: To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1254: and set the options directly on the resulting KSP object (you can access its PC
1255: with KSPGetPC())
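   Example usage (an editor's illustrative sketch, not part of the original manual page; ./app is a
   placeholder executable name):
.vb
      mpiexec -n 4 ./app -ksp_type gmres -pc_type gasm \
          -pc_gasm_total_subdomains 8 -pc_gasm_overlap 1 \
          -sub_ksp_type preonly -sub_pc_type ilu -ksp_view
.ve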
1258: Level: beginner
1260: References:
1261: + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1262: Courant Institute, New York University Technical report
1263: - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1264: Cambridge University Press.
1266: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1267: PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1268: PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1270: M*/
1272: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1273: {
1275: PC_GASM *osm;
1278: PetscNewLog(pc,&osm);
1280: osm->N = PETSC_DETERMINE;
1281: osm->n = PETSC_DECIDE;
1282: osm->nmax = PETSC_DETERMINE;
1283: osm->overlap = 0;
1284: osm->ksp = 0;
1285: osm->gorestriction = 0;
1286: osm->girestriction = 0;
1287: osm->pctoouter = 0;
1288: osm->gx = 0;
1289: osm->gy = 0;
1290: osm->x = 0;
1291: osm->y = 0;
1292: osm->pcx = 0;
1293: osm->pcy = 0;
1294: osm->permutationIS = 0;
1295: osm->permutationP = 0;
1296: osm->pcmat = 0;
1297: osm->ois = 0;
1298: osm->iis = 0;
1299: osm->pmat = 0;
1300: osm->type = PC_GASM_RESTRICT;
1301: osm->same_subdomain_solvers = PETSC_TRUE;
1302: osm->sort_indices = PETSC_TRUE;
1303: osm->dm_subdomains = PETSC_FALSE;
1304: osm->hierarchicalpartitioning = PETSC_FALSE;
1306: pc->data = (void*)osm;
1307: pc->ops->apply = PCApply_GASM;
1308: pc->ops->applytranspose = PCApplyTranspose_GASM;
1309: pc->ops->setup = PCSetUp_GASM;
1310: pc->ops->reset = PCReset_GASM;
1311: pc->ops->destroy = PCDestroy_GASM;
1312: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1313: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1314: pc->ops->view = PCView_GASM;
1315: pc->ops->applyrichardson = 0;
1317: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1318: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1319: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1320: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1321: PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1322: return(0);
1323: }
1326: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1327: {
1328: MatPartitioning mpart;
1329: const char *prefix;
1330: PetscInt i,j,rstart,rend,bs;
1331: PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1332: Mat Ad = NULL, adj;
1333: IS ispart,isnumb,*is;
1334: PetscErrorCode ierr;
1337: if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of local subdomains must be > 0, got nloc = %D",nloc);
1339: /* Get prefix, row distribution, and block size */
1340: MatGetOptionsPrefix(A,&prefix);
1341: MatGetOwnershipRange(A,&rstart,&rend);
1342: MatGetBlockSize(A,&bs);
1343: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1345: /* Get diagonal block from matrix if possible */
1346: MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1347: if (hasop) {
1348: MatGetDiagonalBlock(A,&Ad);
1349: }
1350: if (Ad) {
1351: PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1352: if (!isbaij) {PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1353: }
1354: if (Ad && nloc > 1) {
1355: PetscBool match,done;
1356: /* Try to setup a good matrix partitioning if available */
1357: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1358: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1359: MatPartitioningSetFromOptions(mpart);
1360: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1361: if (!match) {
1362: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1363: }
1364: if (!match) { /* assume a "good" partitioner is available */
1365: PetscInt na;
1366: const PetscInt *ia,*ja;
1367: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1368: if (done) {
1369: /* Build adjacency matrix by hand. Unfortunately a call to
1370: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1371: remove the block-aij structure and we cannot expect
1372: MatPartitioning to split vertices as we need */
1373: PetscInt i,j,len,nnz,cnt,*iia=0,*jja=0;
1374: const PetscInt *row;
1375: nnz = 0;
1376: for (i=0; i<na; i++) { /* count number of nonzeros */
1377: len = ia[i+1] - ia[i];
1378: row = ja + ia[i];
1379: for (j=0; j<len; j++) {
1380: if (row[j] == i) { /* don't count diagonal */
1381: len--; break;
1382: }
1383: }
1384: nnz += len;
1385: }
1386: PetscMalloc1(na+1,&iia);
1387: PetscMalloc1(nnz,&jja);
1388: nnz = 0;
1389: iia[0] = 0;
1390: for (i=0; i<na; i++) { /* fill adjacency */
1391: cnt = 0;
1392: len = ia[i+1] - ia[i];
1393: row = ja + ia[i];
1394: for (j=0; j<len; j++) {
1395: if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1396: }
1397: nnz += cnt;
1398: iia[i+1] = nnz;
1399: }
1400: /* Partitioning of the adjacency matrix */
1401: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1402: MatPartitioningSetAdjacency(mpart,adj);
1403: MatPartitioningSetNParts(mpart,nloc);
1404: MatPartitioningApply(mpart,&ispart);
1405: ISPartitioningToNumbering(ispart,&isnumb);
1406: MatDestroy(&adj);
1407: foundpart = PETSC_TRUE;
1408: }
1409: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1410: }
1411: MatPartitioningDestroy(&mpart);
1412: }
1413: PetscMalloc1(nloc,&is);
1414: if (!foundpart) {
1416: /* Partitioning by contiguous chunks of rows */
1418: PetscInt mbs = (rend-rstart)/bs;
1419: PetscInt start = rstart;
1420: for (i=0; i<nloc; i++) {
1421: PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1422: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1423: start += count;
1424: }
1426: } else {
1428: /* Partitioning by adjacency of diagonal block */
1430: const PetscInt *numbering;
1431: PetscInt *count,nidx,*indices,*newidx,start=0;
1432: /* Get node count in each partition */
1433: PetscMalloc1(nloc,&count);
1434: ISPartitioningCount(ispart,nloc,count);
1435: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1436: for (i=0; i<nloc; i++) count[i] *= bs;
1437: }
1438: /* Build indices from node numbering */
1439: ISGetLocalSize(isnumb,&nidx);
1440: PetscMalloc1(nidx,&indices);
1441: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1442: ISGetIndices(isnumb,&numbering);
1443: PetscSortIntWithPermutation(nidx,numbering,indices);
1444: ISRestoreIndices(isnumb,&numbering);
1445: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1446: PetscMalloc1(nidx*bs,&newidx);
1447: for (i=0; i<nidx; i++) {
1448: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1449: }
1450: PetscFree(indices);
1451: nidx *= bs;
1452: indices = newidx;
1453: }
1454: /* Shift to get global indices */
1455: for (i=0; i<nidx; i++) indices[i] += rstart;
1457: /* Build the index sets for each block */
1458: for (i=0; i<nloc; i++) {
1459: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1460: ISSort(is[i]);
1461: start += count[i];
1462: }
1464: PetscFree(count);
1465: PetscFree(indices);
1466: ISDestroy(&isnumb);
1467: ISDestroy(&ispart);
1468: }
1469: *iis = is;
1470: return(0);
1471: }
1473: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1474: {
1475: PetscErrorCode ierr;
1478: MatSubdomainsCreateCoalesce(A,N,n,iis);
1479: return(0);
1480: }
1484: /*@C
1485: PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1486: Schwarz preconditioner for any problem based on its matrix.
1488: Collective
1490: Input Parameters:
1491: + A - The global matrix operator
1492: - N - the number of global subdomains requested
1494: Output Parameters:
1495: + n - the number of subdomains created on this processor
1496: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1498: Level: advanced
1500: Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1501: When N < size, the subdomains are 'straddling' (they straddle processor boundaries) and are no longer local. The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping
1502: The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping
1503: outer subdomains will be automatically generated from these according to the requested amount of
1504: overlap; this is currently supported only with local subdomains.
1507: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1508: @*/
1509: PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1510: {
1511: PetscMPIInt size;
1512: PetscErrorCode ierr;
1518: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1519: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1520: if (N >= size) {
1521: *n = N/size + (N%size);
1522: PCGASMCreateLocalSubdomains(A,*n,iis);
1523: } else {
1524: PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1525: }
1526: return(0);
1527: }
1529: /*@C
1530: PCGASMDestroySubdomains - Destroys the index sets created with
1531: PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1532: called after setting subdomains with PCGASMSetSubdomains().
1534: Collective
1536: Input Parameters:
1537: + n - the number of index sets
1538: . iis - the array of inner subdomains,
1539: - ois - the array of outer subdomains, can be NULL
1541: Level: intermediate
1543: Notes:
1544: This is merely a convenience subroutine that walks each list,
1545: destroys each IS on the list, and then frees the list. At the end the
1546: list pointers are set to NULL.
1548: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1549: @*/
1550: PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1551: {
1552: PetscInt i;
1556: if (n <= 0) return(0);
1557: if (ois) {
1559: if (*ois) {
1561: for (i=0; i<n; i++) {
1562: ISDestroy(&(*ois)[i]);
1563: }
1564: PetscFree((*ois));
1565: }
1566: }
1567: if (iis) {
1569: if (*iis) {
1571: for (i=0; i<n; i++) {
1572: ISDestroy(&(*iis)[i]);
1573: }
1574: PetscFree((*iis));
1575: }
1576: }
1577: return(0);
1578: }
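/*
   Editorial usage sketch (not part of the original source): the ordering described in the
   Notes above, with hypothetical variables n and iis from PCGASMCreateSubdomains().

     PCGASMSetSubdomains(pc,n,iis,NULL);       after this the PC holds its own references
     PCGASMDestroySubdomains(n,&iis,NULL);     so the caller's array and ISs can be released
*/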
1581: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1582: { \
1583: PetscInt first_row = first/M, last_row = last/M+1; \
1584: /* \
1585: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1586: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1587: subdomain). \
1588: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1589: of the intersection. \
1590: */ \
1591: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1592: *ylow_loc = PetscMax(first_row,ylow); \
1593: /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1594: *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1595: /* yhigh_loc is the grid row above the last local subdomain element */ \
1596: *yhigh_loc = PetscMin(last_row,yhigh); \
1597: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1598: *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1599: /* Now compute the size of the local subdomain n. */ \
1600: *n = 0; \
1601: if (*ylow_loc < *yhigh_loc) { \
1602: PetscInt width = xright-xleft; \
1603: *n += width*(*yhigh_loc-*ylow_loc-1); \
1604: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1605: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1606: } \
1607: }
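/*
   Editorial worked example (not part of the original source), with dof = 1 for simplicity:
   take M = 10, N = 8 (row-major natural ordering, index = jj*M + ii), a subdomain with
   xleft = 2, xright = 7, ylow = 1, yhigh = 5, and a local ownership range [first,last) = [30,60),
   i.e. this process owns grid rows 3, 4 and 5.

     first_row  = 30/10 = 3,  last_row = 60/10 + 1 = 7
     ylow_loc   = max(3,1)     = 3
     xleft_loc  = max(30%10,2) = 2      since ylow_loc == first_row
     yhigh_loc  = min(7,5)     = 5
     xright_loc = 7                     since yhigh_loc != last_row
     n          = 5*(5-3-1) + min(max(7-2,0),5) - min(max(2-2,0),5) = 5 + 5 - 0 = 10

   which matches the two owned rows (3 and 4) of the subdomain times its width of 5.
*/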
1611: /*@
1612: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1613: preconditioner for a two-dimensional problem on a regular grid.
1615: Collective
1617: Input Parameters:
+ pc - the preconditioner context
1618: . M, N - the global number of grid points in the x and y directions
1619: . Mdomains, Ndomains - the global number of subdomains in the x and y directions
1620: . dof - degrees of freedom per node
1621: - overlap - overlap in mesh lines
1623: Output Parameters:
1624: + nsub - the number of local subdomains created
1625: . iis - array of index sets defining inner (nonoverlapping) subdomains
1626: - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1629: Level: advanced
1631: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1632: @*/
1633: PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1634: {
1636: PetscMPIInt size, rank;
1637: PetscInt i, j;
1638: PetscInt maxheight, maxwidth;
1639: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1640: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1641: PetscInt x[2][2], y[2][2], n[2];
1642: PetscInt first, last;
1643: PetscInt nidx, *idx;
1644: PetscInt ii,jj,s,q,d;
1645: PetscInt k,kk;
1646: PetscMPIInt color;
1647: MPI_Comm comm, subcomm;
1648: IS **xis = 0, **is = ois, **is_local = iis;
1651: PetscObjectGetComm((PetscObject)pc, &comm);
1652: MPI_Comm_size(comm, &size);
1653: MPI_Comm_rank(comm, &rank);
1654: MatGetOwnershipRange(pc->pmat, &first, &last);
1655: if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1656: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1658: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1659: s = 0;
1660: ystart = 0;
1661: for (j=0; j<Ndomains; ++j) {
1662: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1663: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1664: /* Vertical domain limits with an overlap. */
1665: ylow = PetscMax(ystart - overlap,0);
1666: yhigh = PetscMin(ystart + maxheight + overlap,N);
1667: xstart = 0;
1668: for (i=0; i<Mdomains; ++i) {
1669: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1670: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1671: /* Horizontal domain limits with an overlap. */
1672: xleft = PetscMax(xstart - overlap,0);
1673: xright = PetscMin(xstart + maxwidth + overlap,M);
1674: /*
1675: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1676: */
1677: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1678: if (nidx) ++s;
1679: xstart += maxwidth;
1680: } /* for (i = 0; i < Mdomains; ++i) */
1681: ystart += maxheight;
1682: } /* for (j = 0; j < Ndomains; ++j) */
1684: /* Now we can allocate the necessary number of ISs. */
1685: *nsub = s;
1686: PetscMalloc1(*nsub,is);
1687: PetscMalloc1(*nsub,is_local);
1688: s = 0;
1689: ystart = 0;
1690: for (j=0; j<Ndomains; ++j) {
1691: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1692: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1693: /* Vertical domain limits with an overlap. */
1694: y[0][0] = PetscMax(ystart - overlap,0);
1695: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1696: /* Vertical domain limits without an overlap. */
1697: y[1][0] = ystart;
1698: y[1][1] = PetscMin(ystart + maxheight,N);
1699: xstart = 0;
1700: for (i=0; i<Mdomains; ++i) {
1701: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1702: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1703: /* Horizontal domain limits with an overlap. */
1704: x[0][0] = PetscMax(xstart - overlap,0);
1705: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1706: /* Horizontal domain limits without an overlap. */
1707: x[1][0] = xstart;
1708: x[1][1] = PetscMin(xstart+maxwidth,M);
1709: /*
1710: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1711: Do this twice: first for the domains with overlaps, and once without.
1712: During the first pass create the subcommunicators, and use them on the second pass as well.
1713: */
1714: for (q = 0; q < 2; ++q) {
1715: PetscBool split = PETSC_FALSE;
1716: /*
1717: The domain limits, (xleft, xright) and (ylow, yhigh), are adjusted
1718: according to whether the domain with or without an overlap is considered.
1719: */
1720: xleft = x[q][0]; xright = x[q][1];
1721: ylow = y[q][0]; yhigh = y[q][1];
1722: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1723: nidx *= dof;
1724: n[q] = nidx;
1725: /*
1726: Based on the counted number of indices in the local domain *with an overlap*,
1727: construct a subcommunicator of all the processors supporting this domain.
1728: Observe that a domain with an overlap might have nontrivial local support,
1729: while the domain without an overlap might not. Hence, the decision to participate
1730: in the subcommunicator must be based on the domain with an overlap.
1731: */
1732: if (q == 0) {
1733: if (nidx) color = 1;
1734: else color = MPI_UNDEFINED;
1735: MPI_Comm_split(comm, color, rank, &subcomm);
1736: split = PETSC_TRUE;
1737: }
1738: /*
1739: Proceed only if the number of local indices *with an overlap* is nonzero.
1740: */
1741: if (n[0]) {
1742: if (q == 0) xis = is;
1743: if (q == 1) {
1744: /*
1745: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1746: Moreover, if the overlap is zero, the two ISs are identical.
1747: */
1748: if (overlap == 0) {
1749: (*is_local)[s] = (*is)[s];
1750: PetscObjectReference((PetscObject)(*is)[s]);
1751: continue;
1752: } else {
1753: xis = is_local;
1754: subcomm = ((PetscObject)(*is)[s])->comm;
1755: }
1756: } /* if (q == 1) */
1757: idx = NULL;
1758: PetscMalloc1(nidx,&idx);
1759: if (nidx) {
1760: k = 0;
1761: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1762: PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1763: PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1764: kk = dof*(M*jj + x0);
1765: for (ii=x0; ii<x1; ++ii) {
1766: for (d = 0; d < dof; ++d) {
1767: idx[k++] = kk++;
1768: }
1769: }
1770: }
1771: }
1772: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1773: if (split) {
1774: MPI_Comm_free(&subcomm);
1775: }
1776: }/* if (n[0]) */
1777: }/* for (q = 0; q < 2; ++q) */
1778: if (n[0]) ++s;
1779: xstart += maxwidth;
1780: } /* for (i = 0; i < Mdomains; ++i) */
1781: ystart += maxheight;
1782: } /* for (j = 0; j < Ndomains; ++j) */
1783: return(0);
1784: }
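/*
   Editorial usage sketch (not part of the original source), assuming pc is a PCGASM whose
   pc->pmat comes from an M x N grid with dof unknowns per node; the 2 x 2 decomposition and
   overlap of 1 below are arbitrary illustrative choices.

     PetscInt nsub;
     IS       *iis,*ois;

     PCGASMCreateSubdomains2D(pc,M,N,2,2,dof,1,&nsub,&iis,&ois);
     PCGASMSetSubdomains(pc,nsub,iis,ois);
     ...
     PCGASMDestroySubdomains(nsub,&iis,&ois);
*/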
1786: /*@C
1787: PCGASMGetSubdomains - Gets the subdomains supported on this processor
1788: for the additive Schwarz preconditioner.
1790: Not Collective
1792: Input Parameter:
1793: . pc - the preconditioner context
1795: Output Parameters:
1796: + n - the number of subdomains for this processor (default value = 1)
1797: . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1798: - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1801: Notes:
1802: The user is responsible for destroying the ISs and freeing the returned arrays.
1803: The IS numbering is in the parallel, global numbering of the vector.
1805: Level: advanced
1807: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1808: PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1809: @*/
1810: PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1811: {
1812: PC_GASM *osm;
1814: PetscBool match;
1815: PetscInt i;
1819: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1820: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1821: osm = (PC_GASM*)pc->data;
1822: if (n) *n = osm->n;
1823: if (iis) {
1824: PetscMalloc1(osm->n, iis);
1825: }
1826: if (ois) {
1827: PetscMalloc1(osm->n, ois);
1828: }
1829: if (iis || ois) {
1830: for (i = 0; i < osm->n; ++i) {
1831: if (iis) (*iis)[i] = osm->iis[i];
1832: if (ois) (*ois)[i] = osm->ois[i];
1833: }
1834: }
1835: return(0);
1836: }
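/*
   Editorial usage sketch (not part of the original source): inspecting the subdomains once
   they have been set on the PC. Only the returned arrays are freed here; see the Notes above
   for ownership of the index sets themselves.

     PetscInt n,i,nidx;
     IS       *iis,*ois;

     PCGASMGetSubdomains(pc,&n,&iis,&ois);
     for (i = 0; i < n; ++i) {
       ISGetLocalSize(iis[i],&nidx);
       PetscPrintf(PETSC_COMM_SELF,"inner subdomain %D has %D local indices\n",i,nidx);
     }
     PetscFree(iis);
     PetscFree(ois);
*/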
1838: /*@C
1839: PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1840: only) for the additive Schwarz preconditioner.
1842: Not Collective
1844: Input Parameter:
1845: . pc - the preconditioner context
1847: Output Parameters:
1848: + n - the number of matrices for this processor (default value = 1)
1849: - mat - the matrices
1851: Notes:
1852: Matrices returned by this routine have the same communicators as the index sets (IS)
1853: used to define the subdomains in PCGASMSetSubdomains().
1854: Level: advanced
1856: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1857: PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1858: @*/
1859: PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1860: {
1861: PC_GASM *osm;
1863: PetscBool match;
1869: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1870: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1871: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1872: osm = (PC_GASM*)pc->data;
1873: if (n) *n = osm->n;
1874: if (mat) *mat = osm->pmat;
1875: return(0);
1876: }
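/*
   Editorial usage sketch (not part of the original source): the submatrices only exist after
   setup, as enforced by the error check above.

     PetscInt n;
     Mat      *submat;

     KSPSetUp(ksp);                          or PCSetUp(pc)
     PCGASMGetSubmatrices(pc,&n,&submat);
     if (n) MatView(submat[0],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)submat[0])));
*/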
1878: /*@
1879: PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1880: Logically Collective
1882: Input Parameters:
1883: + pc - the preconditioner
1884: - flg - boolean indicating whether to use subdomains defined by the DM
1886: Options Database Key:
1887: . -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM, if possible (overridden by -pc_gasm_total_subdomains and -pc_gasm_overlap)
1889: Level: intermediate
1891: Notes:
1892: PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1893: so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1894: automatically disables the use of DM-defined subdomains.
1896: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1897: PCGASMCreateSubdomains2D()
1898: @*/
1899: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1900: {
1901: PC_GASM *osm = (PC_GASM*)pc->data;
1903: PetscBool match;
1908: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1909: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1910: if (match) {
1911: if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1912: osm->dm_subdomains = flg;
1913: }
1914: }
1915: return(0);
1916: }
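/*
   Editorial usage sketch (not part of the original source): the flag must be set before the PC
   is set up, and is honored only if no explicit subdomains, total subdomain count, or overlap
   have been requested (see the Notes above).

     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
     KSPSetFromOptions(ksp);
     KSPSetUp(ksp);

   The options-database key listed above provides the same control from the command line.
*/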
1918: /*@
1919: PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1920: Not Collective
1922: Input Parameter:
1923: . pc - the preconditioner
1925: Output Parameter:
1926: . flg - boolean indicating whether to use subdomains defined by the DM
1928: Level: intermediate
1930: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1931: PCGASMCreateSubdomains2D()
1932: @*/
1933: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1934: {
1935: PC_GASM *osm = (PC_GASM*)pc->data;
1937: PetscBool match;
1942: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1943: if (match) {
1944: if (flg) *flg = osm->dm_subdomains;
1945: }
1946: return(0);
1947: }
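/*
   Editorial usage sketch (not part of the original source): querying the flag set above.

     PetscBool flg;
     PCGASMGetUseDMSubdomains(pc,&flg);
     PetscPrintf(PetscObjectComm((PetscObject)pc),"DM-defined subdomains: %s\n",flg ? "yes" : "no");
*/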