Actual source code: gasm.c
petsc-3.11.4 2019-09-28
1: /*
2: This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3: In this version each processor may intersect multiple subdomains and any subdomain may
4: intersect multiple processors. Intersections of subdomains with processors are called *local
5: subdomains*.
7: N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8: n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9: nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10: */
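/*
   Worked example of the quantities above (illustrative, assuming 4 MPI ranks and the
   run-time option -pc_gasm_total_subdomains 2): then N = 2, each subdomain straddles
   two ranks, every rank intersects exactly one subdomain, so n = 1 on each rank and
   nmax = 1.
*/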
11: #include <petsc/private/pcimpl.h>
12: #include <petscdm.h>
14: typedef struct {
15: PetscInt N,n,nmax;
16: PetscInt overlap; /* overlap requested by user */
17: PCGASMType type; /* use reduced interpolation, restriction or both */
18: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19: PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */
20: PetscBool sort_indices; /* flag to sort subdomain indices */
21: PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23: PetscBool hierarchicalpartitioning;
24: IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25: IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26: KSP *ksp; /* linear solvers for each subdomain */
27: Mat *pmat; /* subdomain block matrices */
28: Vec gx,gy; /* Merged work vectors */
29: Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30: VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31: VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32: VecScatter pctoouter;
33: IS permutationIS;
34: Mat permutationP;
35: Mat pcmat;
36: Vec pcx,pcy;
37: } PC_GASM;
39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40: {
41: PC_GASM *osm = (PC_GASM*)pc->data;
42: PetscInt i;
46: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47: PetscMalloc2(osm->n,numbering,osm->n,permutation);
48: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
49: for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50: PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
51: return(0);
52: }
54: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55: {
56: PC_GASM *osm = (PC_GASM*)pc->data;
57: PetscInt j,nidx;
58: const PetscInt *idx;
59: PetscViewer sviewer;
60: char *cidx;
64: if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
65: /* Inner subdomains. */
66: ISGetLocalSize(osm->iis[i], &nidx);
67: /*
68: No more than 15 characters per index plus a space.
69: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71: For nidx == 0, the whole string consists of 16 '\0' characters.
72: */
73: PetscMalloc1(16*(nidx+1)+1, &cidx);
74: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
75: ISGetIndices(osm->iis[i], &idx);
76: for (j = 0; j < nidx; ++j) {
77: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
78: }
79: ISRestoreIndices(osm->iis[i],&idx);
80: PetscViewerDestroy(&sviewer);
81: PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
82: PetscViewerFlush(viewer);
83: PetscViewerASCIIPushSynchronized(viewer);
84: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
85: PetscViewerFlush(viewer);
86: PetscViewerASCIIPopSynchronized(viewer);
87: PetscViewerASCIIPrintf(viewer, "\n");
88: PetscViewerFlush(viewer);
89: PetscFree(cidx);
90: /* Outer subdomains. */
91: ISGetLocalSize(osm->ois[i], &nidx);
92: /*
93: No more than 15 characters per index plus a space.
94: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
95: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
96: For nidx == 0, the whole string consists of 16 '\0' characters.
97: */
98: PetscMalloc1(16*(nidx+1)+1, &cidx);
99: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
100: ISGetIndices(osm->ois[i], &idx);
101: for (j = 0; j < nidx; ++j) {
102: PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
103: }
104: PetscViewerDestroy(&sviewer);
105: ISRestoreIndices(osm->ois[i],&idx);
106: PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
107: PetscViewerFlush(viewer);
108: PetscViewerASCIIPushSynchronized(viewer);
109: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
110: PetscViewerFlush(viewer);
111: PetscViewerASCIIPopSynchronized(viewer);
112: PetscViewerASCIIPrintf(viewer, "\n");
113: PetscViewerFlush(viewer);
114: PetscFree(cidx);
115: return(0);
116: }
118: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
119: {
120: PC_GASM *osm = (PC_GASM*)pc->data;
121: const char *prefix;
122: char fname[PETSC_MAX_PATH_LEN+1];
123: PetscInt l, d, count;
124: PetscBool doprint,found;
125: PetscViewer viewer, sviewer = NULL;
126: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
130: PCGetOptionsPrefix(pc,&prefix);
131: doprint = PETSC_FALSE;
132: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);
133: if (!doprint) return(0);
134: PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);
135: if (!found) { PetscStrcpy(fname,"stdout"); }
136: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
137: /*
138: Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
139: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
140: */
141: PetscObjectName((PetscObject)viewer);
142: l = 0;
143: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
144: for (count = 0; count < osm->N; ++count) {
145: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
146: if (l<osm->n) {
147: d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
148: if (numbering[d] == count) {
149: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
150: PCGASMSubdomainView_Private(pc,d,sviewer);
151: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
152: ++l;
153: }
154: }
155: MPI_Barrier(PetscObjectComm((PetscObject)pc));
156: }
157: PetscFree2(numbering,permutation);
158: PetscViewerDestroy(&viewer);
159: return(0);
160: }
163: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
164: {
165: PC_GASM *osm = (PC_GASM*)pc->data;
166: const char *prefix;
168: PetscMPIInt rank, size;
169: PetscInt bsz;
170: PetscBool iascii,view_subdomains=PETSC_FALSE;
171: PetscViewer sviewer;
172: PetscInt count, l;
173: char overlap[256] = "user-defined overlap";
174: char gsubdomains[256] = "unknown total number of subdomains";
175: char msubdomains[256] = "unknown max number of local subdomains";
176: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
179: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
180: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
182: if (osm->overlap >= 0) {
183: PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
184: }
185: if (osm->N != PETSC_DETERMINE) {
186: PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
187: }
188: if (osm->nmax != PETSC_DETERMINE) {
189: PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
190: }
192: PCGetOptionsPrefix(pc,&prefix);
193: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);
195: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
196: if (iascii) {
197: /*
198: Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
199: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
200: collectively on the comm.
201: */
202: PetscObjectName((PetscObject)viewer);
203: PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
204: PetscViewerASCIIPrintf(viewer," %s\n",overlap);
205: PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);
206: PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);
207: PetscViewerASCIIPushSynchronized(viewer);
208: PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
209: PetscViewerFlush(viewer);
210: PetscViewerASCIIPopSynchronized(viewer);
211: /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
212: PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");
213: PetscViewerASCIIPushTab(viewer);
214: PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");
215: /* Make sure that everybody waits for the banner to be printed. */
216: MPI_Barrier(PetscObjectComm((PetscObject)viewer));
217: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
218: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
219: l = 0;
220: for (count = 0; count < osm->N; ++count) {
221: PetscMPIInt srank, ssize;
222: if (l<osm->n) {
223: PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
224: if (numbering[d] == count) {
225: MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
226: MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
227: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
228: ISGetLocalSize(osm->ois[d],&bsz);
229: PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
230: PetscViewerFlush(sviewer);
231: if (view_subdomains) {
232: PCGASMSubdomainView_Private(pc,d,sviewer);
233: }
234: if (!pc->setupcalled) {
235: PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");
236: } else {
237: KSPView(osm->ksp[d],sviewer);
238: }
239: PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");
240: PetscViewerFlush(sviewer);
241: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
242: ++l;
243: }
244: }
245: MPI_Barrier(PetscObjectComm((PetscObject)pc));
246: }
247: PetscFree2(numbering,permutation);
248: PetscViewerASCIIPopTab(viewer);
249: }
250: PetscViewerFlush(viewer);
251: PetscViewerASCIIPopSynchronized(viewer);
252: return(0);
253: }
255: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
259: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
260: {
261: PC_GASM *osm = (PC_GASM*)pc->data;
262: MatPartitioning part;
263: MPI_Comm comm;
264: PetscMPIInt size;
265: PetscInt nlocalsubdomains,fromrows_localsize;
266: IS partitioning,fromrows,isn;
267: Vec outervec;
268: PetscErrorCode ierr;
271: PetscObjectGetComm((PetscObject)pc,&comm);
272: MPI_Comm_size(comm,&size);
273: /* We do not need a hierarchical partitioning when the total number of
274: * subdomains is consistent with the number of MPI tasks.
275: * In the cases checked below, hierarchical partitioning is therefore
276: * not needed and we return immediately.
277: * */
278: if(osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return(0);
279: if(size%osm->N != 0) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of MPI processes %d",osm->N,size);
280: nlocalsubdomains = size/osm->N;
281: osm->n = 1;
282: MatPartitioningCreate(comm,&part);
283: MatPartitioningSetAdjacency(part,pc->pmat);
284: MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
285: MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
286: MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
287: MatPartitioningSetFromOptions(part);
288: /* get new processor owner number of each vertex */
289: MatPartitioningApply(part,&partitioning);
290: ISBuildTwoSided(partitioning,NULL,&fromrows);
291: ISPartitioningToNumbering(partitioning,&isn);
292: ISDestroy(&isn);
293: ISGetLocalSize(fromrows,&fromrows_localsize);
294: MatPartitioningDestroy(&part);
295: MatCreateVecs(pc->pmat,&outervec,NULL);
296: VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
297: VecDuplicate(osm->pcx,&(osm->pcy));
298: VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
299: MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
300: PetscObjectReference((PetscObject)fromrows);
301: osm->permutationIS = fromrows;
302: osm->pcmat = pc->pmat;
303: PetscObjectReference((PetscObject)osm->permutationP);
304: pc->pmat = osm->permutationP;
305: VecDestroy(&outervec);
306: ISDestroy(&fromrows);
307: ISDestroy(&partitioning);
308: osm->n = PETSC_DETERMINE;
309: return(0);
310: }
314: static PetscErrorCode PCSetUp_GASM(PC pc)
315: {
316: PC_GASM *osm = (PC_GASM*)pc->data;
318: PetscBool symset,flg;
319: PetscInt i,nInnerIndices,nTotalInnerIndices;
320: PetscMPIInt rank, size;
321: MatReuse scall = MAT_REUSE_MATRIX;
322: KSP ksp;
323: PC subpc;
324: const char *prefix,*pprefix;
325: Vec x,y;
326: PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
327: const PetscInt *oidxi; /* Indices of the i-th local outer subdomain. */
328: PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
329: PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
330: IS gois; /* Disjoint union of the global indices of the outer subdomains. */
331: IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
332: PetscScalar *gxarray, *gyarray;
333: PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
334: PetscInt num_subdomains = 0;
335: DM *subdomain_dm = NULL;
336: char **subdomain_names = NULL;
337: PetscInt *numbering;
341: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
342: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
343: if (!pc->setupcalled) {
344: /* use a hierarchical partitioning */
345: if(osm->hierarchicalpartitioning){
346: PCGASMSetHierarchicalPartitioning(pc);
347: }
348: if (!osm->type_set) {
349: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
350: if (symset && flg) osm->type = PC_GASM_BASIC;
351: }
353: if (osm->n == PETSC_DETERMINE) {
354: if (osm->N != PETSC_DETERMINE) {
355: /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
356: PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
357: } else if (osm->dm_subdomains && pc->dm) {
358: /* try pc->dm next, if allowed */
359: PetscInt d;
360: IS *inner_subdomain_is, *outer_subdomain_is;
361: DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
362: if (num_subdomains) {
363: PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
364: }
365: for (d = 0; d < num_subdomains; ++d) {
366: if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
367: if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
368: }
369: PetscFree(inner_subdomain_is);
370: PetscFree(outer_subdomain_is);
371: } else {
372: /* still no subdomains; use one per processor */
373: osm->nmax = osm->n = 1;
374: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
375: osm->N = size;
376: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
377: }
378: }
379: if (!osm->iis) {
380: /*
381: osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
382: We create the requisite number of local inner subdomains and then expand them into
383: outer subdomains, if necessary.
384: */
385: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
386: }
387: if (!osm->ois) {
388: /*
389: Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
390: has been requested, copy the inner subdomains over so they can be modified.
391: */
392: PetscMalloc1(osm->n,&osm->ois);
393: for (i=0; i<osm->n; ++i) {
394: if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
395: ISDuplicate(osm->iis[i],(osm->ois)+i);
396: ISCopy(osm->iis[i],osm->ois[i]);
397: } else {
398: PetscObjectReference((PetscObject)((osm->iis)[i]));
399: osm->ois[i] = osm->iis[i];
400: }
401: }
402: if (osm->overlap>0 && osm->N>1) {
403: /* Extend the "overlapping" regions by a number of steps */
404: MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
405: }
406: }
408: /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
409: if (osm->nmax == PETSC_DETERMINE) {
410: PetscMPIInt inwork,outwork;
411: /* determine global number of subdomains and the max number of local subdomains */
412: inwork = osm->n;
413: MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
414: osm->nmax = outwork;
415: }
416: if (osm->N == PETSC_DETERMINE) {
417: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
418: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
419: }
422: if (osm->sort_indices) {
423: for (i=0; i<osm->n; i++) {
424: ISSort(osm->ois[i]);
425: ISSort(osm->iis[i]);
426: }
427: }
428: PCGetOptionsPrefix(pc,&prefix);
429: PCGASMPrintSubdomains(pc);
431: /*
432: Merge the ISs, create merged vectors and restrictions.
433: */
434: /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
435: on = 0;
436: for (i=0; i<osm->n; i++) {
437: ISGetLocalSize(osm->ois[i],&oni);
438: on += oni;
439: }
440: PetscMalloc1(on, &oidx);
441: on = 0;
442: /* Merge local indices together */
443: for (i=0; i<osm->n; i++) {
444: ISGetLocalSize(osm->ois[i],&oni);
445: ISGetIndices(osm->ois[i],&oidxi);
446: PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);
447: ISRestoreIndices(osm->ois[i],&oidxi);
448: on += oni;
449: }
450: ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
451: nTotalInnerIndices = 0;
452: for(i=0; i<osm->n; i++){
453: ISGetLocalSize(osm->iis[i],&nInnerIndices);
454: nTotalInnerIndices += nInnerIndices;
455: }
456: VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
457: VecDuplicate(x,&y);
459: VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
460: VecDuplicate(osm->gx,&osm->gy);
461: VecGetOwnershipRange(osm->gx, &gostart, NULL);
462: ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
463: /* gois may contain indices that are not locally owned */
464: VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
465: PetscMalloc1(osm->n,&numbering);
466: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
467: VecDestroy(&x);
468: ISDestroy(&gois);
470: /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
471: {
472: PetscInt ini; /* Number of indices in the i-th local inner subdomain. */
473: PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
474: PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
475: PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
476: IS giis; /* IS for the disjoint union of inner subdomains. */
477: IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
478: PetscScalar *array;
479: const PetscInt *indices;
480: PetscInt k;
481: on = 0;
482: for (i=0; i<osm->n; i++) {
483: ISGetLocalSize(osm->ois[i],&oni);
484: on += oni;
485: }
486: PetscMalloc1(on, &iidx);
487: PetscMalloc1(on, &ioidx);
488: VecGetArray(y,&array);
489: /* tag each inner-subdomain entry with its subdomain's global number so that, after the scatter below, overlapping entries can be identified */
490: in = 0;
491: for (i=0; i<osm->n; i++) {
492: ISGetLocalSize(osm->iis[i],&ini);
493: for (k = 0; k < ini; ++k){
494: array[in+k] = numbering[i];
495: }
496: in += ini;
497: }
498: VecRestoreArray(y,&array);
499: VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
500: VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
501: VecGetOwnershipRange(osm->gy,&gostart, NULL);
502: VecGetArray(osm->gy,&array);
503: on = 0;
504: in = 0;
505: for (i=0; i<osm->n; i++) {
506: ISGetLocalSize(osm->ois[i],&oni);
507: ISGetIndices(osm->ois[i],&indices);
508: for (k=0; k<oni; k++) {
509: /* skip overlapping indices to get inner domain */
510: if(PetscRealPart(array[on+k]) != numbering[i]) continue;
511: iidx[in] = indices[k];
512: ioidx[in++] = gostart+on+k;
513: }
514: ISRestoreIndices(osm->ois[i], &indices);
515: on += oni;
516: }
517: VecRestoreArray(osm->gy,&array);
518: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
519: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
520: VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
521: VecDestroy(&y);
522: ISDestroy(&giis);
523: ISDestroy(&giois);
524: }
525: ISDestroy(&goid);
526: PetscFree(numbering);
528: /* Create the subdomain work vectors. */
529: PetscMalloc1(osm->n,&osm->x);
530: PetscMalloc1(osm->n,&osm->y);
531: VecGetArray(osm->gx, &gxarray);
532: VecGetArray(osm->gy, &gyarray);
533: for (i=0, on=0; i<osm->n; ++i, on += oni) {
534: PetscInt oNi;
535: ISGetLocalSize(osm->ois[i],&oni);
536: /* on a sub communicator */
537: ISGetSize(osm->ois[i],&oNi);
538: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
539: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
540: }
541: VecRestoreArray(osm->gx, &gxarray);
542: VecRestoreArray(osm->gy, &gyarray);
543: /* Create the subdomain solvers */
544: PetscMalloc1(osm->n,&osm->ksp);
545: for (i=0; i<osm->n; i++) {
546: char subprefix[PETSC_MAX_PATH_LEN+1];
547: KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
548: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
549: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
550: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
551: KSPSetType(ksp,KSPPREONLY);
552: KSPGetPC(ksp,&subpc); /* Why do we need this here? */
553: if (subdomain_dm) {
554: KSPSetDM(ksp,subdomain_dm[i]);
555: DMDestroy(subdomain_dm+i);
556: }
557: PCGetOptionsPrefix(pc,&prefix);
558: KSPSetOptionsPrefix(ksp,prefix);
559: if (subdomain_names && subdomain_names[i]) {
560: PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
561: KSPAppendOptionsPrefix(ksp,subprefix);
562: PetscFree(subdomain_names[i]);
563: }
564: KSPAppendOptionsPrefix(ksp,"sub_");
565: osm->ksp[i] = ksp;
566: }
567: PetscFree(subdomain_dm);
568: PetscFree(subdomain_names);
569: scall = MAT_INITIAL_MATRIX;
571: } else { /* if (pc->setupcalled) */
572: /*
573: Destroy the submatrices from the previous iteration
574: */
575: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
576: MatDestroyMatrices(osm->n,&osm->pmat);
577: scall = MAT_INITIAL_MATRIX;
578: }
579: if(osm->permutationIS){
580: MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
581: PetscObjectReference((PetscObject)osm->permutationP);
582: osm->pcmat = pc->pmat;
583: pc->pmat = osm->permutationP;
584: }
586: }
589: /*
590: Extract out the submatrices.
591: */
592: if (size > 1) {
593: MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
594: } else {
595: MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
596: }
597: if (scall == MAT_INITIAL_MATRIX) {
598: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
599: for (i=0; i<osm->n; i++) {
600: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
601: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
602: }
603: }
605: /* Return control to the user so that the submatrices can be modified (e.g., to apply
606: different boundary conditions for the submatrices than for the global problem) */
607: PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);
609: /*
610: Loop over submatrices putting them into local ksps
611: */
612: for (i=0; i<osm->n; i++) {
613: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
614: if (!pc->setupcalled) {
615: KSPSetFromOptions(osm->ksp[i]);
616: }
617: }
618: if(osm->pcmat){
619: MatDestroy(&pc->pmat);
620: pc->pmat = osm->pcmat;
621: osm->pcmat = 0;
622: }
623: return(0);
624: }
626: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
627: {
628: PC_GASM *osm = (PC_GASM*)pc->data;
630: PetscInt i;
633: for (i=0; i<osm->n; i++) {
634: KSPSetUp(osm->ksp[i]);
635: }
636: return(0);
637: }
639: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
640: {
641: PC_GASM *osm = (PC_GASM*)pc->data;
643: PetscInt i;
644: Vec x,y;
645: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
648: if(osm->pctoouter){
649: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
650: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
651: x = osm->pcx;
652: y = osm->pcy;
653: }else{
654: x = xin;
655: y = yout;
656: }
657: /*
658: Support for limiting the restriction or interpolation only to the inner
659: subdomain values (leaving the other values 0).
660: */
661: if (!(osm->type & PC_GASM_RESTRICT)) {
662: /* have to zero the work RHS since scatter may leave some slots empty */
663: VecZeroEntries(osm->gx);
664: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
665: } else {
666: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
667: }
668: VecZeroEntries(osm->gy);
669: if (!(osm->type & PC_GASM_RESTRICT)) {
670: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
671: } else {
672: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
673: }
674: /* do the subdomain solves */
675: for (i=0; i<osm->n; ++i) {
676: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
677: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
678: }
679: /* Do we need to zero y ?? */
680: VecZeroEntries(y);
681: if (!(osm->type & PC_GASM_INTERPOLATE)) {
682: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
683: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
684: } else {
685: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
686: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
687: }
688: if(osm->pctoouter){
689: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
690: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
691: }
692: return(0);
693: }
695: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
696: {
697: PC_GASM *osm = (PC_GASM*)pc->data;
699: PetscInt i;
700: Vec x,y;
701: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
704: if(osm->pctoouter){
705: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
706: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
707: x = osm->pcx;
708: y = osm->pcy;
709: }else{
710: x = xin;
711: y = yout;
712: }
713: /*
714: Support for limiting the restriction or interpolation to only local
715: subdomain values (leaving the other values 0).
717: Note: these are reversed from PCApply_GASM() because we are applying the
718: transpose of the three terms
719: */
720: if (!(osm->type & PC_GASM_INTERPOLATE)) {
721: /* have to zero the work RHS since scatter may leave some slots empty */
722: VecZeroEntries(osm->gx);
723: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
724: } else {
725: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
726: }
727: VecZeroEntries(osm->gy);
728: if (!(osm->type & PC_GASM_INTERPOLATE)) {
729: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
730: } else {
731: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
732: }
733: /* do the local solves */
734: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
735: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
736: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
737: }
738: VecZeroEntries(y);
739: if (!(osm->type & PC_GASM_RESTRICT)) {
740: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
741: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
742: } else {
743: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
744: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
745: }
746: if(osm->pctoouter){
747: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
748: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
749: }
750: return(0);
751: }
753: static PetscErrorCode PCReset_GASM(PC pc)
754: {
755: PC_GASM *osm = (PC_GASM*)pc->data;
757: PetscInt i;
760: if (osm->ksp) {
761: for (i=0; i<osm->n; i++) {
762: KSPReset(osm->ksp[i]);
763: }
764: }
765: if (osm->pmat) {
766: if (osm->n > 0) {
767: PetscMPIInt size;
768: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
769: if (size > 1) {
770: /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
771: MatDestroyMatrices(osm->n,&osm->pmat);
772: } else {
773: MatDestroySubMatrices(osm->n,&osm->pmat);
774: }
775: }
776: }
777: if (osm->x) {
778: for (i=0; i<osm->n; i++) {
779: VecDestroy(&osm->x[i]);
780: VecDestroy(&osm->y[i]);
781: }
782: }
783: VecDestroy(&osm->gx);
784: VecDestroy(&osm->gy);
786: VecScatterDestroy(&osm->gorestriction);
787: VecScatterDestroy(&osm->girestriction);
788: if (!osm->user_subdomains) {
789: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
790: osm->N = PETSC_DETERMINE;
791: osm->nmax = PETSC_DETERMINE;
792: }
793: if(osm->pctoouter){
794: VecScatterDestroy(&(osm->pctoouter));
795: }
796: if(osm->permutationIS){
797: ISDestroy(&(osm->permutationIS));
798: }
799: if(osm->pcx){
800: VecDestroy(&(osm->pcx));
801: }
802: if(osm->pcy){
803: VecDestroy(&(osm->pcy));
804: }
805: if(osm->permutationP){
806: MatDestroy(&(osm->permutationP));
807: }
808: if(osm->pcmat){
809: MatDestroy(&osm->pcmat);
810: }
811: return(0);
812: }
814: static PetscErrorCode PCDestroy_GASM(PC pc)
815: {
816: PC_GASM *osm = (PC_GASM*)pc->data;
818: PetscInt i;
821: PCReset_GASM(pc);
822: /* PCReset_GASM() will not destroy the subdomains if user_subdomains is true. */
823: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
824: if (osm->ksp) {
825: for (i=0; i<osm->n; i++) {
826: KSPDestroy(&osm->ksp[i]);
827: }
828: PetscFree(osm->ksp);
829: }
830: PetscFree(osm->x);
831: PetscFree(osm->y);
832: PetscFree(pc->data);
833: return(0);
834: }
836: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
837: {
838: PC_GASM *osm = (PC_GASM*)pc->data;
840: PetscInt blocks,ovl;
841: PetscBool symset,flg;
842: PCGASMType gasmtype;
845: /* set the type to symmetric if matrix is symmetric */
846: if (!osm->type_set && pc->pmat) {
847: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
848: if (symset && flg) osm->type = PC_GASM_BASIC;
849: }
850: PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
851: PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
852: PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
853: if (flg) {
854: PCGASMSetTotalSubdomains(pc,blocks);
855: }
856: PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
857: if (flg) {
858: PCGASMSetOverlap(pc,ovl);
859: osm->dm_subdomains = PETSC_FALSE;
860: }
861: flg = PETSC_FALSE;
862: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
863: if (flg) {PCGASMSetType(pc,gasmtype);}
864: PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
865: PetscOptionsTail();
866: return(0);
867: }
869: /*------------------------------------------------------------------------------------*/
871: /*@
872: PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
873: communicator.
874: Logically collective on pc
876: Input Parameters:
877: + pc - the preconditioner
878: - N - total number of subdomains
881: Level: beginner
883: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
885: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
886: PCGASMCreateSubdomains2D()
887: @*/
888: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
889: {
890: PC_GASM *osm = (PC_GASM*)pc->data;
891: PetscMPIInt size,rank;
895: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
896: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
898: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
899: osm->ois = osm->iis = NULL;
901: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
902: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
903: osm->N = N;
904: osm->n = PETSC_DETERMINE;
905: osm->nmax = PETSC_DETERMINE;
906: osm->dm_subdomains = PETSC_FALSE;
907: return(0);
908: }
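/*
   Example (illustrative sketch): one way a user might request 8 subdomains across the
   communicator before PCSetUp(); the names ksp and pc are assumed to exist in the
   calling code.

      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetTotalSubdomains(pc,8);

   The same request can be made from the options database with -pc_gasm_total_subdomains 8.
*/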
911: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
912: {
913: PC_GASM *osm = (PC_GASM*)pc->data;
914: PetscErrorCode ierr;
915: PetscInt i;
918: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
919: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
921: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
922: osm->iis = osm->ois = NULL;
923: osm->n = n;
924: osm->N = PETSC_DETERMINE;
925: osm->nmax = PETSC_DETERMINE;
926: if (ois) {
927: PetscMalloc1(n,&osm->ois);
928: for (i=0; i<n; i++) {
929: PetscObjectReference((PetscObject)ois[i]);
930: osm->ois[i] = ois[i];
931: }
932: /*
933: Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
934: it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
935: */
936: osm->overlap = -1;
937: /* inner subdomains must be provided */
938: if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Inner index sets must be provided when outer index sets are specified");
939: }/* end if */
940: if (iis) {
941: PetscMalloc1(n,&osm->iis);
942: for (i=0; i<n; i++) {
943: PetscObjectReference((PetscObject)iis[i]);
944: osm->iis[i] = iis[i];
945: }
946: if (!ois) {
947: osm->ois = NULL;
948: /* if the user does not provide outer indices, the outer subdomains are created in PCSetUp_GASM() by expanding the inner subdomains by osm->overlap */
949: }
950: }
951: #if defined(PETSC_USE_DEBUG)
952: {
953: PetscInt j,rstart,rend,*covered,lsize;
954: const PetscInt *indices;
955: /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
956: MatGetOwnershipRange(pc->pmat,&rstart,&rend);
957: PetscCalloc1(rend-rstart,&covered);
958: /* check if the current processor owns indices from others */
959: for (i=0; i<n; i++) {
960: ISGetIndices(osm->iis[i],&indices);
961: ISGetLocalSize(osm->iis[i],&lsize);
962: for (j=0; j<lsize; j++) {
963: if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inner subdomains cannot contain index %D, which is owned by another processor", indices[j]);
964: else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inner subdomains cannot overlap: index %D appears more than once",indices[j]);
965: else covered[indices[j]-rstart] = 1;
966: }
967: ISRestoreIndices(osm->iis[i],&indices);
968: }
969: /* check if we miss any indices */
970: for (i=rstart; i<rend; i++) {
971: if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Local row %D is not covered by any inner subdomain",i);
972: }
973: PetscFree(covered);
974: }
975: #endif
976: if (iis) osm->user_subdomains = PETSC_TRUE;
977: return(0);
978: }
981: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
982: {
983: PC_GASM *osm = (PC_GASM*)pc->data;
986: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
987: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
988: if (!pc->setupcalled) osm->overlap = ovl;
989: return(0);
990: }
992: static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
993: {
994: PC_GASM *osm = (PC_GASM*)pc->data;
997: osm->type = type;
998: osm->type_set = PETSC_TRUE;
999: return(0);
1000: }
1002: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1003: {
1004: PC_GASM *osm = (PC_GASM*)pc->data;
1007: osm->sort_indices = doSort;
1008: return(0);
1009: }
1011: /*
1012: FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1013: In particular, it would upset the global subdomain number calculation.
1014: */
1015: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1016: {
1017: PC_GASM *osm = (PC_GASM*)pc->data;
1021: if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1023: if (n) *n = osm->n;
1024: if (first) {
1025: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1026: *first -= osm->n;
1027: }
1028: if (ksp) {
1029: /* Assume that local solves are now different; not necessarily
1030: true, though! This flag is used only for PCView_GASM() */
1031: *ksp = osm->ksp;
1032: osm->same_subdomain_solvers = PETSC_FALSE;
1033: }
1034: return(0);
1035: } /* PCGASMGetSubKSP_GASM() */
1037: /*@C
1038: PCGASMSetSubdomains - Sets the subdomains for this processor
1039: for the additive Schwarz preconditioner.
1041: Collective on pc
1043: Input Parameters:
1044: + pc - the preconditioner object
1045: . n - the number of subdomains for this processor
1046: . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1047: - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1049: Notes:
1050: The IS indices use the parallel, global numbering of the vector entries.
1051: Inner subdomains are those where the correction is applied.
1052: Outer subdomains are those where the residual necessary to obtain the
1053: corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1054: Both inner and outer subdomains can extend over several processors.
1055: This processor's portion of a subdomain is known as a local subdomain.
1057: Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1058: and have to cover the entire local subdomain owned by the current processor. The index sets on each
1059: process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1060: on another MPI process.
1062: By default the GASM preconditioner uses 1 (local) subdomain per processor.
1065: Level: advanced
1067: .keywords: PC, GASM, set, subdomains, additive Schwarz
1069: .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1070: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1071: @*/
1072: PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1073: {
1074: PC_GASM *osm = (PC_GASM*)pc->data;
1079: PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1080: osm->dm_subdomains = PETSC_FALSE;
1081: return(0);
1082: }
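/*
   Example (illustrative sketch): supplying one inner subdomain per process that exactly
   covers the locally owned rows and letting the outer subdomains be generated from the
   requested overlap (ois == NULL).  The names pc and A are assumed to be a PCGASM
   preconditioner and its matrix; the PC keeps its own references to the index sets, so
   the caller may destroy its copy.

      PetscInt rstart,rend;
      IS       iis[1];
      MatGetOwnershipRange(A,&rstart,&rend);
      ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&iis[0]);
      PCGASMSetSubdomains(pc,1,iis,NULL);
      ISDestroy(&iis[0]);
*/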
1085: /*@
1086: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1087: additive Schwarz preconditioner. Either all or none of the processors in the
1088: pc's communicator must call this routine.
1090: Logically Collective on pc
1092: Input Parameters:
1093: + pc - the preconditioner context
1094: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1096: Options Database Key:
1097: . -pc_gasm_overlap <overlap> - Sets overlap
1099: Notes:
1100: By default the GASM preconditioner uses 1 subdomain per processor. To use
1101: multiple subdomains per processor or "straddling" subdomains that intersect
1102: multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1104: The overlap defaults to 0, so if one desires that no additional
1105: overlap be computed beyond what may have been set with a call to
1106: PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1107: not explicitly set the subdomains in application code, then all overlap would be computed
1108: internally by PETSc, and using an overlap of 0 would result in a GASM
1109: variant that is equivalent to the block Jacobi preconditioner.
1111: Note that one can define initial index sets with any overlap via
1112: PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1113: PETSc to extend that overlap further, if desired.
1115: Level: intermediate
1117: .keywords: PC, GASM, set, overlap
1119: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1120: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1121: @*/
1122: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1123: {
1125: PC_GASM *osm = (PC_GASM*)pc->data;
1130: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1131: osm->dm_subdomains = PETSC_FALSE;
1132: return(0);
1133: }
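/*
   Example (illustrative sketch): requesting one level of additional overlap around the
   inner subdomains; pc is assumed to be a PCGASM preconditioner.

      PCGASMSetOverlap(pc,1);

   This is equivalent to the run-time option -pc_gasm_overlap 1.
*/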
1135: /*@
1136: PCGASMSetType - Sets the type of restriction and interpolation used
1137: for local problems in the additive Schwarz method.
1139: Logically Collective on PC
1141: Input Parameters:
1142: + pc - the preconditioner context
1143: - type - variant of GASM, one of
1144: .vb
1145: PC_GASM_BASIC - full interpolation and restriction
1146: PC_GASM_RESTRICT - full restriction, local processor interpolation
1147: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1148: PC_GASM_NONE - local processor restriction and interpolation
1149: .ve
1151: Options Database Key:
1152: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1154: Level: intermediate
1156: .keywords: PC, GASM, set, type
1158: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1159: PCGASMCreateSubdomains2D()
1160: @*/
1161: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1162: {
1168: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1169: return(0);
1170: }
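/*
   Example (illustrative sketch): selecting the standard (basic) additive Schwarz variant
   instead of the default restricted one; pc is assumed to be a PCGASM preconditioner.

      PCGASMSetType(pc,PC_GASM_BASIC);

   This is equivalent to the run-time option -pc_gasm_type basic.
*/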
1172: /*@
1173: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1175: Logically Collective on PC
1177: Input Parameters:
1178: + pc - the preconditioner context
1179: - doSort - sort the subdomain indices
1181: Level: intermediate
1183: .keywords: PC, GASM, set, type
1185: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1186: PCGASMCreateSubdomains2D()
1187: @*/
1188: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1189: {
1195: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1196: return(0);
1197: }
1199: /*@C
1200: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1201: this processor.
1203: Collective on PC iff first_local is requested
1205: Input Parameter:
1206: . pc - the preconditioner context
1208: Output Parameters:
1209: + n_local - the number of blocks on this processor or NULL
1210: . first_local - the global number of the first block on this processor or NULL,
1211: all processors must request or all must pass NULL
1212: - ksp - the array of KSP contexts
1214: Note:
1215: After PCGASMGetSubKSP() the array of KSPs is not to be freed by the caller.
1217: Currently for some matrix implementations only 1 block per processor
1218: is supported.
1220: You must call KSPSetUp() before calling PCGASMGetSubKSP().
1222: Level: advanced
1224: .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1226: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1227: PCGASMCreateSubdomains2D(),
1228: @*/
1229: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1230: {
1235: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1236: return(0);
1237: }
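/*
   Example (illustrative sketch): configuring the subdomain solvers programmatically;
   ksp and pc are assumed to be the outer KSP and its PCGASM preconditioner, and
   KSPSetUp() is called first as required by the notes above.

      KSP      *subksp;
      PC       subpc;
      PetscInt nlocal,first,i;
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCLU);
      }
*/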
1239: /* -------------------------------------------------------------------------------------*/
1240: /*MC
1241: PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1242: its own KSP object.
1244: Options Database Keys:
1245: + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors
1246: . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1247: . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1248: . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1249: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1251: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1252: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1253: -pc_gasm_type basic to use the standard GASM.
1255: Notes:
1256: Blocks can be shared by multiple processes.
1258: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1259: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1261: To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1262: and set the options directly on the resulting KSP object (you can access its PC
1263: with KSPGetPC())
1266: Level: beginner
1268: Concepts: additive Schwarz method
1270: References:
1271: + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1272: Courant Institute, New York University Technical report
1273: - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1274: Cambridge University Press.
1276: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1277: PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1278: PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1280: M*/
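/*
   Example (illustrative sketch): the typical way PCGASM is driven from the options
   database; A, b, and x are assumed to be a previously assembled Mat and Vecs.

      KSP ksp;
      PC  pc;
      KSPCreate(PETSC_COMM_WORLD,&ksp);
      KSPSetOperators(ksp,A,A);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      KSPSetFromOptions(ksp);
      KSPSolve(ksp,b,x);

   combined with run-time options such as

      -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type ilu
*/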
1282: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1283: {
1285: PC_GASM *osm;
1288: PetscNewLog(pc,&osm);
1290: osm->N = PETSC_DETERMINE;
1291: osm->n = PETSC_DECIDE;
1292: osm->nmax = PETSC_DETERMINE;
1293: osm->overlap = 0;
1294: osm->ksp = 0;
1295: osm->gorestriction = 0;
1296: osm->girestriction = 0;
1297: osm->pctoouter = 0;
1298: osm->gx = 0;
1299: osm->gy = 0;
1300: osm->x = 0;
1301: osm->y = 0;
1302: osm->pcx = 0;
1303: osm->pcy = 0;
1304: osm->permutationIS = 0;
1305: osm->permutationP = 0;
1306: osm->pcmat = 0;
1307: osm->ois = 0;
1308: osm->iis = 0;
1309: osm->pmat = 0;
1310: osm->type = PC_GASM_RESTRICT;
1311: osm->same_subdomain_solvers = PETSC_TRUE;
1312: osm->sort_indices = PETSC_TRUE;
1313: osm->dm_subdomains = PETSC_FALSE;
1314: osm->hierarchicalpartitioning = PETSC_FALSE;
1316: pc->data = (void*)osm;
1317: pc->ops->apply = PCApply_GASM;
1318: pc->ops->applytranspose = PCApplyTranspose_GASM;
1319: pc->ops->setup = PCSetUp_GASM;
1320: pc->ops->reset = PCReset_GASM;
1321: pc->ops->destroy = PCDestroy_GASM;
1322: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1323: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1324: pc->ops->view = PCView_GASM;
1325: pc->ops->applyrichardson = 0;
1327: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1328: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1329: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1330: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1331: PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1332: return(0);
1333: }
1336: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1337: {
1338: MatPartitioning mpart;
1339: const char *prefix;
1340: PetscInt i,j,rstart,rend,bs;
1341: PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1342: Mat Ad = NULL, adj;
1343: IS ispart,isnumb,*is;
1344: PetscErrorCode ierr;
1347: if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of local subdomains must be > 0, got nloc = %D",nloc);
1349: /* Get prefix, row distribution, and block size */
1350: MatGetOptionsPrefix(A,&prefix);
1351: MatGetOwnershipRange(A,&rstart,&rend);
1352: MatGetBlockSize(A,&bs);
1353: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1355: /* Get diagonal block from matrix if possible */
1356: MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1357: if (hasop) {
1358: MatGetDiagonalBlock(A,&Ad);
1359: }
1360: if (Ad) {
1361: PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1362: if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1363: }
1364: if (Ad && nloc > 1) {
1365: PetscBool match,done;
1366: /* Try to setup a good matrix partitioning if available */
1367: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1368: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1369: MatPartitioningSetFromOptions(mpart);
1370: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1371: if (!match) {
1372: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1373: }
1374: if (!match) { /* assume a "good" partitioner is available */
1375: PetscInt na;
1376: const PetscInt *ia,*ja;
1377: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1378: if (done) {
1379: /* Build adjacency matrix by hand. Unfortunately a call to
1380: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1381: remove the block-aij structure and we cannot expect
1382: MatPartitioning to split vertices as we need */
1383: PetscInt i,j,len,nnz,cnt,*iia=0,*jja=0;
1384: const PetscInt *row;
1385: nnz = 0;
1386: for (i=0; i<na; i++) { /* count number of nonzeros */
1387: len = ia[i+1] - ia[i];
1388: row = ja + ia[i];
1389: for (j=0; j<len; j++) {
1390: if (row[j] == i) { /* don't count diagonal */
1391: len--; break;
1392: }
1393: }
1394: nnz += len;
1395: }
1396: PetscMalloc1(na+1,&iia);
1397: PetscMalloc1(nnz,&jja);
1398: nnz = 0;
1399: iia[0] = 0;
1400: for (i=0; i<na; i++) { /* fill adjacency */
1401: cnt = 0;
1402: len = ia[i+1] - ia[i];
1403: row = ja + ia[i];
1404: for (j=0; j<len; j++) {
1405: if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1406: }
1407: nnz += cnt;
1408: iia[i+1] = nnz;
1409: }
1410: /* Partitioning of the adjacency matrix */
1411: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1412: MatPartitioningSetAdjacency(mpart,adj);
1413: MatPartitioningSetNParts(mpart,nloc);
1414: MatPartitioningApply(mpart,&ispart);
1415: ISPartitioningToNumbering(ispart,&isnumb);
1416: MatDestroy(&adj);
1417: foundpart = PETSC_TRUE;
1418: }
1419: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1420: }
1421: MatPartitioningDestroy(&mpart);
1422: }
1423: PetscMalloc1(nloc,&is);
1424: if (!foundpart) {
1426: /* Partitioning by contiguous chunks of rows */
1428: PetscInt mbs = (rend-rstart)/bs;
1429: PetscInt start = rstart;
1430: for (i=0; i<nloc; i++) {
1431: PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1432: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1433: start += count;
1434: }
1436: } else {
1438: /* Partitioning by adjacency of diagonal block */
1440: const PetscInt *numbering;
1441: PetscInt *count,nidx,*indices,*newidx,start=0;
1442: /* Get node count in each partition */
1443: PetscMalloc1(nloc,&count);
1444: ISPartitioningCount(ispart,nloc,count);
1445: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1446: for (i=0; i<nloc; i++) count[i] *= bs;
1447: }
1448: /* Build indices from node numbering */
1449: ISGetLocalSize(isnumb,&nidx);
1450: PetscMalloc1(nidx,&indices);
1451: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1452: ISGetIndices(isnumb,&numbering);
1453: PetscSortIntWithPermutation(nidx,numbering,indices);
1454: ISRestoreIndices(isnumb,&numbering);
1455: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1456: PetscMalloc1(nidx*bs,&newidx);
1457: for (i=0; i<nidx; i++) {
1458: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1459: }
1460: PetscFree(indices);
1461: nidx *= bs;
1462: indices = newidx;
1463: }
1464: /* Shift to get global indices */
1465: for (i=0; i<nidx; i++) indices[i] += rstart;
1467: /* Build the index sets for each block */
1468: for (i=0; i<nloc; i++) {
1469: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1470: ISSort(is[i]);
1471: start += count[i];
1472: }
1474: PetscFree(count);
1475: PetscFree(indices);
1476: ISDestroy(&isnumb);
1477: ISDestroy(&ispart);
1478: }
1479: *iis = is;
1480: return(0);
1481: }
1483: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1484: {
1485: PetscErrorCode ierr;
1488: MatSubdomainsCreateCoalesce(A,N,n,iis);
1489: return(0);
1490: }
1494: /*@C
1495: PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1496: Schwarz preconditioner for any problem based on its matrix.
1498: Collective
1500: Input Parameters:
1501: + A - The global matrix operator
1502: - N - the number of global subdomains requested
1504: Output Parameters:
1505: + n - the number of subdomains created on this processor
1506: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1508: Level: advanced
1510: Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1511: When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
1512: The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL). The overlapping
1513: outer subdomains will be automatically generated from these according to the requested amount of
1514: overlap; this is currently supported only with local subdomains.
1517: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1519: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1520: @*/
1521: PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1522: {
1523: PetscMPIInt size;
1524: PetscErrorCode ierr;
1530: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1531: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1532: if (N >= size) {
1533: *n = N/size + (N%size);
1534: PCGASMCreateLocalSubdomains(A,*n,iis);
1535: } else {
1536: PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1537: }
1538: return(0);
1539: }
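/*
  Illustrative usage sketch (not part of gasm.c): create nonoverlapping inner subdomains from a
  matrix and hand them to a GASM preconditioner, letting the requested overlap generate the
  outer subdomains, as described in the note above.  The helper name is hypothetical, it assumes
  KSPSetOperators(ksp,A,A) has already been called, and error checking is omitted to match the
  style of this listing.
*/
static PetscErrorCode ExampleSetGASMSubdomains(KSP ksp,Mat A,PetscInt Nsub,PetscInt *nsub,IS **iis)
{
  PC pc;

  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCGASM);
  PCGASMCreateSubdomains(A,Nsub,nsub,iis);   /* *nsub inner subdomains live on this process */
  PCGASMSetSubdomains(pc,*nsub,*iis,NULL);   /* NULL ois: outer subdomains come from the overlap */
  PCGASMSetOverlap(pc,1);                    /* requested amount of overlap */
  /* *iis is returned to the caller; release it later with PCGASMDestroySubdomains() */
  return(0);
}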
1541: /*@C
1542: PCGASMDestroySubdomains - Destroys the index sets created with
1543: PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1544: called after setting subdomains with PCGASMSetSubdomains().
1546: Collective
1548: Input Parameters:
1549: + n - the number of index sets
1550: . iis - the array of inner subdomains
1551: - ois - the array of outer subdomains; can be NULL
1553: Level: intermediate
1555: Notes:
1556: This is merely a convenience subroutine that walks each list,
1557: destroys each IS on the list, and then frees the list. At the end the
1558: list pointers are set to NULL.
1560: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1562: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1563: @*/
1564: PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1565: {
1566: PetscInt i;
1570: if (n <= 0) return(0);
1571: if (ois) {
1573: if (*ois) {
1575: for (i=0; i<n; i++) {
1576: ISDestroy(&(*ois)[i]);
1577: }
1578: PetscFree((*ois));
1579: }
1580: }
1581: if (iis) {
1583: if (*iis) {
1585: for (i=0; i<n; i++) {
1586: ISDestroy(&(*iis)[i]);
1587: }
1588: PetscFree((*iis));
1589: }
1590: }
1591: return(0);
1592: }
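/*
  Illustrative usage sketch (not part of gasm.c): release subdomain lists once they have been
  passed to the preconditioner, completing the lifecycle begun in the sketch after
  PCGASMCreateSubdomains() above.  The helper name is hypothetical; ois may be NULL when only
  inner subdomains were created, and error checking is omitted.
*/
static PetscErrorCode ExampleReleaseGASMSubdomains(PetscInt nsub,IS **iis,IS **ois)
{
  /* per the notes above, intended to be called after PCGASMSetSubdomains() */
  PCGASMDestroySubdomains(nsub,iis,ois);
  return(0);
}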
1595: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1596: { \
1597: PetscInt first_row = first/M, last_row = last/M+1; \
1598: /* \
1599: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1600: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1601: subdomain). \
1602: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1603: of the intersection. \
1604: */ \
1605: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1606: *ylow_loc = PetscMax(first_row,ylow); \
1607: /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1608: *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1609: /* yhigh_loc is the grid row above the last local subdomain element */ \
1610: *yhigh_loc = PetscMin(last_row,yhigh); \
1611: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1612: *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1613: /* Now compute the size of the local subdomain n. */ \
1614: *n = 0; \
1615: if (*ylow_loc < *yhigh_loc) { \
1616: PetscInt width = xright-xleft; \
1617: *n += width*(*yhigh_loc-*ylow_loc-1); \
1618: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1619: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1620: } \
1621: }
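/*
  Worked example for the PCGASMLocalSubdomainBounds2D macro above (hypothetical numbers):
  take M = 10, a local ownership range [first,last) = [30,70), and a subdomain with
  xleft = 2, xright = 6, ylow = 0, yhigh = 10.  Then first_row = 3 and last_row = 8, so
  ylow_loc = max(3,0) = 3, xleft_loc = max(30%10,2) = 2, yhigh_loc = min(8,10) = 8 and
  xright_loc = min(6,70%10) = 0.  With width = 4 the count becomes
  n = 4*(8-3-1) + min(max(0-2,0),4) - min(max(2-2,0),4) = 16,
  i.e. the 4x4 block of subdomain points in grid rows 3..6 owned by this process.
*/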
1625: /*@
1626: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1627: preconditioner for a two-dimensional problem on a regular grid.
1629: Collective
1631: Input Parameters:
1632: + M, N - the global number of grid points in the x and y directions
1633: . Mdomains, Ndomains - the global number of subdomains in the x and y directions
1634: . dof - degrees of freedom per node
1635: - overlap - overlap in mesh lines
1637: Output Parameters:
1638: + nsub - the number of local subdomains created
1639: . iis - array of index sets defining inner (nonoverlapping) subdomains
1640: - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1643: Level: advanced
1645: .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1647: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1648: @*/
1649: PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1650: {
1652: PetscMPIInt size, rank;
1653: PetscInt i, j;
1654: PetscInt maxheight, maxwidth;
1655: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1656: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1657: PetscInt x[2][2], y[2][2], n[2];
1658: PetscInt first, last;
1659: PetscInt nidx, *idx;
1660: PetscInt ii,jj,s,q,d;
1661: PetscInt k,kk;
1662: PetscMPIInt color;
1663: MPI_Comm comm, subcomm;
1664: IS **xis = 0, **is = ois, **is_local = iis;
1667: PetscObjectGetComm((PetscObject)pc, &comm);
1668: MPI_Comm_size(comm, &size);
1669: MPI_Comm_rank(comm, &rank);
1670: MatGetOwnershipRange(pc->pmat, &first, &last);
1671: if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1672: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1674: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1675: s = 0;
1676: ystart = 0;
1677: for (j=0; j<Ndomains; ++j) {
1678: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1679: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1680: /* Vertical domain limits with an overlap. */
1681: ylow = PetscMax(ystart - overlap,0);
1682: yhigh = PetscMin(ystart + maxheight + overlap,N);
1683: xstart = 0;
1684: for (i=0; i<Mdomains; ++i) {
1685: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1686: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1687: /* Horizontal domain limits with an overlap. */
1688: xleft = PetscMax(xstart - overlap,0);
1689: xright = PetscMin(xstart + maxwidth + overlap,M);
1690: /*
1691: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1692: */
1693: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1694: if (nidx) ++s;
1695: xstart += maxwidth;
1696: } /* for (i = 0; i < Mdomains; ++i) */
1697: ystart += maxheight;
1698: } /* for (j = 0; j < Ndomains; ++j) */
1700: /* Now we can allocate the necessary number of ISs. */
1701: *nsub = s;
1702: PetscMalloc1(*nsub,is);
1703: PetscMalloc1(*nsub,is_local);
1704: s = 0;
1705: ystart = 0;
1706: for (j=0; j<Ndomains; ++j) {
1707: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1708: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1709: /* Vertical domain limits with an overlap. */
1710: y[0][0] = PetscMax(ystart - overlap,0);
1711: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1712: /* Vertical domain limits without an overlap. */
1713: y[1][0] = ystart;
1714: y[1][1] = PetscMin(ystart + maxheight,N);
1715: xstart = 0;
1716: for (i=0; i<Mdomains; ++i) {
1717: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1718: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1719: /* Horizontal domain limits with an overlap. */
1720: x[0][0] = PetscMax(xstart - overlap,0);
1721: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1722: /* Horizontal domain limits without an overlap. */
1723: x[1][0] = xstart;
1724: x[1][1] = PetscMin(xstart+maxwidth,M);
1725: /*
1726: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1727: Do this twice: first for the domains with overlaps, and once without.
1728: During the first pass create the subcommunicators, and use them on the second pass as well.
1729: */
1730: for (q = 0; q < 2; ++q) {
1731: PetscBool split = PETSC_FALSE;
1732: /*
1733: The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1734: according to whether the domain with or without an overlap is considered.
1735: */
1736: xleft = x[q][0]; xright = x[q][1];
1737: ylow = y[q][0]; yhigh = y[q][1];
1738: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1739: nidx *= dof;
1740: n[q] = nidx;
1741: /*
1742: Based on the counted number of indices in the local domain *with an overlap*,
1743: construct a subcommunicator of all the processors supporting this domain.
1744: Observe that a domain with an overlap might have nontrivial local support,
1745: while the domain without an overlap might not. Hence, the decision to participate
1746: in the subcommunicator must be based on the domain with an overlap.
1747: */
1748: if (q == 0) {
1749: if (nidx) color = 1;
1750: else color = MPI_UNDEFINED;
1751: MPI_Comm_split(comm, color, rank, &subcomm);
1752: split = PETSC_TRUE;
1753: }
1754: /*
1755: Proceed only if the number of local indices *with an overlap* is nonzero.
1756: */
1757: if (n[0]) {
1758: if (q == 0) xis = is;
1759: if (q == 1) {
1760: /*
1761: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1762: Moreover, if the overlap is zero, the two ISs are identical.
1763: */
1764: if (overlap == 0) {
1765: (*is_local)[s] = (*is)[s];
1766: PetscObjectReference((PetscObject)(*is)[s]);
1767: continue;
1768: } else {
1769: xis = is_local;
1770: subcomm = ((PetscObject)(*is)[s])->comm;
1771: }
1772: } /* if (q == 1) */
1773: idx = NULL;
1774: PetscMalloc1(nidx,&idx);
1775: if (nidx) {
1776: k = 0;
1777: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1778: PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1779: PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1780: kk = dof*(M*jj + x0);
1781: for (ii=x0; ii<x1; ++ii) {
1782: for (d = 0; d < dof; ++d) {
1783: idx[k++] = kk++;
1784: }
1785: }
1786: }
1787: }
1788: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1789: if (split) {
1790: MPI_Comm_free(&subcomm);
1791: }
1792: }/* if (n[0]) */
1793: }/* for (q = 0; q < 2; ++q) */
1794: if (n[0]) ++s;
1795: xstart += maxwidth;
1796: } /* for (i = 0; i < Mdomains; ++i) */
1797: ystart += maxheight;
1798: } /* for (j = 0; j < Ndomains; ++j) */
1799: return(0);
1800: }
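/*
  Illustrative usage sketch (not part of gasm.c): build a 2x2 decomposition with one mesh line of
  overlap for a scalar (dof = 1) problem on an M x N grid and pass the resulting subdomains to
  the PC.  The helper name is hypothetical; it assumes the operators have already been set on the
  PC (so pc->pmat exists), that M and N are large enough for each subdomain to span at least two
  lines in each direction, and error checking is omitted.
*/
static PetscErrorCode ExampleSetupGASM2D(PC pc,PetscInt M,PetscInt N)
{
  PetscInt nsub;
  IS       *iis,*ois;

  PCSetType(pc,PCGASM);
  PCGASMCreateSubdomains2D(pc,M,N,2,2,1,1,&nsub,&iis,&ois);   /* Mdomains = Ndomains = 2, dof = 1, overlap = 1 */
  PCGASMSetSubdomains(pc,nsub,iis,ois);
  PCGASMDestroySubdomains(nsub,&iis,&ois);                    /* allowed after PCGASMSetSubdomains(), see above */
  return(0);
}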
1802: /*@C
1803: PCGASMGetSubdomains - Gets the subdomains supported on this processor
1804: for the additive Schwarz preconditioner.
1806: Not Collective
1808: Input Parameter:
1809: . pc - the preconditioner context
1811: Output Parameters:
1812: + n - the number of subdomains for this processor (default value = 1)
1813: . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1814: - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1817: Notes:
1818: The user is responsible for destroying the ISs and freeing the returned arrays.
1819: The IS numbering is in the parallel, global numbering of the vector.
1821: Level: advanced
1823: .keywords: PC, GASM, get, subdomains, additive Schwarz
1825: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1826: PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1827: @*/
1828: PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1829: {
1830: PC_GASM *osm;
1832: PetscBool match;
1833: PetscInt i;
1837: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1838: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1839: osm = (PC_GASM*)pc->data;
1840: if (n) *n = osm->n;
1841: if (iis) {
1842: PetscMalloc1(osm->n, iis);
1843: }
1844: if (ois) {
1845: PetscMalloc1(osm->n, ois);
1846: }
1847: if (iis || ois) {
1848: for (i = 0; i < osm->n; ++i) {
1849: if (iis) (*iis)[i] = osm->iis[i];
1850: if (ois) (*ois)[i] = osm->ois[i];
1851: }
1852: }
1853: return(0);
1854: }
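/*
  Illustrative usage sketch (not part of gasm.c): inspect the subdomains currently held by a
  GASM preconditioner.  The helper name is hypothetical; it assumes both inner and outer
  subdomains exist, frees only the arrays allocated by PCGASMGetSubdomains(), and omits error
  checking.
*/
static PetscErrorCode ExampleViewGASMSubdomains(PC pc)
{
  PetscInt n,i;
  IS       *iis,*ois;

  PCGASMGetSubdomains(pc,&n,&iis,&ois);
  for (i = 0; i < n; ++i) {
    ISView(iis[i],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)iis[i])));   /* inner subdomain */
    ISView(ois[i],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)ois[i])));   /* outer subdomain */
  }
  PetscFree(iis);
  PetscFree(ois);
  return(0);
}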
1856: /*@C
1857: PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1858: only) for the additive Schwarz preconditioner.
1860: Not Collective
1862: Input Parameter:
1863: . pc - the preconditioner context
1865: Output Parameters:
1866: + n - the number of matrices for this processor (default value = 1)
1867: - mat - the matrices
1869: Notes:
1870: Matrices returned by this routine have the same communicators as the index sets (IS)
1871: used to define subdomains in PCGASMSetSubdomains().
1872: Level: advanced
1874: .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1876: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1877: PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1878: @*/
1879: PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1880: {
1881: PC_GASM *osm;
1883: PetscBool match;
1889: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1890: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1891: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1892: osm = (PC_GASM*)pc->data;
1893: if (n) *n = osm->n;
1894: if (mat) *mat = osm->pmat;
1895: return(0);
1896: }
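/*
  Illustrative usage sketch (not part of gasm.c): after KSPSetUp()/PCSetUp(), retrieve the
  subdomain matrices and print their sizes.  The helper name is hypothetical; the returned
  array belongs to the PC and is not freed here, and error checking is omitted.
*/
static PetscErrorCode ExampleInspectGASMSubmatrices(PC pc)
{
  PetscInt n,i,m,nc;
  Mat      *mats;

  PCGASMGetSubmatrices(pc,&n,&mats);
  for (i = 0; i < n; ++i) {
    MatGetSize(mats[i],&m,&nc);
    PetscPrintf(PETSC_COMM_SELF,"subdomain matrix %D is %D x %D\n",i,m,nc);
  }
  return(0);
}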
1898: /*@
1899: PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1900: Logically Collective
1902: Input Parameters:
1903: + pc - the preconditioner
1904: - flg - boolean indicating whether to use subdomains defined by the DM
1906: Options Database Key:
1907: . -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM (overridden by -pc_gasm_total_subdomains and -pc_gasm_overlap)
1909: Level: intermediate
1911: Notes:
1912: PCGASMSetSubdomains(), PCGASMSetTotalSubdomains(), and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains():
1913: calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling either PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1914: automatically turns off the use of DM-defined subdomains.
1916: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1918: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1919: PCGASMCreateSubdomains2D()
1920: @*/
1921: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1922: {
1923: PC_GASM *osm = (PC_GASM*)pc->data;
1925: PetscBool match;
1930: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a PC that has already been set up.");
1931: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1932: if (match) {
1933: if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1934: osm->dm_subdomains = flg;
1935: }
1936: }
1937: return(0);
1938: }
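/*
  Illustrative usage sketch (not part of gasm.c): ask GASM to take its subdomains from the DM
  attached to the solver.  The helper name is hypothetical; it assumes ksp already carries a DM
  (e.g. set with KSPSetDM()) and that no explicit subdomains, total-subdomain count, or overlap
  have been requested, since those settings take precedence.  Error checking is omitted.
*/
static PetscErrorCode ExampleUseDMSubdomains(KSP ksp)
{
  PC pc;

  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCGASM);
  PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);   /* must happen before KSPSetUp()/PCSetUp() */
  return(0);
}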
1940: /*@
1941: PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1942: Not Collective
1944: Input Parameter:
1945: . pc - the preconditioner
1947: Output Parameter:
1948: . flg - boolean indicating whether to use subdomains defined by the DM
1950: Level: intermediate
1952: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1954: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1955: PCGASMCreateSubdomains2D()
1956: @*/
1957: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1958: {
1959: PC_GASM *osm = (PC_GASM*)pc->data;
1961: PetscBool match;
1966: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1967: if (match) {
1968: if (flg) *flg = osm->dm_subdomains;
1969: }
1970: return(0);
1971: }
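/*
  Illustrative usage sketch (not part of gasm.c): query whether DM-defined subdomains are in
  effect, e.g. to report the solver configuration.  The helper name is hypothetical and error
  checking is omitted.
*/
static PetscErrorCode ExampleReportDMSubdomains(PC pc)
{
  PetscBool flg;

  PCGASMGetUseDMSubdomains(pc,&flg);
  PetscPrintf(PetscObjectComm((PetscObject)pc),"GASM uses DM-defined subdomains: %s\n",flg ? "yes" : "no");
  return(0);
}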