Actual source code: gasm.c
1: /*
2: This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3: In this version each processor may intersect multiple subdomains and any subdomain may
4: intersect multiple processors. Intersections of subdomains with processors are called *local
5: subdomains*.
7: N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains(), and then calculated in PCSetUp_GASM())
8: n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9: nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10: */
11: #include <petsc/private/pcimpl.h>
12: #include <petscdm.h>
14: typedef struct {
15: PetscInt N,n,nmax;
16: PetscInt overlap; /* overlap requested by user */
17: PCGASMType type; /* use reduced interpolation, restriction or both */
18: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19: PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */
20: PetscBool sort_indices; /* flag to sort subdomain indices */
21: PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23: PetscBool hierarchicalpartitioning;
24: IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25: IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26: KSP *ksp; /* linear solvers for each subdomain */
27: Mat *pmat; /* subdomain block matrices */
28: Vec gx,gy; /* Merged work vectors */
29: Vec *x,*y; /* Split work vectors; their storage aliases pieces of the merged vectors above. */
30: VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31: VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32: VecScatter pctoouter;
33: IS permutationIS;
34: Mat permutationP;
35: Mat pcmat;
36: Vec pcx,pcy;
37: } PC_GASM;
39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40: {
41: PC_GASM *osm = (PC_GASM*)pc->data;
42: PetscInt i;
46: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47: PetscMalloc2(osm->n,numbering,osm->n,permutation);
48: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
49: for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50: PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
51: return(0);
52: }
54: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55: {
56: PC_GASM *osm = (PC_GASM*)pc->data;
57: PetscInt j,nidx;
58: const PetscInt *idx;
59: PetscViewer sviewer;
60: char *cidx;
64: if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
65: /* Inner subdomains. */
66: ISGetLocalSize(osm->iis[i], &nidx);
67: /*
68: No more than 15 characters per index plus a space.
69: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71: For nidx == 0, the whole string consists of 16 '\0' characters.
72: */
73: #define len 16*(nidx+1)+1
74: PetscMalloc1(len, &cidx);
75: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
76: #undef len
77: ISGetIndices(osm->iis[i], &idx);
78: for (j = 0; j < nidx; ++j) {
79: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
80: }
81: ISRestoreIndices(osm->iis[i],&idx);
82: PetscViewerDestroy(&sviewer);
83: PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
84: PetscViewerFlush(viewer);
85: PetscViewerASCIIPushSynchronized(viewer);
86: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
87: PetscViewerFlush(viewer);
88: PetscViewerASCIIPopSynchronized(viewer);
89: PetscViewerASCIIPrintf(viewer, "\n");
90: PetscViewerFlush(viewer);
91: PetscFree(cidx);
92: /* Outer subdomains. */
93: ISGetLocalSize(osm->ois[i], &nidx);
94: /*
95: No more than 15 characters per index plus a space.
96: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98: For nidx == 0, the whole string consists of 16 '\0' characters.
99: */
100: #define len 16*(nidx+1)+1
101: PetscMalloc1(len, &cidx);
102: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
103: #undef len
104: ISGetIndices(osm->ois[i], &idx);
105: for (j = 0; j < nidx; ++j) {
106: PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
107: }
108: PetscViewerDestroy(&sviewer);
109: ISRestoreIndices(osm->ois[i],&idx);
110: PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
111: PetscViewerFlush(viewer);
112: PetscViewerASCIIPushSynchronized(viewer);
113: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
114: PetscViewerFlush(viewer);
115: PetscViewerASCIIPopSynchronized(viewer);
116: PetscViewerASCIIPrintf(viewer, "\n");
117: PetscViewerFlush(viewer);
118: PetscFree(cidx);
119: return(0);
120: }
122: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
123: {
124: PC_GASM *osm = (PC_GASM*)pc->data;
125: const char *prefix;
126: char fname[PETSC_MAX_PATH_LEN+1];
127: PetscInt l, d, count;
128: PetscBool doprint,found;
129: PetscViewer viewer, sviewer = NULL;
130: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
134: PCGetOptionsPrefix(pc,&prefix);
135: doprint = PETSC_FALSE;
136: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);
137: if (!doprint) return(0);
138: PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);
139: if (!found) { PetscStrcpy(fname,"stdout"); };
140: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
141: /*
142: Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
143: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
144: */
145: PetscObjectName((PetscObject)viewer);
146: l = 0;
147: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
148: for (count = 0; count < osm->N; ++count) {
149: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
150: if (l<osm->n) {
151: d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
152: if (numbering[d] == count) {
153: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
154: PCGASMSubdomainView_Private(pc,d,sviewer);
155: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
156: ++l;
157: }
158: }
159: MPI_Barrier(PetscObjectComm((PetscObject)pc));
160: }
161: PetscFree2(numbering,permutation);
162: PetscViewerDestroy(&viewer);
163: return(0);
164: }
166: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
167: {
168: PC_GASM *osm = (PC_GASM*)pc->data;
169: const char *prefix;
171: PetscMPIInt rank, size;
172: PetscInt bsz;
173: PetscBool iascii,view_subdomains=PETSC_FALSE;
174: PetscViewer sviewer;
175: PetscInt count, l;
176: char overlap[256] = "user-defined overlap";
177: char gsubdomains[256] = "unknown total number of subdomains";
178: char msubdomains[256] = "unknown max number of local subdomains";
179: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
182: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
183: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
185: if (osm->overlap >= 0) {
186: PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
187: }
188: if (osm->N != PETSC_DETERMINE) {
189: PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
190: }
191: if (osm->nmax != PETSC_DETERMINE) {
192: PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
193: }
195: PCGetOptionsPrefix(pc,&prefix);
196: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);
198: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
199: if (iascii) {
200: /*
201: Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
202: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
203: collectively on the comm.
204: */
205: PetscObjectName((PetscObject)viewer);
206: PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
207: PetscViewerASCIIPrintf(viewer," %s\n",overlap);
208: PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);
209: PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);
210: PetscViewerASCIIPushSynchronized(viewer);
211: PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
212: PetscViewerFlush(viewer);
213: PetscViewerASCIIPopSynchronized(viewer);
214: /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
215: PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");
216: PetscViewerASCIIPushTab(viewer);
217: PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");
218: /* Make sure that everybody waits for the banner to be printed. */
219: MPI_Barrier(PetscObjectComm((PetscObject)viewer));
220: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
221: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
222: l = 0;
223: for (count = 0; count < osm->N; ++count) {
224: PetscMPIInt srank, ssize;
225: if (l<osm->n) {
226: PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
227: if (numbering[d] == count) {
228: MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
229: MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
230: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
231: ISGetLocalSize(osm->ois[d],&bsz);
232: PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
233: PetscViewerFlush(sviewer);
234: if (view_subdomains) {
235: PCGASMSubdomainView_Private(pc,d,sviewer);
236: }
237: if (!pc->setupcalled) {
238: PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");
239: } else {
240: KSPView(osm->ksp[d],sviewer);
241: }
242: PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");
243: PetscViewerFlush(sviewer);
244: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
245: ++l;
246: }
247: }
248: MPI_Barrier(PetscObjectComm((PetscObject)pc));
249: }
250: PetscFree2(numbering,permutation);
251: PetscViewerASCIIPopTab(viewer);
252: PetscViewerFlush(viewer);
253: /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
254: PetscViewerASCIIPopSynchronized(viewer);
255: }
256: return(0);
257: }
259: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
261: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
262: {
263: PC_GASM *osm = (PC_GASM*)pc->data;
264: MatPartitioning part;
265: MPI_Comm comm;
266: PetscMPIInt size;
267: PetscInt nlocalsubdomains,fromrows_localsize;
268: IS partitioning,fromrows,isn;
269: Vec outervec;
270: PetscErrorCode ierr;
273: PetscObjectGetComm((PetscObject)pc,&comm);
274: MPI_Comm_size(comm,&size);
275: /* We do not need hierarchical partitioning (HP) when the total
276: * number of subdomains is compatible with the number of MPI tasks;
277: * for the cases handled below (N unset, N >= size, or N == 1)
278: * HP is skipped.
279: * */
280: if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return(0);
281: if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of processors %d",osm->N,size);
282: nlocalsubdomains = size/osm->N;
283: osm->n = 1;
284: MatPartitioningCreate(comm,&part);
285: MatPartitioningSetAdjacency(part,pc->pmat);
286: MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
287: MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
288: MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
289: MatPartitioningSetFromOptions(part);
290: /* get the new owning processor number for each vertex */
291: MatPartitioningApply(part,&partitioning);
292: ISBuildTwoSided(partitioning,NULL,&fromrows);
293: ISPartitioningToNumbering(partitioning,&isn);
294: ISDestroy(&isn);
295: ISGetLocalSize(fromrows,&fromrows_localsize);
296: MatPartitioningDestroy(&part);
297: MatCreateVecs(pc->pmat,&outervec,NULL);
298: VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
299: VecDuplicate(osm->pcx,&(osm->pcy));
300: VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
301: MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
302: PetscObjectReference((PetscObject)fromrows);
303: osm->permutationIS = fromrows;
304: osm->pcmat = pc->pmat;
305: PetscObjectReference((PetscObject)osm->permutationP);
306: pc->pmat = osm->permutationP;
307: VecDestroy(&outervec);
308: ISDestroy(&fromrows);
309: ISDestroy(&partitioning);
310: osm->n = PETSC_DETERMINE;
311: return(0);
312: }
314: static PetscErrorCode PCSetUp_GASM(PC pc)
315: {
316: PC_GASM *osm = (PC_GASM*)pc->data;
318: PetscInt i,nInnerIndices,nTotalInnerIndices;
319: PetscMPIInt rank, size;
320: MatReuse scall = MAT_REUSE_MATRIX;
321: KSP ksp;
322: PC subpc;
323: const char *prefix,*pprefix;
324: Vec x,y;
325: PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
326: const PetscInt *oidxi; /* Indices of the i-th local outer subdomain. */
327: PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
328: PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
329: IS gois; /* Disjoint union of the global indices of the outer subdomains. */
330: IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
331: PetscScalar *gxarray, *gyarray;
332: PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
333: PetscInt num_subdomains = 0;
334: DM *subdomain_dm = NULL;
335: char **subdomain_names = NULL;
336: PetscInt *numbering;
340: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
341: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
342: if (!pc->setupcalled) {
343: /* use a hierarchical partitioning */
344: if (osm->hierarchicalpartitioning){
345: PCGASMSetHierarchicalPartitioning(pc);
346: }
347: if (osm->n == PETSC_DETERMINE) {
348: if (osm->N != PETSC_DETERMINE) {
349: /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
350: PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
351: } else if (osm->dm_subdomains && pc->dm) {
352: /* try pc->dm next, if allowed */
353: PetscInt d;
354: IS *inner_subdomain_is, *outer_subdomain_is;
355: DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
356: if (num_subdomains) {
357: PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
358: }
359: for (d = 0; d < num_subdomains; ++d) {
360: if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
361: if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
362: }
363: PetscFree(inner_subdomain_is);
364: PetscFree(outer_subdomain_is);
365: } else {
366: /* still no subdomains; use one per processor */
367: osm->nmax = osm->n = 1;
368: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
369: osm->N = size;
370: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
371: }
372: }
373: if (!osm->iis) {
374: /*
375: osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
376: We create the requisite number of local inner subdomains and then expand them into
377: outer subdomains, if necessary.
378: */
379: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
380: }
381: if (!osm->ois) {
382: /*
383: Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
384: has been requested, copy the inner subdomains over so they can be modified.
385: */
386: PetscMalloc1(osm->n,&osm->ois);
387: for (i=0; i<osm->n; ++i) {
388: if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
389: ISDuplicate(osm->iis[i],(osm->ois)+i);
390: ISCopy(osm->iis[i],osm->ois[i]);
391: } else {
392: PetscObjectReference((PetscObject)((osm->iis)[i]));
393: osm->ois[i] = osm->iis[i];
394: }
395: }
396: if (osm->overlap>0 && osm->N>1) {
397: /* Extend the "overlapping" regions by a number of steps */
398: MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
399: }
400: }
402: /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
403: if (osm->nmax == PETSC_DETERMINE) {
404: PetscMPIInt inwork,outwork;
405: /* determine global number of subdomains and the max number of local subdomains */
406: inwork = osm->n;
407: MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
408: osm->nmax = outwork;
409: }
410: if (osm->N == PETSC_DETERMINE) {
411: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
412: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
413: }
415: if (osm->sort_indices) {
416: for (i=0; i<osm->n; i++) {
417: ISSort(osm->ois[i]);
418: ISSort(osm->iis[i]);
419: }
420: }
421: PCGetOptionsPrefix(pc,&prefix);
422: PCGASMPrintSubdomains(pc);
424: /*
425: Merge the ISs, create merged vectors and restrictions.
426: */
427: /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
428: on = 0;
429: for (i=0; i<osm->n; i++) {
430: ISGetLocalSize(osm->ois[i],&oni);
431: on += oni;
432: }
433: PetscMalloc1(on, &oidx);
434: on = 0;
435: /* Merge local indices together */
436: for (i=0; i<osm->n; i++) {
437: ISGetLocalSize(osm->ois[i],&oni);
438: ISGetIndices(osm->ois[i],&oidxi);
439: PetscArraycpy(oidx+on,oidxi,oni);
440: ISRestoreIndices(osm->ois[i],&oidxi);
441: on += oni;
442: }
443: ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
444: nTotalInnerIndices = 0;
445: for (i=0; i<osm->n; i++){
446: ISGetLocalSize(osm->iis[i],&nInnerIndices);
447: nTotalInnerIndices += nInnerIndices;
448: }
449: VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
450: VecDuplicate(x,&y);
452: VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
453: VecDuplicate(osm->gx,&osm->gy);
454: VecGetOwnershipRange(osm->gx, &gostart, NULL);
455: ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
456: /* gois may contain indices that are not locally owned */
457: VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
458: PetscMalloc1(osm->n,&numbering);
459: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
460: VecDestroy(&x);
461: ISDestroy(&gois);
463: /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
464: {
465: PetscInt ini; /* Number of indices in the i-th local inner subdomain. */
466: PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
467: PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
468: PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
469: IS giis; /* IS for the disjoint union of inner subdomains. */
470: IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
471: PetscScalar *array;
472: const PetscInt *indices;
473: PetscInt k;
474: on = 0;
475: for (i=0; i<osm->n; i++) {
476: ISGetLocalSize(osm->ois[i],&oni);
477: on += oni;
478: }
479: PetscMalloc1(on, &iidx);
480: PetscMalloc1(on, &ioidx);
481: VecGetArray(y,&array);
482: /* tag each inner-subdomain entry with its global subdomain number so that overlap can be detected after the scatter */
483: in = 0;
484: for (i=0; i<osm->n; i++) {
485: ISGetLocalSize(osm->iis[i],&ini);
486: for (k = 0; k < ini; ++k){
487: array[in+k] = numbering[i];
488: }
489: in += ini;
490: }
491: VecRestoreArray(y,&array);
492: VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
493: VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
494: VecGetOwnershipRange(osm->gy,&gostart, NULL);
495: VecGetArray(osm->gy,&array);
496: on = 0;
497: in = 0;
498: for (i=0; i<osm->n; i++) {
499: ISGetLocalSize(osm->ois[i],&oni);
500: ISGetIndices(osm->ois[i],&indices);
501: for (k=0; k<oni; k++) {
502: /* skip overlapping indices to get inner domain */
503: if (PetscRealPart(array[on+k]) != numbering[i]) continue;
504: iidx[in] = indices[k];
505: ioidx[in++] = gostart+on+k;
506: }
507: ISRestoreIndices(osm->ois[i], &indices);
508: on += oni;
509: }
510: VecRestoreArray(osm->gy,&array);
511: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
512: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
513: VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
514: VecDestroy(&y);
515: ISDestroy(&giis);
516: ISDestroy(&giois);
517: }
518: ISDestroy(&goid);
519: PetscFree(numbering);
521: /* Create the subdomain work vectors. */
522: PetscMalloc1(osm->n,&osm->x);
523: PetscMalloc1(osm->n,&osm->y);
524: VecGetArray(osm->gx, &gxarray);
525: VecGetArray(osm->gy, &gyarray);
526: for (i=0, on=0; i<osm->n; ++i, on += oni) {
527: PetscInt oNi;
528: ISGetLocalSize(osm->ois[i],&oni);
529: /* on a sub communicator */
530: ISGetSize(osm->ois[i],&oNi);
531: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
532: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
533: }
534: VecRestoreArray(osm->gx, &gxarray);
535: VecRestoreArray(osm->gy, &gyarray);
536: /* Create the subdomain solvers */
537: PetscMalloc1(osm->n,&osm->ksp);
538: for (i=0; i<osm->n; i++) {
539: char subprefix[PETSC_MAX_PATH_LEN+1];
540: KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
541: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
542: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
543: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
544: KSPSetType(ksp,KSPPREONLY);
545: KSPGetPC(ksp,&subpc); /* Why do we need this here? */
546: if (subdomain_dm) {
547: KSPSetDM(ksp,subdomain_dm[i]);
548: DMDestroy(subdomain_dm+i);
549: }
550: PCGetOptionsPrefix(pc,&prefix);
551: KSPSetOptionsPrefix(ksp,prefix);
552: if (subdomain_names && subdomain_names[i]) {
553: PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
554: KSPAppendOptionsPrefix(ksp,subprefix);
555: PetscFree(subdomain_names[i]);
556: }
557: KSPAppendOptionsPrefix(ksp,"sub_");
558: osm->ksp[i] = ksp;
559: }
560: PetscFree(subdomain_dm);
561: PetscFree(subdomain_names);
562: scall = MAT_INITIAL_MATRIX;
563: } else { /* if (pc->setupcalled) */
564: /*
565: Destroy the submatrices from the previous iteration
566: */
567: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
568: MatDestroyMatrices(osm->n,&osm->pmat);
569: scall = MAT_INITIAL_MATRIX;
570: }
571: if (osm->permutationIS){
572: MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
573: PetscObjectReference((PetscObject)osm->permutationP);
574: osm->pcmat = pc->pmat;
575: pc->pmat = osm->permutationP;
576: }
577: }
579: /*
580: Extract the submatrices.
581: */
582: if (size > 1) {
583: MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
584: } else {
585: MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
586: }
587: if (scall == MAT_INITIAL_MATRIX) {
588: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
589: for (i=0; i<osm->n; i++) {
590: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
591: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
592: }
593: }
595: /* Return control to the user so that the submatrices can be modified (e.g., to apply
596: different boundary conditions for the submatrices than for the global problem) */
597: PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);
599: /*
600: Loop over submatrices putting them into local ksps
601: */
602: for (i=0; i<osm->n; i++) {
603: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
604: KSPGetOptionsPrefix(osm->ksp[i],&prefix);
605: MatSetOptionsPrefix(osm->pmat[i],prefix);
606: if (!pc->setupcalled) {
607: KSPSetFromOptions(osm->ksp[i]);
608: }
609: }
610: if (osm->pcmat){
611: MatDestroy(&pc->pmat);
612: pc->pmat = osm->pcmat;
613: osm->pcmat = NULL;
614: }
615: return(0);
616: }
618: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
619: {
620: PC_GASM *osm = (PC_GASM*)pc->data;
622: PetscInt i;
625: for (i=0; i<osm->n; i++) {
626: KSPSetUp(osm->ksp[i]);
627: }
628: return(0);
629: }
631: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
632: {
633: PC_GASM *osm = (PC_GASM*)pc->data;
635: PetscInt i;
636: Vec x,y;
637: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
640: if (osm->pctoouter) {
641: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
642: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
643: x = osm->pcx;
644: y = osm->pcy;
645: } else {
646: x = xin;
647: y = yout;
648: }
649: /*
650: support for limiting the restriction or interpolation only to the inner
651: subdomain values (leaving the other values 0).
652: */
653: if (!(osm->type & PC_GASM_RESTRICT)) {
654: /* have to zero the work RHS since scatter may leave some slots empty */
655: VecZeroEntries(osm->gx);
656: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
657: } else {
658: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
659: }
660: VecZeroEntries(osm->gy);
661: if (!(osm->type & PC_GASM_RESTRICT)) {
662: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
663: } else {
664: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
665: }
666: /* do the subdomain solves */
667: for (i=0; i<osm->n; ++i) {
668: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
669: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
670: }
671: /* do we need to zero y? */
672: VecZeroEntries(y);
673: if (!(osm->type & PC_GASM_INTERPOLATE)) {
674: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
675: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
676: } else {
677: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
678: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
679: }
680: if (osm->pctoouter) {
681: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
682: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
683: }
684: return(0);
685: }
687: static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
688: {
689: PC_GASM *osm = (PC_GASM*)pc->data;
690: Mat X,Y,O=NULL,Z,W;
691: Vec x,y;
692: PetscInt i,m,M,N;
693: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
697: if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
698: MatGetSize(Xin,NULL,&N);
699: if (osm->pctoouter) {
700: VecGetLocalSize(osm->pcx,&m);
701: VecGetSize(osm->pcx,&M);
702: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);
703: for (i = 0; i < N; ++i) {
704: MatDenseGetColumnVecRead(Xin,i,&x);
705: MatDenseGetColumnVecWrite(O,i,&y);
706: VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
707: VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
708: MatDenseRestoreColumnVecWrite(O,i,&y);
709: MatDenseRestoreColumnVecRead(Xin,i,&x);
710: }
711: X = Y = O;
712: } else {
713: X = Xin;
714: Y = Yout;
715: }
716: /*
717: support for limiting the restriction or interpolation only to the inner
718: subdomain values (leaving the other values 0).
719: */
720: VecGetLocalSize(osm->x[0],&m);
721: VecGetSize(osm->x[0],&M);
722: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);
723: for (i = 0; i < N; ++i) {
724: MatDenseGetColumnVecRead(X,i,&x);
725: MatDenseGetColumnVecWrite(Z,i,&y);
726: if (!(osm->type & PC_GASM_RESTRICT)) {
727: /* have to zero the work RHS since scatter may leave some slots empty */
728: VecZeroEntries(y);
729: VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);
730: VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);
731: } else {
732: VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);
733: VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);
734: }
735: MatDenseRestoreColumnVecWrite(Z,i,&y);
736: MatDenseRestoreColumnVecRead(X,i,&x);
737: }
738: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);
739: MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
740: MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);
741: MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);
742: /* do the subdomain solve */
743: KSPMatSolve(osm->ksp[0],Z,W);
744: KSPCheckSolve(osm->ksp[0],pc,NULL);
745: MatDestroy(&Z);
746: /* do we need to zero y? */
747: MatZeroEntries(Y);
748: for (i = 0; i < N; ++i) {
749: MatDenseGetColumnVecWrite(Y,i,&y);
750: MatDenseGetColumnVecRead(W,i,&x);
751: if (!(osm->type & PC_GASM_INTERPOLATE)) {
752: VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);
753: VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);
754: } else {
755: VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);
756: VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);
757: }
758: MatDenseRestoreColumnVecRead(W,i,&x);
759: if (osm->pctoouter) {
760: MatDenseGetColumnVecWrite(Yout,i,&x);
761: VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
762: VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
763: MatDenseRestoreColumnVecWrite(Yout,i,&x);
764: }
765: MatDenseRestoreColumnVecWrite(Y,i,&y);
766: }
767: MatDestroy(&W);
768: MatDestroy(&O);
769: return(0);
770: }
772: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
773: {
774: PC_GASM *osm = (PC_GASM*)pc->data;
776: PetscInt i;
777: Vec x,y;
778: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
781: if (osm->pctoouter){
782: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
783: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
784: x = osm->pcx;
785: y = osm->pcy;
786: }else{
787: x = xin;
788: y = yout;
789: }
790: /*
791: Support for limiting the restriction or interpolation to only local
792: subdomain values (leaving the other values 0).
794: Note: these are reversed from the PCApply_GASM() because we are applying the
795: transpose of the three terms
796: */
797: if (!(osm->type & PC_GASM_INTERPOLATE)) {
798: /* have to zero the work RHS since scatter may leave some slots empty */
799: VecZeroEntries(osm->gx);
800: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
801: } else {
802: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
803: }
804: VecZeroEntries(osm->gy);
805: if (!(osm->type & PC_GASM_INTERPOLATE)) {
806: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
807: } else {
808: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
809: }
810: /* do the local solves */
811: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
812: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
813: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
814: }
815: VecZeroEntries(y);
816: if (!(osm->type & PC_GASM_RESTRICT)) {
817: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
818: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
819: } else {
820: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
821: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
822: }
823: if (osm->pctoouter){
824: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
825: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
826: }
827: return(0);
828: }
830: static PetscErrorCode PCReset_GASM(PC pc)
831: {
832: PC_GASM *osm = (PC_GASM*)pc->data;
834: PetscInt i;
837: if (osm->ksp) {
838: for (i=0; i<osm->n; i++) {
839: KSPReset(osm->ksp[i]);
840: }
841: }
842: if (osm->pmat) {
843: if (osm->n > 0) {
844: PetscMPIInt size;
845: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
846: if (size > 1) {
847: /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
848: MatDestroyMatrices(osm->n,&osm->pmat);
849: } else {
850: MatDestroySubMatrices(osm->n,&osm->pmat);
851: }
852: }
853: }
854: if (osm->x) {
855: for (i=0; i<osm->n; i++) {
856: VecDestroy(&osm->x[i]);
857: VecDestroy(&osm->y[i]);
858: }
859: }
860: VecDestroy(&osm->gx);
861: VecDestroy(&osm->gy);
863: VecScatterDestroy(&osm->gorestriction);
864: VecScatterDestroy(&osm->girestriction);
865: if (!osm->user_subdomains) {
866: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
867: osm->N = PETSC_DETERMINE;
868: osm->nmax = PETSC_DETERMINE;
869: }
870: if (osm->pctoouter){
871: VecScatterDestroy(&(osm->pctoouter));
872: }
873: if (osm->permutationIS){
874: ISDestroy(&(osm->permutationIS));
875: }
876: if (osm->pcx){
877: VecDestroy(&(osm->pcx));
878: }
879: if (osm->pcy){
880: VecDestroy(&(osm->pcy));
881: }
882: if (osm->permutationP){
883: MatDestroy(&(osm->permutationP));
884: }
885: if (osm->pcmat){
886: MatDestroy(&osm->pcmat);
887: }
888: return(0);
889: }
891: static PetscErrorCode PCDestroy_GASM(PC pc)
892: {
893: PC_GASM *osm = (PC_GASM*)pc->data;
895: PetscInt i;
898: PCReset_GASM(pc);
899: /* PCReset will not destroy subdomains, if user_subdomains is true. */
900: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
901: if (osm->ksp) {
902: for (i=0; i<osm->n; i++) {
903: KSPDestroy(&osm->ksp[i]);
904: }
905: PetscFree(osm->ksp);
906: }
907: PetscFree(osm->x);
908: PetscFree(osm->y);
909: PetscFree(pc->data);
910: return(0);
911: }
913: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
914: {
915: PC_GASM *osm = (PC_GASM*)pc->data;
917: PetscInt blocks,ovl;
918: PetscBool flg;
919: PCGASMType gasmtype;
922: PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
923: PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
924: PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
925: if (flg) {
926: PCGASMSetTotalSubdomains(pc,blocks);
927: }
928: PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
929: if (flg) {
930: PCGASMSetOverlap(pc,ovl);
931: osm->dm_subdomains = PETSC_FALSE;
932: }
933: flg = PETSC_FALSE;
934: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
935: if (flg) {PCGASMSetType(pc,gasmtype);}
936: PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
937: PetscOptionsTail();
938: return(0);
939: }
941: /*------------------------------------------------------------------------------------*/
943: /*@
944: PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
945: communicator.
946: Logically collective on pc
948: Input Parameters:
949: + pc - the preconditioner
950: - N - total number of subdomains
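
   Example:
   A minimal usage sketch (illustrative only; ksp is assumed to be an already-created outer KSP and is not part of this file):
.vb
      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetTotalSubdomains(pc,16);  /* ask for 16 subdomains across the whole communicator */
.ve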
953: Level: beginner
955: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
956: PCGASMCreateSubdomains2D()
957: @*/
958: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
959: {
960: PC_GASM *osm = (PC_GASM*)pc->data;
961: PetscMPIInt size,rank;
965: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
966: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
968: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
969: osm->ois = osm->iis = NULL;
971: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
972: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
973: osm->N = N;
974: osm->n = PETSC_DETERMINE;
975: osm->nmax = PETSC_DETERMINE;
976: osm->dm_subdomains = PETSC_FALSE;
977: return(0);
978: }
980: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
981: {
982: PC_GASM *osm = (PC_GASM*)pc->data;
983: PetscErrorCode ierr;
984: PetscInt i;
987: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
988: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
990: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
991: osm->iis = osm->ois = NULL;
992: osm->n = n;
993: osm->N = PETSC_DETERMINE;
994: osm->nmax = PETSC_DETERMINE;
995: if (ois) {
996: PetscMalloc1(n,&osm->ois);
997: for (i=0; i<n; i++) {
998: PetscObjectReference((PetscObject)ois[i]);
999: osm->ois[i] = ois[i];
1000: }
1001: /*
1002: Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1003: it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1004: */
1005: osm->overlap = -1;
1006: /* inner subdomains must be provided */
1007: if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided");
1008: }/* end if */
1009: if (iis) {
1010: PetscMalloc1(n,&osm->iis);
1011: for (i=0; i<n; i++) {
1012: PetscObjectReference((PetscObject)iis[i]);
1013: osm->iis[i] = iis[i];
1014: }
1015: if (!ois) {
1016: osm->ois = NULL;
1017: /* if the user does not provide outer indices, the corresponding outer indices will be created in PCSetUp_GASM() by expanding the inner ones by the requested osm->overlap */
1018: }
1019: }
1020: if (PetscDefined(USE_DEBUG)) {
1021: PetscInt j,rstart,rend,*covered,lsize;
1022: const PetscInt *indices;
1023: /* check that the inner indices exactly cover the local portion of the preconditioning matrix */
1024: MatGetOwnershipRange(pc->pmat,&rstart,&rend);
1025: PetscCalloc1(rend-rstart,&covered);
1026: /* check that no inner index is owned by another process or covered more than once */
1027: for (i=0; i<n; i++) {
1028: ISGetIndices(osm->iis[i],&indices);
1029: ISGetLocalSize(osm->iis[i],&lsize);
1030: for (j=0; j<lsize; j++) {
1031: if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot contain an index %D owned by another process", indices[j]);
1032: else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot overlap: index %D appears more than once",indices[j]);
1033: else covered[indices[j]-rstart] = 1;
1034: }
1035: ISRestoreIndices(osm->iis[i],&indices);
1036: }
1037: /* check if we miss any indices */
1038: for (i=rstart; i<rend; i++) {
1039: if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %D was not covered by the inner subdomains",i);
1040: }
1041: PetscFree(covered);
1042: }
1043: if (iis) osm->user_subdomains = PETSC_TRUE;
1044: return(0);
1045: }
1047: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1048: {
1049: PC_GASM *osm = (PC_GASM*)pc->data;
1052: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1053: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1054: if (!pc->setupcalled) osm->overlap = ovl;
1055: return(0);
1056: }
1058: static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
1059: {
1060: PC_GASM *osm = (PC_GASM*)pc->data;
1063: osm->type = type;
1064: osm->type_set = PETSC_TRUE;
1065: return(0);
1066: }
1068: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1069: {
1070: PC_GASM *osm = (PC_GASM*)pc->data;
1073: osm->sort_indices = doSort;
1074: return(0);
1075: }
1077: /*
1078: FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1079: In particular, it would upset the global subdomain number calculation.
1080: */
1081: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1082: {
1083: PC_GASM *osm = (PC_GASM*)pc->data;
1087: if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1089: if (n) *n = osm->n;
1090: if (first) {
1091: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1092: *first -= osm->n;
1093: }
1094: if (ksp) {
1095: /* Assume that local solves are now different; not necessarily
1096: true, though! This flag is used only for PCView_GASM() */
1097: *ksp = osm->ksp;
1098: osm->same_subdomain_solvers = PETSC_FALSE;
1099: }
1100: return(0);
1101: } /* PCGASMGetSubKSP_GASM() */
1103: /*@C
1104: PCGASMSetSubdomains - Sets the subdomains for this processor
1105: for the additive Schwarz preconditioner.
1107: Collective on pc
1109: Input Parameters:
1110: + pc - the preconditioner object
1111: . n - the number of subdomains for this processor
1112: . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1113: - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1115: Notes:
1116: The IS indices use the parallel, global numbering of the vector entries.
1117: Inner subdomains are those where the correction is applied.
1118: Outer subdomains are those where the residual necessary to obtain the
1119: corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1120: Both inner and outer subdomains can extend over several processors.
1121: This processor's portion of a subdomain is known as a local subdomain.
1123: Inner subdomains cannot overlap with each other, cannot contain any entries owned by other processes,
1124: and together have to cover the entire local portion of the matrix owned by the current process. The index
1125: sets on each process should be ordered such that the i-th local subdomain is connected to the i-th remote
1126: subdomain on another MPI process.
1128: By default the GASM preconditioner uses 1 (local) subdomain per processor.
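
   Example:
   A hedged sketch of the simplest legal input: one inner subdomain covering exactly the locally owned rows of a
   matrix A (A and pc are assumed to exist already; the outer subdomains are left for PETSc to construct):
.vb
      PetscInt rstart,rend;
      IS       inner;
      MatGetOwnershipRange(A,&rstart,&rend);
      ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&inner);
      PCGASMSetSubdomains(pc,1,&inner,NULL);  /* outer subdomains built from inner ones plus requested overlap */
      ISDestroy(&inner);                      /* the PC keeps its own reference */
.ve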
1131: Level: advanced
1133: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1134: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1135: @*/
1136: PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1137: {
1138: PC_GASM *osm = (PC_GASM*)pc->data;
1143: PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1144: osm->dm_subdomains = PETSC_FALSE;
1145: return(0);
1146: }
1148: /*@
1149: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1150: additive Schwarz preconditioner. Either all or no processors in the
1151: pc communicator must call this routine.
1153: Logically Collective on pc
1155: Input Parameters:
1156: + pc - the preconditioner context
1157: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1159: Options Database Key:
1160: . -pc_gasm_overlap <overlap> - Sets overlap
1162: Notes:
1163: By default the GASM preconditioner uses 1 subdomain per processor. To use
1164: multiple subdomains per processor or "straddling" subdomains that intersect
1165: multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1167: The overlap defaults to 0, so if one desires that no additional
1168: overlap be computed beyond what may have been set with a call to
1169: PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1170: not explicitly set the subdomains in application code, then all overlap would be computed
1171: internally by PETSc, and using an overlap of 0 would result in a GASM
1172: variant that is equivalent to the block Jacobi preconditioner.
1174: Note that one can define initial index sets with any overlap via
1175: PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1176: PETSc to extend that overlap further, if desired.
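
   Example:
   A minimal sketch (the value is illustrative; pc is assumed to already be a PCGASM preconditioner):
.vb
      PCGASMSetOverlap(pc,2);   /* equivalent to the command-line option -pc_gasm_overlap 2 */
.ve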
1178: Level: intermediate
1180: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1181: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1182: @*/
1183: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1184: {
1186: PC_GASM *osm = (PC_GASM*)pc->data;
1191: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1192: osm->dm_subdomains = PETSC_FALSE;
1193: return(0);
1194: }
1196: /*@
1197: PCGASMSetType - Sets the type of restriction and interpolation used
1198: for local problems in the additive Schwarz method.
1200: Logically Collective on PC
1202: Input Parameters:
1203: + pc - the preconditioner context
1204: - type - variant of GASM, one of
1205: .vb
1206: PC_GASM_BASIC - full interpolation and restriction
1207: PC_GASM_RESTRICT - full restriction, local processor interpolation
1208: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1209: PC_GASM_NONE - local processor restriction and interpolation
1210: .ve
1212: Options Database Key:
1213: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
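
   Example:
   A minimal sketch (pc is assumed to already be a PCGASM preconditioner; the default type is PC_GASM_RESTRICT):
.vb
      PCGASMSetType(pc,PC_GASM_BASIC);   /* use full restriction and full interpolation */
.ve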
1215: Level: intermediate
1217: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1218: PCGASMCreateSubdomains2D()
1219: @*/
1220: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1221: {
1227: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1228: return(0);
1229: }
1231: /*@
1232: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1234: Logically Collective on PC
1236: Input Parameters:
1237: + pc - the preconditioner context
1238: - doSort - sort the subdomain indices
1240: Level: intermediate
1242: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1243: PCGASMCreateSubdomains2D()
1244: @*/
1245: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1246: {
1252: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1253: return(0);
1254: }
1256: /*@C
1257: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1258: this processor.
1260: Collective on PC iff first_local is requested
1262: Input Parameter:
1263: . pc - the preconditioner context
1265: Output Parameters:
1266: + n_local - the number of blocks on this processor or NULL
1267: . first_local - the global number of the first block on this processor or NULL,
1268: all processors must request or all must pass NULL
1269: - ksp - the array of KSP contexts
1271: Note:
1272: After PCGASMGetSubKSP() the array of KSP contexts is not to be freed by the caller
1274: Currently for some matrix implementations only 1 block per processor
1275: is supported.
1277: You must call KSPSetUp() before calling PCGASMGetSubKSP().
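
   Example:
   A hedged sketch of customizing each block solver after setup (ksp is assumed to be the outer KSP that uses this PC):
.vb
      KSP      *subksp;
      PC       subpc;
      PetscInt nlocal,first,i;
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCLU);   /* e.g., direct solve on each block */
      }
.ve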
1279: Level: advanced
1281: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1282: PCGASMCreateSubdomains2D()
1283: @*/
1284: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1285: {
1290: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1291: return(0);
1292: }
1294: /* -------------------------------------------------------------------------------------*/
1295: /*MC
1296: PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1297: its own KSP object.
1299: Options Database Keys:
1300: + -pc_gasm_total_subdomains <n> - Sets the total number of subdomains to be distributed among the processes
1301: . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1302: . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1303: . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1304: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1306: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1307: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1308: -pc_gasm_type basic to use the standard GASM.
1310: Notes:
1311: Blocks can be shared by multiple processes.
1313: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1314: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1316: To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1317: and set the options directly on the resulting KSP object (you can access its PC
1318: with KSPGetPC())
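
   Example:
   An illustrative command line (the executable name ./ex is a placeholder; the options themselves are the ones
   documented above):
.vb
      mpiexec -n 4 ./ex -ksp_type gmres -pc_type gasm \
          -pc_gasm_total_subdomains 2 -pc_gasm_overlap 1 \
          -sub_ksp_type preonly -sub_pc_type ilu
.ve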
1321: Level: beginner
1323: References:
1324: + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1325: Courant Institute, New York University Technical report
1326: - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1327: Cambridge University Press.
1329: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1330: PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1331: PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1333: M*/
1335: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1336: {
1338: PC_GASM *osm;
1341: PetscNewLog(pc,&osm);
1343: osm->N = PETSC_DETERMINE;
1344: osm->n = PETSC_DECIDE;
1345: osm->nmax = PETSC_DETERMINE;
1346: osm->overlap = 0;
1347: osm->ksp = NULL;
1348: osm->gorestriction = NULL;
1349: osm->girestriction = NULL;
1350: osm->pctoouter = NULL;
1351: osm->gx = NULL;
1352: osm->gy = NULL;
1353: osm->x = NULL;
1354: osm->y = NULL;
1355: osm->pcx = NULL;
1356: osm->pcy = NULL;
1357: osm->permutationIS = NULL;
1358: osm->permutationP = NULL;
1359: osm->pcmat = NULL;
1360: osm->ois = NULL;
1361: osm->iis = NULL;
1362: osm->pmat = NULL;
1363: osm->type = PC_GASM_RESTRICT;
1364: osm->same_subdomain_solvers = PETSC_TRUE;
1365: osm->sort_indices = PETSC_TRUE;
1366: osm->dm_subdomains = PETSC_FALSE;
1367: osm->hierarchicalpartitioning = PETSC_FALSE;
1369: pc->data = (void*)osm;
1370: pc->ops->apply = PCApply_GASM;
1371: pc->ops->matapply = PCMatApply_GASM;
1372: pc->ops->applytranspose = PCApplyTranspose_GASM;
1373: pc->ops->setup = PCSetUp_GASM;
1374: pc->ops->reset = PCReset_GASM;
1375: pc->ops->destroy = PCDestroy_GASM;
1376: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1377: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1378: pc->ops->view = PCView_GASM;
1379: pc->ops->applyrichardson = NULL;
1381: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1382: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1383: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1384: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1385: PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1386: return(0);
1387: }
1389: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1390: {
1391: MatPartitioning mpart;
1392: const char *prefix;
1393: PetscInt i,j,rstart,rend,bs;
1394: PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1395: Mat Ad = NULL, adj;
1396: IS ispart,isnumb,*is;
1397: PetscErrorCode ierr;
1400: if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must be > 0, got nloc = %D",nloc);
1402: /* Get prefix, row distribution, and block size */
1403: MatGetOptionsPrefix(A,&prefix);
1404: MatGetOwnershipRange(A,&rstart,&rend);
1405: MatGetBlockSize(A,&bs);
1406: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1408: /* Get diagonal block from matrix if possible */
1409: MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1410: if (hasop) {
1411: MatGetDiagonalBlock(A,&Ad);
1412: }
1413: if (Ad) {
1414: PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1415: if (!isbaij) {PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1416: }
1417: if (Ad && nloc > 1) {
1418: PetscBool match,done;
1419: /* Try to setup a good matrix partitioning if available */
1420: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1421: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1422: MatPartitioningSetFromOptions(mpart);
1423: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1424: if (!match) {
1425: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1426: }
1427: if (!match) { /* assume a "good" partitioner is available */
1428: PetscInt na;
1429: const PetscInt *ia,*ja;
1430: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1431: if (done) {
1432: /* Build adjacency matrix by hand. Unfortunately a call to
1433: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1434: remove the block-aij structure and we cannot expect
1435: MatPartitioning to split vertices as we need */
1436: PetscInt i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1437: const PetscInt *row;
1438: nnz = 0;
1439: for (i=0; i<na; i++) { /* count number of nonzeros */
1440: len = ia[i+1] - ia[i];
1441: row = ja + ia[i];
1442: for (j=0; j<len; j++) {
1443: if (row[j] == i) { /* don't count diagonal */
1444: len--; break;
1445: }
1446: }
1447: nnz += len;
1448: }
1449: PetscMalloc1(na+1,&iia);
1450: PetscMalloc1(nnz,&jja);
1451: nnz = 0;
1452: iia[0] = 0;
1453: for (i=0; i<na; i++) { /* fill adjacency */
1454: cnt = 0;
1455: len = ia[i+1] - ia[i];
1456: row = ja + ia[i];
1457: for (j=0; j<len; j++) {
1458: if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1459: }
1460: nnz += cnt;
1461: iia[i+1] = nnz;
1462: }
1463: /* Partitioning of the adjacency matrix */
1464: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1465: MatPartitioningSetAdjacency(mpart,adj);
1466: MatPartitioningSetNParts(mpart,nloc);
1467: MatPartitioningApply(mpart,&ispart);
1468: ISPartitioningToNumbering(ispart,&isnumb);
1469: MatDestroy(&adj);
1470: foundpart = PETSC_TRUE;
1471: }
1472: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1473: }
1474: MatPartitioningDestroy(&mpart);
1475: }
1476: PetscMalloc1(nloc,&is);
1477: if (!foundpart) {
1479: /* Partitioning by contiguous chunks of rows */
1481: PetscInt mbs = (rend-rstart)/bs;
1482: PetscInt start = rstart;
1483: for (i=0; i<nloc; i++) {
1484: PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1485: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1486: start += count;
1487: }
1489: } else {
1491: /* Partitioning by adjacency of diagonal block */
1493: const PetscInt *numbering;
1494: PetscInt *count,nidx,*indices,*newidx,start=0;
1495: /* Get node count in each partition */
1496: PetscMalloc1(nloc,&count);
1497: ISPartitioningCount(ispart,nloc,count);
1498: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1499: for (i=0; i<nloc; i++) count[i] *= bs;
1500: }
1501: /* Build indices from node numbering */
1502: ISGetLocalSize(isnumb,&nidx);
1503: PetscMalloc1(nidx,&indices);
1504: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1505: ISGetIndices(isnumb,&numbering);
1506: PetscSortIntWithPermutation(nidx,numbering,indices);
1507: ISRestoreIndices(isnumb,&numbering);
1508: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1509: PetscMalloc1(nidx*bs,&newidx);
1510: for (i=0; i<nidx; i++) {
1511: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1512: }
1513: PetscFree(indices);
1514: nidx *= bs;
1515: indices = newidx;
1516: }
1517: /* Shift to get global indices */
1518: for (i=0; i<nidx; i++) indices[i] += rstart;
1520: /* Build the index sets for each block */
1521: for (i=0; i<nloc; i++) {
1522: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1523: ISSort(is[i]);
1524: start += count[i];
1525: }
1527: PetscFree(count);
1528: PetscFree(indices);
1529: ISDestroy(&isnumb);
1530: ISDestroy(&ispart);
1531: }
1532: *iis = is;
1533: return(0);
1534: }
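/*
   A minimal standalone sketch (illustrative only, not part of this file) of the
   contiguous-chunk split used above when no graph partitioner is available:
   mbs blocks are divided among nloc subdomains, the first (mbs % nloc) subdomains
   receive one extra block, and each count is scaled by the block size bs.
   All numbers below are made up for illustration.

     PetscInt mbs = 10, nloc = 3, bs = 2;                    // 10 blocks, 3 subdomains, block size 2
     PetscInt i, start = 0;
     for (i = 0; i < nloc; i++) {
       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs; // 8, 6, 6 rows
       // ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]) would cover rows [start,start+count)
       start += count;                                        // ends at mbs*bs = 20 rows total
     }
*/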
1536: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1537: {
1538: PetscErrorCode ierr;
1541: MatSubdomainsCreateCoalesce(A,N,n,iis);
1542: return(0);
1543: }
1545: /*@C
1546: PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1547: Schwarz preconditioner for any problem based on its matrix.
1549: Collective
1551: Input Parameters:
1552: + A - The global matrix operator
1553: - N - the number of global subdomains requested
1555: Output Parameters:
1556: + n - the number of subdomains created on this processor
1557: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1559: Level: advanced
1561: Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1562: When N < size, the subdomains are 'straddling', i.e., they straddle processor boundaries and are no longer local.
1563: The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL). The overlapping
1564: outer subdomains will be automatically generated from these according to the requested amount of
1565: overlap; this is currently supported only with local subdomains.
1568: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1569: @*/
1570: PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1571: {
1572: PetscMPIInt size;
1573: PetscErrorCode ierr;
1579: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1580: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1581: if (N >= size) {
1582: *n = N/size + (N%size);
1583: PCGASMCreateLocalSubdomains(A,*n,iis);
1584: } else {
1585: PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1586: }
1587: return(0);
1588: }
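/*
   Illustrative usage sketch (not part of this file): create nonoverlapping
   subdomains from an assembled parallel matrix A, hand them to a PCGASM, and
   release the caller's copies. Assumes pc has already been set to PCGASM and
   given A as its operator, and that the requested number of subdomains is at
   least the number of MPI ranks (so the subdomains are local and the overlap
   can be applied). Error checking is omitted; the count 16 is arbitrary.

     PetscInt n;
     IS       *iis;
     PCGASMCreateSubdomains(A,16,&n,&iis);      // request 16 global subdomains
     PCGASMSetSubdomains(pc,n,iis,NULL);        // outer subdomains built from the overlap
     PCGASMSetOverlap(pc,1);                    // one layer of overlap
     PCGASMDestroySubdomains(n,&iis,NULL);      // PC keeps its own references
*/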
1590: /*@C
1591: PCGASMDestroySubdomains - Destroys the index sets created with
1592: PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1593: called after setting subdomains with PCGASMSetSubdomains().
1595: Collective
1597: Input Parameters:
1598: + n - the number of index sets
1599: . iis - the array of inner subdomains,
1600: - ois - the array of outer subdomains, can be NULL
1602: Level: intermediate
1604: Notes:
1605: this is merely a convenience subroutine that walks each list,
1606: destroys each IS on the list, and then frees the list. At the end the
1607: list pointers are set to NULL.
1609: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1610: @*/
1611: PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1612: {
1613: PetscInt i;
1617: if (n <= 0) return(0);
1618: if (ois) {
1620: if (*ois) {
1622: for (i=0; i<n; i++) {
1623: ISDestroy(&(*ois)[i]);
1624: }
1625: PetscFree((*ois));
1626: }
1627: }
1628: if (iis) {
1630: if (*iis) {
1632: for (i=0; i<n; i++) {
1633: ISDestroy(&(*iis)[i]);
1634: }
1635: PetscFree((*iis));
1636: }
1637: }
1638: return(0);
1639: }
1641: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1642: { \
1643: PetscInt first_row = first/M, last_row = last/M+1; \
1644: /* \
1645: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1646: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1647: subdomain). \
1648: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1649: of the intersection. \
1650: */ \
1651: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1652: *ylow_loc = PetscMax(first_row,ylow); \
1653: /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1654: *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1655: /* yhigh_loc is the grid row above the last local subdomain element */ \
1656: *yhigh_loc = PetscMin(last_row,yhigh); \
1657: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1658: *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1659: /* Now compute the size of the local subdomain n. */ \
1660: *n = 0; \
1661: if (*ylow_loc < *yhigh_loc) { \
1662: PetscInt width = xright-xleft; \
1663: *n += width*(*yhigh_loc-*ylow_loc-1); \
1664: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1665: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1666: } \
1667: }
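/*
   Worked example of the macro above (a sketch with illustrative numbers, assuming
   one degree of freedom per grid point): on a 10x10 grid (M = N = 10), take the
   subdomain box xleft = 2, xright = 8, ylow = 1, yhigh = 6 and the local ownership
   range [first,last) = [30,70), i.e. grid rows 3..6. Then

     first_row  = 3, last_row = 8
     ylow_loc   = max(3,1)      = 3     // first locally owned subdomain row
     xleft_loc  = max(30%10,2)  = 2     // offset within that first row
     yhigh_loc  = min(8,6)      = 6     // one past the last subdomain row
     xright_loc = 8                     // yhigh_loc != last_row, so unchanged
     n = 6*(6-3-1) + min(8-2,6) - 0 = 18

   which agrees with the direct count: rows 3, 4, 5 of columns 2..7 give 3*6 = 18 points.
*/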
1669: /*@
1670: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1671: preconditioner for a two-dimensional problem on a regular grid.
1673: Collective
1675: Input Parameters:
+  pc - the preconditioner context
1676: .  M, N - the global number of grid points in the x and y directions
1677: . Mdomains, Ndomains - the global number of subdomains in the x and y directions
1678: . dof - degrees of freedom per node
1679: - overlap - overlap in mesh lines
1681: Output Parameters:
1682: + nsub - the number of local subdomains created
1683: . iis - array of index sets defining inner (nonoverlapping) subdomains
1684: - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1687: Level: advanced
1689: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1690: @*/
1691: PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1692: {
1694: PetscMPIInt size, rank;
1695: PetscInt i, j;
1696: PetscInt maxheight, maxwidth;
1697: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1698: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1699: PetscInt x[2][2], y[2][2], n[2];
1700: PetscInt first, last;
1701: PetscInt nidx, *idx;
1702: PetscInt ii,jj,s,q,d;
1703: PetscInt k,kk;
1704: PetscMPIInt color;
1705: MPI_Comm comm, subcomm;
1706: IS **xis = NULL, **is = ois, **is_local = iis;
1709: PetscObjectGetComm((PetscObject)pc, &comm);
1710: MPI_Comm_size(comm, &size);
1711: MPI_Comm_rank(comm, &rank);
1712: MatGetOwnershipRange(pc->pmat, &first, &last);
1713: if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1714: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1716: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1717: s = 0;
1718: ystart = 0;
1719: for (j=0; j<Ndomains; ++j) {
1720: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1721: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1722: /* Vertical domain limits with an overlap. */
1723: ylow = PetscMax(ystart - overlap,0);
1724: yhigh = PetscMin(ystart + maxheight + overlap,N);
1725: xstart = 0;
1726: for (i=0; i<Mdomains; ++i) {
1727: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1728: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1729: /* Horizontal domain limits with an overlap. */
1730: xleft = PetscMax(xstart - overlap,0);
1731: xright = PetscMin(xstart + maxwidth + overlap,M);
1732: /*
1733: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1734: */
1735: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1736: if (nidx) ++s;
1737: xstart += maxwidth;
1738: } /* for (i = 0; i < Mdomains; ++i) */
1739: ystart += maxheight;
1740: } /* for (j = 0; j < Ndomains; ++j) */
1742: /* Now we can allocate the necessary number of ISs. */
1743: *nsub = s;
1744: PetscMalloc1(*nsub,is);
1745: PetscMalloc1(*nsub,is_local);
1746: s = 0;
1747: ystart = 0;
1748: for (j=0; j<Ndomains; ++j) {
1749: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1750: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1751: /* Vertical domain limits with an overlap. */
1752: y[0][0] = PetscMax(ystart - overlap,0);
1753: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1754: /* Vertical domain limits without an overlap. */
1755: y[1][0] = ystart;
1756: y[1][1] = PetscMin(ystart + maxheight,N);
1757: xstart = 0;
1758: for (i=0; i<Mdomains; ++i) {
1759: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1760: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1761: /* Horizontal domain limits with an overlap. */
1762: x[0][0] = PetscMax(xstart - overlap,0);
1763: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1764: /* Horizontal domain limits without an overlap. */
1765: x[1][0] = xstart;
1766: x[1][1] = PetscMin(xstart+maxwidth,M);
1767: /*
1768: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1769: Do this twice: first for the domains with overlaps, and once without.
1770: During the first pass create the subcommunicators, and use them on the second pass as well.
1771: */
1772: for (q = 0; q < 2; ++q) {
1773: PetscBool split = PETSC_FALSE;
1774: /*
1775: Domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1776: according to whether the domain with or without the overlap is considered.
1777: */
1778: xleft = x[q][0]; xright = x[q][1];
1779: ylow = y[q][0]; yhigh = y[q][1];
1780: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1781: nidx *= dof;
1782: n[q] = nidx;
1783: /*
1784: Based on the counted number of indices in the local domain *with an overlap*,
1785: construct a subcommunicator of all the processors supporting this domain.
1786: Observe that a domain with an overlap might have nontrivial local support,
1787: while the domain without an overlap might not. Hence, the decision to participate
1788: in the subcommunicator must be based on the domain with an overlap.
1789: */
1790: if (q == 0) {
1791: if (nidx) color = 1;
1792: else color = MPI_UNDEFINED;
1793: MPI_Comm_split(comm, color, rank, &subcomm);
1794: split = PETSC_TRUE;
1795: }
1796: /*
1797: Proceed only if the number of local indices *with an overlap* is nonzero.
1798: */
1799: if (n[0]) {
1800: if (q == 0) xis = is;
1801: if (q == 1) {
1802: /*
1803: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1804: Moreover, if the overlap is zero, the two ISs are identical.
1805: */
1806: if (overlap == 0) {
1807: (*is_local)[s] = (*is)[s];
1808: PetscObjectReference((PetscObject)(*is)[s]);
1809: continue;
1810: } else {
1811: xis = is_local;
1812: subcomm = ((PetscObject)(*is)[s])->comm;
1813: }
1814: } /* if (q == 1) */
1815: idx = NULL;
1816: PetscMalloc1(nidx,&idx);
1817: if (nidx) {
1818: k = 0;
1819: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1820: PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1821: PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1822: kk = dof*(M*jj + x0);
1823: for (ii=x0; ii<x1; ++ii) {
1824: for (d = 0; d < dof; ++d) {
1825: idx[k++] = kk++;
1826: }
1827: }
1828: }
1829: }
1830: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1831: if (split) {
1832: MPI_Comm_free(&subcomm);
1833: }
1834: }/* if (n[0]) */
1835: }/* for (q = 0; q < 2; ++q) */
1836: if (n[0]) ++s;
1837: xstart += maxwidth;
1838: } /* for (i = 0; i < Mdomains; ++i) */
1839: ystart += maxheight;
1840: } /* for (j = 0; j < Ndomains; ++j) */
1841: return(0);
1842: }
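/*
   Illustrative usage sketch (not part of this file): for a structured 64x64 grid
   with one unknown per node, split the domain into a 4x4 array of subdomains with
   one mesh line of overlap and hand both inner and outer index sets to a PCGASM.
   Assumes pc is a PCGASM whose operator has a matching row layout; error checking
   is omitted and all sizes are illustrative.

     PetscInt nsub;
     IS       *iis,*ois;
     PCGASMCreateSubdomains2D(pc,64,64,4,4,1,1,&nsub,&iis,&ois);
     PCGASMSetSubdomains(pc,nsub,iis,ois);     // explicit inner and outer subdomains
     PCGASMDestroySubdomains(nsub,&iis,&ois);  // PC keeps its own references
*/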
1844: /*@C
1845: PCGASMGetSubdomains - Gets the subdomains supported on this processor
1846: for the additive Schwarz preconditioner.
1848: Not Collective
1850: Input Parameter:
1851: . pc - the preconditioner context
1853: Output Parameters:
1854: + n - the number of subdomains for this processor (default value = 1)
1855: . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1856: - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1859: Notes:
1860: The user is responsible for destroying the ISs and freeing the returned arrays.
1861: The IS numbering is in the parallel, global numbering of the vector.
1863: Level: advanced
1865: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1866: PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1867: @*/
1868: PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1869: {
1870: PC_GASM *osm;
1872: PetscBool match;
1873: PetscInt i;
1877: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1878: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1879: osm = (PC_GASM*)pc->data;
1880: if (n) *n = osm->n;
1881: if (iis) {
1882: PetscMalloc1(osm->n, iis);
1883: }
1884: if (ois) {
1885: PetscMalloc1(osm->n, ois);
1886: }
1887: if (iis || ois) {
1888: for (i = 0; i < osm->n; ++i) {
1889: if (iis) (*iis)[i] = osm->iis[i];
1890: if (ois) (*ois)[i] = osm->ois[i];
1891: }
1892: }
1893: return(0);
1894: }
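/*
   Illustrative query sketch (not part of this file): after the subdomains have
   been set (or after PCSetUp()), retrieve and inspect them. Only the arrays
   allocated by the query are freed here; see the Notes above for ownership of
   the index sets themselves. Error checking is omitted.

     PetscInt i,n,nidx;
     IS       *iis,*ois;
     PCGASMGetSubdomains(pc,&n,&iis,&ois);
     for (i = 0; i < n; i++) {
       ISGetLocalSize(iis[i],&nidx);            // local size of the inner subdomain
       PetscPrintf(PETSC_COMM_SELF,"subdomain %D: %D local inner indices\n",i,nidx);
     }
     PetscFree(iis);                            // free only the arrays allocated by the query
     PetscFree(ois);
*/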
1896: /*@C
1897: PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1898: only) for the additive Schwarz preconditioner.
1900: Not Collective
1902: Input Parameter:
1903: . pc - the preconditioner context
1905: Output Parameters:
1906: + n - the number of matrices for this processor (default value = 1)
1907: - mat - the matrices
1909: Notes:
1910: Matrices returned by this routine have the same communicators as the index sets (IS)
1911: used to define the subdomains in PCGASMSetSubdomains().
1912: Level: advanced
1914: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1915: PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1916: @*/
1917: PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1918: {
1919: PC_GASM *osm;
1921: PetscBool match;
1927: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1928: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1929: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1930: osm = (PC_GASM*)pc->data;
1931: if (n) *n = osm->n;
1932: if (mat) *mat = osm->pmat;
1933: return(0);
1934: }
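/*
   Illustrative query sketch (not part of this file): after KSPSetUp() or PCSetUp(),
   inspect the subdomain matrices the preconditioner actually uses. The returned
   array is internal to the PC and is not freed here. Error checking is omitted.

     PetscInt i,n;
     Mat      *submat;
     PCGASMGetSubmatrices(pc,&n,&submat);
     for (i = 0; i < n; i++) {
       // view each submatrix on the communicator it lives on
       MatView(submat[i],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)submat[i])));
     }
*/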
1936: /*@
1937: PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1938: Logically Collective
1940: Input Parameters:
1941: + pc - the preconditioner
1942: - flg - boolean indicating whether to use subdomains defined by the DM
1944: Options Database Key:
1945: . -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM whenever possible
1947: Level: intermediate
1949: Notes:
1950: PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1951: so calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling either PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1952: automatically turns off the use of DM-defined subdomains.
1954: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1955: PCGASMCreateSubdomains2D()
1956: @*/
1957: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1958: {
1959: PC_GASM *osm = (PC_GASM*)pc->data;
1961: PetscBool match;
1966: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1967: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1968: if (match) {
1969: if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1970: osm->dm_subdomains = flg;
1971: }
1972: }
1973: return(0);
1974: }
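/*
   Illustrative usage sketch (not part of this file): allow a DM attached to the
   solver to define the GASM subdomains. Assumes ksp already has a DM and its
   operators set, and that no explicit subdomains, total-subdomain count, or
   overlap have been requested (otherwise the flag is ignored, per the Notes
   above). Error checking is omitted.

     PC pc;
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);   // corresponds to -pc_gasm_dm_subdomains
     KSPSetUp(ksp);                             // subdomains taken from DMCreateDomainDecomposition() when available
*/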
1976: /*@
1977: PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1978: Not Collective
1980: Input Parameter:
1981: . pc - the preconditioner
1983: Output Parameter:
1984: . flg - boolean indicating whether to use subdomains defined by the DM
1986: Level: intermediate
1988: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1989: PCGASMCreateSubdomains2D()
1990: @*/
1991: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1992: {
1993: PC_GASM *osm = (PC_GASM*)pc->data;
1995: PetscBool match;
2000: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
2001: if (match) {
2002: if (flg) *flg = osm->dm_subdomains;
2003: }
2004: return(0);
2005: }