Actual source code: gasm.c
1: /*
2:    This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3: In this version each processor may intersect multiple subdomains and any subdomain may
4: intersect multiple processors. Intersections of subdomains with processors are called *local
5: subdomains*.
7:    N    - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains(), and then calculated in PCSetUp_GASM())
8: n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9: nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10: */
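/*
   An illustrative example of these quantities (assumed values, not taken from any particular run):
   if 3 subdomains are spread over 2 MPI ranks so that rank 0 supports 2 of them and rank 1
   supports 1, then N = 3, n = 2 on rank 0, n = 1 on rank 1, and nmax = 2.
*/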
11: #include <petsc/private/pcimpl.h>
12: #include <petscdm.h>
14: typedef struct {
15: PetscInt N,n,nmax;
16: PetscInt overlap; /* overlap requested by user */
17: PCGASMType type; /* use reduced interpolation, restriction or both */
18: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19: PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */
20: PetscBool sort_indices; /* flag to sort subdomain indices */
21: PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23: PetscBool hierarchicalpartitioning;
24: IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25: IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26: KSP *ksp; /* linear solvers for each subdomain */
27: Mat *pmat; /* subdomain block matrices */
28: Vec gx,gy; /* Merged work vectors */
29: Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30: VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31: VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32: VecScatter pctoouter;
33: IS permutationIS;
34: Mat permutationP;
35: Mat pcmat;
36: Vec pcx,pcy;
37: } PC_GASM;
39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40: {
41: PC_GASM *osm = (PC_GASM*)pc->data;
42: PetscInt i;
46: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47: PetscMalloc2(osm->n,numbering,osm->n,permutation);
48: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
49: for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50: PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
51: return(0);
52: }
54: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55: {
56: PC_GASM *osm = (PC_GASM*)pc->data;
57: PetscInt j,nidx;
58: const PetscInt *idx;
59: PetscViewer sviewer;
60: char *cidx;
64:   if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
65: /* Inner subdomains. */
66: ISGetLocalSize(osm->iis[i], &nidx);
67: /*
68: No more than 15 characters per index plus a space.
69: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71:      For nidx == 0, the whole string consists of 16 '\0' characters.
72: */
73: #define len 16*(nidx+1)+1
74: PetscMalloc1(len, &cidx);
75: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
76: #undef len
77: ISGetIndices(osm->iis[i], &idx);
78: for (j = 0; j < nidx; ++j) {
79: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
80: }
81: ISRestoreIndices(osm->iis[i],&idx);
82: PetscViewerDestroy(&sviewer);
83: PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
84: PetscViewerFlush(viewer);
85: PetscViewerASCIIPushSynchronized(viewer);
86: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
87: PetscViewerFlush(viewer);
88: PetscViewerASCIIPopSynchronized(viewer);
89: PetscViewerASCIIPrintf(viewer, "\n");
90: PetscViewerFlush(viewer);
91: PetscFree(cidx);
92: /* Outer subdomains. */
93: ISGetLocalSize(osm->ois[i], &nidx);
94: /*
95: No more than 15 characters per index plus a space.
96: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98:      For nidx == 0, the whole string consists of 16 '\0' characters.
99: */
100: #define len 16*(nidx+1)+1
101: PetscMalloc1(len, &cidx);
102: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
103: #undef len
104: ISGetIndices(osm->ois[i], &idx);
105: for (j = 0; j < nidx; ++j) {
106: PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
107: }
108: PetscViewerDestroy(&sviewer);
109: ISRestoreIndices(osm->ois[i],&idx);
110: PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
111: PetscViewerFlush(viewer);
112: PetscViewerASCIIPushSynchronized(viewer);
113: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
114: PetscViewerFlush(viewer);
115: PetscViewerASCIIPopSynchronized(viewer);
116: PetscViewerASCIIPrintf(viewer, "\n");
117: PetscViewerFlush(viewer);
118: PetscFree(cidx);
119: return(0);
120: }
122: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
123: {
124: PC_GASM *osm = (PC_GASM*)pc->data;
125: const char *prefix;
126: char fname[PETSC_MAX_PATH_LEN+1];
127: PetscInt l, d, count;
128: PetscBool found;
129: PetscViewer viewer, sviewer = NULL;
130: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
134: PCGetOptionsPrefix(pc,&prefix);
135: PetscOptionsHasName(NULL,prefix,"-pc_gasm_print_subdomains",&found);
136: if (!found) return(0);
137: PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);
138: if (!found) { PetscStrcpy(fname,"stdout"); };
139: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
140: /*
141: Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
142: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
143: */
144: PetscObjectName((PetscObject)viewer);
145: l = 0;
146: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
147: for (count = 0; count < osm->N; ++count) {
148: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
149: if (l<osm->n) {
150: d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
151: if (numbering[d] == count) {
152: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
153: PCGASMSubdomainView_Private(pc,d,sviewer);
154: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
155: ++l;
156: }
157: }
158: MPI_Barrier(PetscObjectComm((PetscObject)pc));
159: }
160: PetscFree2(numbering,permutation);
161: PetscViewerDestroy(&viewer);
162: return(0);
163: }
165: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
166: {
167: PC_GASM *osm = (PC_GASM*)pc->data;
168: const char *prefix;
170: PetscMPIInt rank, size;
171: PetscInt bsz;
172: PetscBool iascii,view_subdomains=PETSC_FALSE;
173: PetscViewer sviewer;
174: PetscInt count, l;
175: char overlap[256] = "user-defined overlap";
176: char gsubdomains[256] = "unknown total number of subdomains";
177: char msubdomains[256] = "unknown max number of local subdomains";
178: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
181: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
182: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
184: if (osm->overlap >= 0) {
185: PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
186: }
187: if (osm->N != PETSC_DETERMINE) {
188: PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
189: }
190: if (osm->nmax != PETSC_DETERMINE) {
191: PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
192: }
194: PCGetOptionsPrefix(pc,&prefix);
195: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);
197: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
198: if (iascii) {
199: /*
200: Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
201: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
202: collectively on the comm.
203: */
204: PetscObjectName((PetscObject)viewer);
205: PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
206: PetscViewerASCIIPrintf(viewer," %s\n",overlap);
207: PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);
208: PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);
209: PetscViewerASCIIPushSynchronized(viewer);
210: PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
211: PetscViewerFlush(viewer);
212: PetscViewerASCIIPopSynchronized(viewer);
213: /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
214: PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");
215: PetscViewerASCIIPushTab(viewer);
216: PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");
217: /* Make sure that everybody waits for the banner to be printed. */
218: MPI_Barrier(PetscObjectComm((PetscObject)viewer));
219: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
220: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
221: l = 0;
222: for (count = 0; count < osm->N; ++count) {
223: PetscMPIInt srank, ssize;
224: if (l<osm->n) {
225: PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
226: if (numbering[d] == count) {
227: MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
228: MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
229: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
230: ISGetLocalSize(osm->ois[d],&bsz);
231: PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
232: PetscViewerFlush(sviewer);
233: if (view_subdomains) {
234: PCGASMSubdomainView_Private(pc,d,sviewer);
235: }
236: if (!pc->setupcalled) {
237: PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");
238: } else {
239: KSPView(osm->ksp[d],sviewer);
240: }
241: PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");
242: PetscViewerFlush(sviewer);
243: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
244: ++l;
245: }
246: }
247: MPI_Barrier(PetscObjectComm((PetscObject)pc));
248: }
249: PetscFree2(numbering,permutation);
250: PetscViewerASCIIPopTab(viewer);
251: PetscViewerFlush(viewer);
252: /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
253: PetscViewerASCIIPopSynchronized(viewer);
254: }
255: return(0);
256: }
258: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
260: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
261: {
262: PC_GASM *osm = (PC_GASM*)pc->data;
263: MatPartitioning part;
264: MPI_Comm comm;
265: PetscMPIInt size;
266: PetscInt nlocalsubdomains,fromrows_localsize;
267: IS partitioning,fromrows,isn;
268: Vec outervec;
269: PetscErrorCode ierr;
272: PetscObjectGetComm((PetscObject)pc,&comm);
273: MPI_Comm_size(comm,&size);
274:   /* A hierarchical partitioning is unnecessary when the total number of
275:    * subdomains is consistent with the number of MPI tasks; in the cases
276:    * below we simply return without using it. */
279: if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return(0);
280:   if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of MPI processes %d",osm->N,size);
281: nlocalsubdomains = size/osm->N;
282: osm->n = 1;
283: MatPartitioningCreate(comm,&part);
284: MatPartitioningSetAdjacency(part,pc->pmat);
285: MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
286: MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
287: MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
288: MatPartitioningSetFromOptions(part);
289:   /* get the new partition (owning process) assigned to each vertex */
290: MatPartitioningApply(part,&partitioning);
291: ISBuildTwoSided(partitioning,NULL,&fromrows);
292: ISPartitioningToNumbering(partitioning,&isn);
293: ISDestroy(&isn);
294: ISGetLocalSize(fromrows,&fromrows_localsize);
295: MatPartitioningDestroy(&part);
296: MatCreateVecs(pc->pmat,&outervec,NULL);
297: VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
298: VecDuplicate(osm->pcx,&(osm->pcy));
299: VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
300: MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
301: PetscObjectReference((PetscObject)fromrows);
302: osm->permutationIS = fromrows;
303: osm->pcmat = pc->pmat;
304: PetscObjectReference((PetscObject)osm->permutationP);
305: pc->pmat = osm->permutationP;
306: VecDestroy(&outervec);
307: ISDestroy(&fromrows);
308: ISDestroy(&partitioning);
309: osm->n = PETSC_DETERMINE;
310: return(0);
311: }
313: static PetscErrorCode PCSetUp_GASM(PC pc)
314: {
315: PC_GASM *osm = (PC_GASM*)pc->data;
317: PetscInt i,nInnerIndices,nTotalInnerIndices;
318: PetscMPIInt rank, size;
319: MatReuse scall = MAT_REUSE_MATRIX;
320: KSP ksp;
321: PC subpc;
322: const char *prefix,*pprefix;
323: Vec x,y;
324: PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
325:   const PetscInt *oidxi;  /* Indices of the i-th local outer subdomain. */
326: PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
327: PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
328:   IS gois;      /* Disjoint union of the global indices of the outer subdomains. */
329: IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
330: PetscScalar *gxarray, *gyarray;
331: PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
332: PetscInt num_subdomains = 0;
333: DM *subdomain_dm = NULL;
334: char **subdomain_names = NULL;
335: PetscInt *numbering;
338: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
339: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
340: if (!pc->setupcalled) {
341: /* use a hierarchical partitioning */
342: if (osm->hierarchicalpartitioning) {
343: PCGASMSetHierarchicalPartitioning(pc);
344: }
345: if (osm->n == PETSC_DETERMINE) {
346: if (osm->N != PETSC_DETERMINE) {
347: /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
348: PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
349: } else if (osm->dm_subdomains && pc->dm) {
350: /* try pc->dm next, if allowed */
351: PetscInt d;
352: IS *inner_subdomain_is, *outer_subdomain_is;
353: DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
354: if (num_subdomains) {
355: PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
356: }
357: for (d = 0; d < num_subdomains; ++d) {
358: if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
359: if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
360: }
361: PetscFree(inner_subdomain_is);
362: PetscFree(outer_subdomain_is);
363: } else {
364: /* still no subdomains; use one per processor */
365: osm->nmax = osm->n = 1;
366: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
367: osm->N = size;
368: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
369: }
370: }
371: if (!osm->iis) {
372: /*
373: osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
374: We create the requisite number of local inner subdomains and then expand them into
375:        outer subdomains, if necessary.
376: */
377: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
378: }
379: if (!osm->ois) {
380: /*
381: Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
382: has been requested, copy the inner subdomains over so they can be modified.
383: */
384: PetscMalloc1(osm->n,&osm->ois);
385: for (i=0; i<osm->n; ++i) {
386: if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
387: ISDuplicate(osm->iis[i],(osm->ois)+i);
388: ISCopy(osm->iis[i],osm->ois[i]);
389: } else {
390: PetscObjectReference((PetscObject)((osm->iis)[i]));
391: osm->ois[i] = osm->iis[i];
392: }
393: }
394: if (osm->overlap>0 && osm->N>1) {
395: /* Extend the "overlapping" regions by a number of steps */
396: MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
397: }
398: }
400: /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
401: if (osm->nmax == PETSC_DETERMINE) {
402: PetscMPIInt inwork,outwork;
403:     /* determine the maximum number of local subdomains over all processes */
404: inwork = osm->n;
405: MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
406: osm->nmax = outwork;
407: }
408: if (osm->N == PETSC_DETERMINE) {
409: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
410: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
411: }
413: if (osm->sort_indices) {
414: for (i=0; i<osm->n; i++) {
415: ISSort(osm->ois[i]);
416: ISSort(osm->iis[i]);
417: }
418: }
419: PCGetOptionsPrefix(pc,&prefix);
420: PCGASMPrintSubdomains(pc);
422: /*
423: Merge the ISs, create merged vectors and restrictions.
424: */
425: /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
426: on = 0;
427: for (i=0; i<osm->n; i++) {
428: ISGetLocalSize(osm->ois[i],&oni);
429: on += oni;
430: }
431: PetscMalloc1(on, &oidx);
432: on = 0;
433: /* Merge local indices together */
434: for (i=0; i<osm->n; i++) {
435: ISGetLocalSize(osm->ois[i],&oni);
436: ISGetIndices(osm->ois[i],&oidxi);
437: PetscArraycpy(oidx+on,oidxi,oni);
438: ISRestoreIndices(osm->ois[i],&oidxi);
439: on += oni;
440: }
441: ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
442: nTotalInnerIndices = 0;
443: for (i=0; i<osm->n; i++) {
444: ISGetLocalSize(osm->iis[i],&nInnerIndices);
445: nTotalInnerIndices += nInnerIndices;
446: }
447: VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
448: VecDuplicate(x,&y);
450: VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
451: VecDuplicate(osm->gx,&osm->gy);
452: VecGetOwnershipRange(osm->gx, &gostart, NULL);
453: ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
454:   /* gois may contain indices that are not locally owned */
455: VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
456: PetscMalloc1(osm->n,&numbering);
457: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
458: VecDestroy(&x);
459: ISDestroy(&gois);
461: /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
462: {
463:     PetscInt ini;           /* Number of indices in the i-th local inner subdomain. */
464: PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
465: PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
466: PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
467: IS giis; /* IS for the disjoint union of inner subdomains. */
468: IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
469: PetscScalar *array;
470: const PetscInt *indices;
471: PetscInt k;
472: on = 0;
473: for (i=0; i<osm->n; i++) {
474: ISGetLocalSize(osm->ois[i],&oni);
475: on += oni;
476: }
477: PetscMalloc1(on, &iidx);
478: PetscMalloc1(on, &ioidx);
479: VecGetArray(y,&array);
480:     /* mark each entry with its global subdomain number so that overlapping indices can be detected */
481: in = 0;
482: for (i=0; i<osm->n; i++) {
483: ISGetLocalSize(osm->iis[i],&ini);
484: for (k = 0; k < ini; ++k) {
485: array[in+k] = numbering[i];
486: }
487: in += ini;
488: }
489: VecRestoreArray(y,&array);
490: VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
491: VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
492: VecGetOwnershipRange(osm->gy,&gostart, NULL);
493: VecGetArray(osm->gy,&array);
494: on = 0;
495: in = 0;
496: for (i=0; i<osm->n; i++) {
497: ISGetLocalSize(osm->ois[i],&oni);
498: ISGetIndices(osm->ois[i],&indices);
499: for (k=0; k<oni; k++) {
500: /* skip overlapping indices to get inner domain */
501: if (PetscRealPart(array[on+k]) != numbering[i]) continue;
502: iidx[in] = indices[k];
503: ioidx[in++] = gostart+on+k;
504: }
505: ISRestoreIndices(osm->ois[i], &indices);
506: on += oni;
507: }
508: VecRestoreArray(osm->gy,&array);
509: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
510: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
511: VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
512: VecDestroy(&y);
513: ISDestroy(&giis);
514: ISDestroy(&giois);
515: }
516: ISDestroy(&goid);
517: PetscFree(numbering);
519: /* Create the subdomain work vectors. */
520: PetscMalloc1(osm->n,&osm->x);
521: PetscMalloc1(osm->n,&osm->y);
522: VecGetArray(osm->gx, &gxarray);
523: VecGetArray(osm->gy, &gyarray);
524: for (i=0, on=0; i<osm->n; ++i, on += oni) {
525: PetscInt oNi;
526: ISGetLocalSize(osm->ois[i],&oni);
527: /* on a sub communicator */
528: ISGetSize(osm->ois[i],&oNi);
529: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
530: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
531: }
532: VecRestoreArray(osm->gx, &gxarray);
533: VecRestoreArray(osm->gy, &gyarray);
534: /* Create the subdomain solvers */
535: PetscMalloc1(osm->n,&osm->ksp);
536: for (i=0; i<osm->n; i++) {
537: char subprefix[PETSC_MAX_PATH_LEN+1];
538: KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
539: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
540: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
541: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
542: KSPSetType(ksp,KSPPREONLY);
543: KSPGetPC(ksp,&subpc); /* Why do we need this here? */
544: if (subdomain_dm) {
545: KSPSetDM(ksp,subdomain_dm[i]);
546: DMDestroy(subdomain_dm+i);
547: }
548: PCGetOptionsPrefix(pc,&prefix);
549: KSPSetOptionsPrefix(ksp,prefix);
550: if (subdomain_names && subdomain_names[i]) {
551: PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
552: KSPAppendOptionsPrefix(ksp,subprefix);
553: PetscFree(subdomain_names[i]);
554: }
555: KSPAppendOptionsPrefix(ksp,"sub_");
556: osm->ksp[i] = ksp;
557: }
558: PetscFree(subdomain_dm);
559: PetscFree(subdomain_names);
560: scall = MAT_INITIAL_MATRIX;
561: } else { /* if (pc->setupcalled) */
562: /*
563: Destroy the submatrices from the previous iteration
564: */
565: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
566: MatDestroyMatrices(osm->n,&osm->pmat);
567: scall = MAT_INITIAL_MATRIX;
568: }
569: if (osm->permutationIS) {
570: MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
571: PetscObjectReference((PetscObject)osm->permutationP);
572: osm->pcmat = pc->pmat;
573: pc->pmat = osm->permutationP;
574: }
575: }
577: /*
578: Extract the submatrices.
579: */
580: if (size > 1) {
581: MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
582: } else {
583: MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
584: }
585: if (scall == MAT_INITIAL_MATRIX) {
586: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
587: for (i=0; i<osm->n; i++) {
588: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
589: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
590: }
591: }
593: /* Return control to the user so that the submatrices can be modified (e.g., to apply
594: different boundary conditions for the submatrices than for the global problem) */
595: PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);
597: /*
598: Loop over submatrices putting them into local ksps
599: */
600: for (i=0; i<osm->n; i++) {
601: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
602: KSPGetOptionsPrefix(osm->ksp[i],&prefix);
603: MatSetOptionsPrefix(osm->pmat[i],prefix);
604: if (!pc->setupcalled) {
605: KSPSetFromOptions(osm->ksp[i]);
606: }
607: }
608: if (osm->pcmat) {
609: MatDestroy(&pc->pmat);
610: pc->pmat = osm->pcmat;
611: osm->pcmat = NULL;
612: }
613: return(0);
614: }
616: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
617: {
618: PC_GASM *osm = (PC_GASM*)pc->data;
620: PetscInt i;
623: for (i=0; i<osm->n; i++) {
624: KSPSetUp(osm->ksp[i]);
625: }
626: return(0);
627: }
629: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
630: {
631: PC_GASM *osm = (PC_GASM*)pc->data;
633: PetscInt i;
634: Vec x,y;
635: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
638: if (osm->pctoouter) {
639: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
640: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
641: x = osm->pcx;
642: y = osm->pcy;
643: } else {
644: x = xin;
645: y = yout;
646: }
647: /*
648: support for limiting the restriction or interpolation only to the inner
649: subdomain values (leaving the other values 0).
650: */
651: if (!(osm->type & PC_GASM_RESTRICT)) {
652: /* have to zero the work RHS since scatter may leave some slots empty */
653: VecZeroEntries(osm->gx);
654: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
655: } else {
656: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
657: }
658: VecZeroEntries(osm->gy);
659: if (!(osm->type & PC_GASM_RESTRICT)) {
660: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
661: } else {
662: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
663: }
664: /* do the subdomain solves */
665: for (i=0; i<osm->n; ++i) {
666: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
667: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
668: }
669: /* do we need to zero y? */
670: VecZeroEntries(y);
671: if (!(osm->type & PC_GASM_INTERPOLATE)) {
672: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
673: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
674: } else {
675: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
676: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
677: }
678: if (osm->pctoouter) {
679: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
680: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
681: }
682: return(0);
683: }
685: static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
686: {
687: PC_GASM *osm = (PC_GASM*)pc->data;
688: Mat X,Y,O=NULL,Z,W;
689: Vec x,y;
690: PetscInt i,m,M,N;
691: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
695: if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
696: MatGetSize(Xin,NULL,&N);
697: if (osm->pctoouter) {
698: VecGetLocalSize(osm->pcx,&m);
699: VecGetSize(osm->pcx,&M);
700: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);
701: for (i = 0; i < N; ++i) {
702: MatDenseGetColumnVecRead(Xin,i,&x);
703: MatDenseGetColumnVecWrite(O,i,&y);
704: VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
705: VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
706: MatDenseRestoreColumnVecWrite(O,i,&y);
707: MatDenseRestoreColumnVecRead(Xin,i,&x);
708: }
709: X = Y = O;
710: } else {
711: X = Xin;
712: Y = Yout;
713: }
714: /*
715: support for limiting the restriction or interpolation only to the inner
716: subdomain values (leaving the other values 0).
717: */
718: VecGetLocalSize(osm->x[0],&m);
719: VecGetSize(osm->x[0],&M);
720: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);
721: for (i = 0; i < N; ++i) {
722: MatDenseGetColumnVecRead(X,i,&x);
723: MatDenseGetColumnVecWrite(Z,i,&y);
724: if (!(osm->type & PC_GASM_RESTRICT)) {
725: /* have to zero the work RHS since scatter may leave some slots empty */
726: VecZeroEntries(y);
727: VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);
728: VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);
729: } else {
730: VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);
731: VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);
732: }
733: MatDenseRestoreColumnVecWrite(Z,i,&y);
734: MatDenseRestoreColumnVecRead(X,i,&x);
735: }
736: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);
737: MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
738: MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);
739: MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);
740: /* do the subdomain solve */
741: KSPMatSolve(osm->ksp[0],Z,W);
742: KSPCheckSolve(osm->ksp[0],pc,NULL);
743: MatDestroy(&Z);
744: /* do we need to zero y? */
745: MatZeroEntries(Y);
746: for (i = 0; i < N; ++i) {
747: MatDenseGetColumnVecWrite(Y,i,&y);
748: MatDenseGetColumnVecRead(W,i,&x);
749: if (!(osm->type & PC_GASM_INTERPOLATE)) {
750: VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);
751: VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);
752: } else {
753: VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);
754: VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);
755: }
756: MatDenseRestoreColumnVecRead(W,i,&x);
757: if (osm->pctoouter) {
758: MatDenseGetColumnVecWrite(Yout,i,&x);
759: VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
760: VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
761: MatDenseRestoreColumnVecRead(Yout,i,&x);
762: }
763: MatDenseRestoreColumnVecWrite(Y,i,&y);
764: }
765: MatDestroy(&W);
766: MatDestroy(&O);
767: return(0);
768: }
770: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
771: {
772: PC_GASM *osm = (PC_GASM*)pc->data;
774: PetscInt i;
775: Vec x,y;
776: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
779: if (osm->pctoouter) {
780: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
781: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
782: x = osm->pcx;
783: y = osm->pcy;
784:   } else {
785: x = xin;
786: y = yout;
787: }
788: /*
789: Support for limiting the restriction or interpolation to only local
790: subdomain values (leaving the other values 0).
792:      Note: these are reversed compared to PCApply_GASM() because we are applying the
793:      transpose of the three terms.
794: */
795: if (!(osm->type & PC_GASM_INTERPOLATE)) {
796: /* have to zero the work RHS since scatter may leave some slots empty */
797: VecZeroEntries(osm->gx);
798: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
799: } else {
800: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
801: }
802: VecZeroEntries(osm->gy);
803: if (!(osm->type & PC_GASM_INTERPOLATE)) {
804: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
805: } else {
806: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
807: }
808: /* do the local solves */
809: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
810: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
811: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
812: }
813: VecZeroEntries(y);
814: if (!(osm->type & PC_GASM_RESTRICT)) {
815: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
816: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
817: } else {
818: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
819: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
820: }
821: if (osm->pctoouter) {
822: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
823: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
824: }
825: return(0);
826: }
828: static PetscErrorCode PCReset_GASM(PC pc)
829: {
830: PC_GASM *osm = (PC_GASM*)pc->data;
832: PetscInt i;
835: if (osm->ksp) {
836: for (i=0; i<osm->n; i++) {
837: KSPReset(osm->ksp[i]);
838: }
839: }
840: if (osm->pmat) {
841: if (osm->n > 0) {
842: PetscMPIInt size;
843: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
844: if (size > 1) {
845: /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
846: MatDestroyMatrices(osm->n,&osm->pmat);
847: } else {
848: MatDestroySubMatrices(osm->n,&osm->pmat);
849: }
850: }
851: }
852: if (osm->x) {
853: for (i=0; i<osm->n; i++) {
854: VecDestroy(&osm->x[i]);
855: VecDestroy(&osm->y[i]);
856: }
857: }
858: VecDestroy(&osm->gx);
859: VecDestroy(&osm->gy);
861: VecScatterDestroy(&osm->gorestriction);
862: VecScatterDestroy(&osm->girestriction);
863: if (!osm->user_subdomains) {
864: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
865: osm->N = PETSC_DETERMINE;
866: osm->nmax = PETSC_DETERMINE;
867: }
868: if (osm->pctoouter) {
869: VecScatterDestroy(&(osm->pctoouter));
870: }
871: if (osm->permutationIS) {
872: ISDestroy(&(osm->permutationIS));
873: }
874: if (osm->pcx) {
875: VecDestroy(&(osm->pcx));
876: }
877: if (osm->pcy) {
878: VecDestroy(&(osm->pcy));
879: }
880: if (osm->permutationP) {
881: MatDestroy(&(osm->permutationP));
882: }
883: if (osm->pcmat) {
884: MatDestroy(&osm->pcmat);
885: }
886: return(0);
887: }
889: static PetscErrorCode PCDestroy_GASM(PC pc)
890: {
891: PC_GASM *osm = (PC_GASM*)pc->data;
893: PetscInt i;
896: PCReset_GASM(pc);
897:   /* PCReset() will not destroy the subdomains if user_subdomains is true. */
898: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
899: if (osm->ksp) {
900: for (i=0; i<osm->n; i++) {
901: KSPDestroy(&osm->ksp[i]);
902: }
903: PetscFree(osm->ksp);
904: }
905: PetscFree(osm->x);
906: PetscFree(osm->y);
907: PetscFree(pc->data);
908: return(0);
909: }
911: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
912: {
913: PC_GASM *osm = (PC_GASM*)pc->data;
915: PetscInt blocks,ovl;
916: PetscBool flg;
917: PCGASMType gasmtype;
920: PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
921: PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
922: PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
923: if (flg) {
924: PCGASMSetTotalSubdomains(pc,blocks);
925: }
926: PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
927: if (flg) {
928: PCGASMSetOverlap(pc,ovl);
929: osm->dm_subdomains = PETSC_FALSE;
930: }
931: flg = PETSC_FALSE;
932: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
933: if (flg) {PCGASMSetType(pc,gasmtype);}
934: PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
935: PetscOptionsTail();
936: return(0);
937: }
939: /*------------------------------------------------------------------------------------*/
941: /*@
942: PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
943: communicator.
944: Logically collective on pc
946: Input Parameters:
947: + pc - the preconditioner
948: - N - total number of subdomains
950: Level: beginner
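   Example Usage:
   A minimal sketch (illustrative; error checking omitted, and ksp is assumed to be an existing
   KSP whose operators have already been set):
.vb
   PC pc;

   KSPGetPC(ksp,&pc);
   PCSetType(pc,PCGASM);
   PCGASMSetTotalSubdomains(pc,8);
   KSPSetUp(ksp);
.ve
   Here PCSetUp_GASM() decides how the 8 subdomains are distributed among the processes.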
952: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
953: PCGASMCreateSubdomains2D()
954: @*/
955: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
956: {
957: PC_GASM *osm = (PC_GASM*)pc->data;
958: PetscMPIInt size,rank;
962: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
963: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
965: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
966: osm->ois = osm->iis = NULL;
968: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
969: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
970: osm->N = N;
971: osm->n = PETSC_DETERMINE;
972: osm->nmax = PETSC_DETERMINE;
973: osm->dm_subdomains = PETSC_FALSE;
974: return(0);
975: }
977: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
978: {
979: PC_GASM *osm = (PC_GASM*)pc->data;
980: PetscErrorCode ierr;
981: PetscInt i;
984: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
985: if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
987: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
988: osm->iis = osm->ois = NULL;
989: osm->n = n;
990: osm->N = PETSC_DETERMINE;
991: osm->nmax = PETSC_DETERMINE;
992: if (ois) {
993: PetscMalloc1(n,&osm->ois);
994: for (i=0; i<n; i++) {
995: PetscObjectReference((PetscObject)ois[i]);
996: osm->ois[i] = ois[i];
997: }
998: /*
999: Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1000: it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1001: */
1002: osm->overlap = -1;
1003: /* inner subdomains must be provided */
1004:     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Inner index sets must be provided when outer index sets are given");
1005: }/* end if */
1006: if (iis) {
1007: PetscMalloc1(n,&osm->iis);
1008: for (i=0; i<n; i++) {
1009: PetscObjectReference((PetscObject)iis[i]);
1010: osm->iis[i] = iis[i];
1011: }
1012: if (!ois) {
1013: osm->ois = NULL;
1014:       /* if the user does not provide outer indices, PCSetUp_GASM() will create them by expanding the inner indices by osm->overlap */
1015: }
1016: }
1017: if (PetscDefined(USE_DEBUG)) {
1018: PetscInt j,rstart,rend,*covered,lsize;
1019: const PetscInt *indices;
1020:     /* check that the inner indices exactly cover the local portion of the preconditioning matrix */
1021: MatGetOwnershipRange(pc->pmat,&rstart,&rend);
1022: PetscCalloc1(rend-rstart,&covered);
1023:     /* check that the inner subdomains contain no indices owned by other processes and do not overlap */
1024: for (i=0; i<n; i++) {
1025: ISGetIndices(osm->iis[i],&indices);
1026: ISGetLocalSize(osm->iis[i],&lsize);
1027: for (j=0; j<lsize; j++) {
1028:         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot contain index %D, which is owned by another process", indices[j]);
1029:         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot overlap: index %D appears more than once",indices[j]);
1030: else covered[indices[j]-rstart] = 1;
1031: }
1032: ISRestoreIndices(osm->iis[i],&indices);
1033: }
1034: /* check if we miss any indices */
1035: for (i=rstart; i<rend; i++) {
1036:       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %D was not covered by the inner subdomains",i);
1037: }
1038: PetscFree(covered);
1039: }
1040: if (iis) osm->user_subdomains = PETSC_TRUE;
1041: return(0);
1042: }
1044: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1045: {
1046: PC_GASM *osm = (PC_GASM*)pc->data;
1049: if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1050: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1051: if (!pc->setupcalled) osm->overlap = ovl;
1052: return(0);
1053: }
1055: static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
1056: {
1057: PC_GASM *osm = (PC_GASM*)pc->data;
1060: osm->type = type;
1061: osm->type_set = PETSC_TRUE;
1062: return(0);
1063: }
1065: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1066: {
1067: PC_GASM *osm = (PC_GASM*)pc->data;
1070: osm->sort_indices = doSort;
1071: return(0);
1072: }
1074: /*
1075: FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1076: In particular, it would upset the global subdomain number calculation.
1077: */
1078: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1079: {
1080: PC_GASM *osm = (PC_GASM*)pc->data;
1084: if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1086: if (n) *n = osm->n;
1087: if (first) {
1088: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1089: *first -= osm->n;
1090: }
1091: if (ksp) {
1092: /* Assume that local solves are now different; not necessarily
1093: true, though! This flag is used only for PCView_GASM() */
1094: *ksp = osm->ksp;
1095: osm->same_subdomain_solvers = PETSC_FALSE;
1096: }
1097: return(0);
1098: } /* PCGASMGetSubKSP_GASM() */
1100: /*@C
1101: PCGASMSetSubdomains - Sets the subdomains for this processor
1102: for the additive Schwarz preconditioner.
1104: Collective on pc
1106: Input Parameters:
1107: + pc - the preconditioner object
1108: . n - the number of subdomains for this processor
1109: . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1110: - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1112: Notes:
1113: The IS indices use the parallel, global numbering of the vector entries.
1114: Inner subdomains are those where the correction is applied.
1115: Outer subdomains are those where the residual necessary to obtain the
1116: corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1117: Both inner and outer subdomains can extend over several processors.
1118: This processor's portion of a subdomain is known as a local subdomain.
1120:     Inner subdomains cannot overlap each other, must not contain any indices owned by other MPI processes,
1121:     and together must cover the entire locally owned portion of the preconditioning matrix. The index sets on each
1122: process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1123: on another MPI process.
1125: By default the GASM preconditioner uses 1 (local) subdomain per processor.
1127: Level: advanced
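   Example Usage:
   A minimal sketch in which each process supplies a single inner subdomain consisting of exactly
   the rows it owns and lets PETSc build the outer subdomains from the requested overlap
   (illustrative; error checking omitted, and A is assumed to be the matrix used to define
   the preconditioner):
.vb
   IS       iis[1];
   PetscInt rstart,rend;

   MatGetOwnershipRange(A,&rstart,&rend);
   ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&iis[0]);
   PCGASMSetSubdomains(pc,1,iis,NULL);
   ISDestroy(&iis[0]);
.ve
   PCGASMSetSubdomains() keeps its own reference to the index sets, so the caller may destroy iis[0] afterwards.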
1129: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1130: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1131: @*/
1132: PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1133: {
1134: PC_GASM *osm = (PC_GASM*)pc->data;
1139: PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1140: osm->dm_subdomains = PETSC_FALSE;
1141: return(0);
1142: }
1144: /*@
1145: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1146: additive Schwarz preconditioner. Either all or no processors in the
1147: pc communicator must call this routine.
1149: Logically Collective on pc
1151: Input Parameters:
1152: + pc - the preconditioner context
1153: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1155: Options Database Key:
1156: . -pc_gasm_overlap <overlap> - Sets overlap
1158: Notes:
1159: By default the GASM preconditioner uses 1 subdomain per processor. To use
1160:     multiple subdomains per processor or "straddling" subdomains that intersect
1161: multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1163: The overlap defaults to 0, so if one desires that no additional
1164: overlap be computed beyond what may have been set with a call to
1165: PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1166: not explicitly set the subdomains in application code, then all overlap would be computed
1167:     internally by PETSc, and using an overlap of 0 would result in a GASM
1168: variant that is equivalent to the block Jacobi preconditioner.
1170: Note that one can define initial index sets with any overlap via
1171: PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1172: PETSc to extend that overlap further, if desired.
1174: Level: intermediate
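   Example Usage:
   A short sketch (illustrative; pc is assumed to already be of type PCGASM):
.vb
   PCGASMSetTotalSubdomains(pc,16);
   PCGASMSetOverlap(pc,2);
.ve
   The same effect can be obtained from the command line with -pc_gasm_total_subdomains 16 -pc_gasm_overlap 2.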
1176: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1177: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1178: @*/
1179: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1180: {
1182: PC_GASM *osm = (PC_GASM*)pc->data;
1187: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1188: osm->dm_subdomains = PETSC_FALSE;
1189: return(0);
1190: }
1192: /*@
1193: PCGASMSetType - Sets the type of restriction and interpolation used
1194: for local problems in the additive Schwarz method.
1196: Logically Collective on PC
1198: Input Parameters:
1199: + pc - the preconditioner context
1200: - type - variant of GASM, one of
1201: .vb
1202: PC_GASM_BASIC - full interpolation and restriction
1203: PC_GASM_RESTRICT - full restriction, local processor interpolation
1204: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1205: PC_GASM_NONE - local processor restriction and interpolation
1206: .ve
1208: Options Database Key:
1209: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1211: Level: intermediate
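   Example Usage:
   A one-line sketch (illustrative; pc is assumed to already be of type PCGASM):
.vb
   PCGASMSetType(pc,PC_GASM_BASIC);
.ve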
1213: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1214: PCGASMCreateSubdomains2D()
1215: @*/
1216: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1217: {
1223: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1224: return(0);
1225: }
1227: /*@
1228: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1230: Logically Collective on PC
1232: Input Parameters:
1233: + pc - the preconditioner context
1234: - doSort - sort the subdomain indices
1236: Level: intermediate
1238: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1239: PCGASMCreateSubdomains2D()
1240: @*/
1241: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1242: {
1248: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1249: return(0);
1250: }
1252: /*@C
1253: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1254: this processor.
1256: Collective on PC iff first_local is requested
1258: Input Parameter:
1259: . pc - the preconditioner context
1261: Output Parameters:
1262: + n_local - the number of blocks on this processor or NULL
1263: . first_local - the global number of the first block on this processor or NULL,
1264: all processors must request or all must pass NULL
1265: - ksp - the array of KSP contexts
1267: Note:
1268:    The array of KSP objects returned by PCGASMGetSubKSP() must not be freed by the caller
1270: Currently for some matrix implementations only 1 block per processor
1271: is supported.
1273: You must call KSPSetUp() before calling PCGASMGetSubKSP().
1275: Level: advanced
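   Example Usage:
   A minimal sketch that switches every local block solver to ILU (illustrative; error checking
   omitted, and KSPSetUp() is assumed to have already been called on the outer KSP):
.vb
   KSP      *subksp;
   PetscInt i,nlocal;
   PC       subpc;

   PCGASMGetSubKSP(pc,&nlocal,NULL,&subksp);
   for (i=0; i<nlocal; i++) {
     KSPGetPC(subksp[i],&subpc);
     PCSetType(subpc,PCILU);
   }
.ve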
1277: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1278: PCGASMCreateSubdomains2D(),
1279: @*/
1280: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1281: {
1286: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1287: return(0);
1288: }
1290: /* -------------------------------------------------------------------------------------*/
1291: /*MC
1292:    PCGASM - Use the (restricted) additive Schwarz method, in which each block is (approximately) solved with
1293:            its own KSP object.
1295: Options Database Keys:
1296: + -pc_gasm_total_subdomains <n> - Sets the total number of subdomains to be distributed among the processors
1297: . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1298: . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1299: . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1300: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1302: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1303: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1304: -pc_gasm_type basic to use the standard GASM.
1306: Notes:
1307: Blocks can be shared by multiple processes.
1309: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1310: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
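   For instance, an illustrative command line (the executable name and process count are placeholders):
.vb
   mpiexec -n 4 ./app -ksp_type gmres -pc_type gasm -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type ilu
.ve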
1312:      To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
1313: and set the options directly on the resulting KSP object (you can access its PC
1314: with KSPGetPC())
1316: Level: beginner
1318: References:
1319: + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions,
1320: Courant Institute, New York University Technical report
1321: - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1322: Cambridge University Press.
1324: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1325: PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1326: PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1328: M*/
1330: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1331: {
1333: PC_GASM *osm;
1336: PetscNewLog(pc,&osm);
1338: osm->N = PETSC_DETERMINE;
1339: osm->n = PETSC_DECIDE;
1340: osm->nmax = PETSC_DETERMINE;
1341: osm->overlap = 0;
1342: osm->ksp = NULL;
1343: osm->gorestriction = NULL;
1344: osm->girestriction = NULL;
1345: osm->pctoouter = NULL;
1346: osm->gx = NULL;
1347: osm->gy = NULL;
1348: osm->x = NULL;
1349: osm->y = NULL;
1350: osm->pcx = NULL;
1351: osm->pcy = NULL;
1352: osm->permutationIS = NULL;
1353: osm->permutationP = NULL;
1354: osm->pcmat = NULL;
1355: osm->ois = NULL;
1356: osm->iis = NULL;
1357: osm->pmat = NULL;
1358: osm->type = PC_GASM_RESTRICT;
1359: osm->same_subdomain_solvers = PETSC_TRUE;
1360: osm->sort_indices = PETSC_TRUE;
1361: osm->dm_subdomains = PETSC_FALSE;
1362: osm->hierarchicalpartitioning = PETSC_FALSE;
1364: pc->data = (void*)osm;
1365: pc->ops->apply = PCApply_GASM;
1366: pc->ops->matapply = PCMatApply_GASM;
1367: pc->ops->applytranspose = PCApplyTranspose_GASM;
1368: pc->ops->setup = PCSetUp_GASM;
1369: pc->ops->reset = PCReset_GASM;
1370: pc->ops->destroy = PCDestroy_GASM;
1371: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1372: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1373: pc->ops->view = PCView_GASM;
1374: pc->ops->applyrichardson = NULL;
1376: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1377: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1378: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1379: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1380: PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1381: return(0);
1382: }
1384: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1385: {
1386: MatPartitioning mpart;
1387: const char *prefix;
1388: PetscInt i,j,rstart,rend,bs;
1389: PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1390: Mat Ad = NULL, adj;
1391: IS ispart,isnumb,*is;
1392: PetscErrorCode ierr;
1395:   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of local subdomains must be > 0, got nloc = %D",nloc);
1397: /* Get prefix, row distribution, and block size */
1398: MatGetOptionsPrefix(A,&prefix);
1399: MatGetOwnershipRange(A,&rstart,&rend);
1400: MatGetBlockSize(A,&bs);
1401: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1403: /* Get diagonal block from matrix if possible */
1404: MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1405: if (hasop) {
1406: MatGetDiagonalBlock(A,&Ad);
1407: }
1408: if (Ad) {
1409: PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1410: if (!isbaij) {PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1411: }
1412: if (Ad && nloc > 1) {
1413: PetscBool match,done;
1414: /* Try to setup a good matrix partitioning if available */
1415: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1416: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1417: MatPartitioningSetFromOptions(mpart);
1418: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1419: if (!match) {
1420: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1421: }
1422: if (!match) { /* assume a "good" partitioner is available */
1423: PetscInt na;
1424: const PetscInt *ia,*ja;
1425: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1426: if (done) {
1427: /* Build adjacency matrix by hand. Unfortunately a call to
1428: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1429: remove the block-aij structure and we cannot expect
1430: MatPartitioning to split vertices as we need */
1431: PetscInt i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1432: const PetscInt *row;
1433: nnz = 0;
1434: for (i=0; i<na; i++) { /* count number of nonzeros */
1435: len = ia[i+1] - ia[i];
1436: row = ja + ia[i];
1437: for (j=0; j<len; j++) {
1438: if (row[j] == i) { /* don't count diagonal */
1439: len--; break;
1440: }
1441: }
1442: nnz += len;
1443: }
1444: PetscMalloc1(na+1,&iia);
1445: PetscMalloc1(nnz,&jja);
1446: nnz = 0;
1447: iia[0] = 0;
1448: for (i=0; i<na; i++) { /* fill adjacency */
1449: cnt = 0;
1450: len = ia[i+1] - ia[i];
1451: row = ja + ia[i];
1452: for (j=0; j<len; j++) {
1453: if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1454: }
1455: nnz += cnt;
1456: iia[i+1] = nnz;
1457: }
1458: /* Partitioning of the adjacency matrix */
1459: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1460: MatPartitioningSetAdjacency(mpart,adj);
1461: MatPartitioningSetNParts(mpart,nloc);
1462: MatPartitioningApply(mpart,&ispart);
1463: ISPartitioningToNumbering(ispart,&isnumb);
1464: MatDestroy(&adj);
1465: foundpart = PETSC_TRUE;
1466: }
1467: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1468: }
1469: MatPartitioningDestroy(&mpart);
1470: }
1471: PetscMalloc1(nloc,&is);
1472: if (!foundpart) {
1474: /* Partitioning by contiguous chunks of rows */
1476: PetscInt mbs = (rend-rstart)/bs;
1477: PetscInt start = rstart;
1478: for (i=0; i<nloc; i++) {
1479: PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1480: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1481: start += count;
1482: }
1484: } else {
1486: /* Partitioning by adjacency of diagonal block */
1488: const PetscInt *numbering;
1489: PetscInt *count,nidx,*indices,*newidx,start=0;
1490: /* Get node count in each partition */
1491: PetscMalloc1(nloc,&count);
1492: ISPartitioningCount(ispart,nloc,count);
1493: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1494: for (i=0; i<nloc; i++) count[i] *= bs;
1495: }
1496: /* Build indices from node numbering */
1497: ISGetLocalSize(isnumb,&nidx);
1498: PetscMalloc1(nidx,&indices);
1499: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1500: ISGetIndices(isnumb,&numbering);
1501: PetscSortIntWithPermutation(nidx,numbering,indices);
1502: ISRestoreIndices(isnumb,&numbering);
1503: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1504: PetscMalloc1(nidx*bs,&newidx);
1505: for (i=0; i<nidx; i++) {
1506: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1507: }
1508: PetscFree(indices);
1509: nidx *= bs;
1510: indices = newidx;
1511: }
1512: /* Shift to get global indices */
1513: for (i=0; i<nidx; i++) indices[i] += rstart;
1515: /* Build the index sets for each block */
1516: for (i=0; i<nloc; i++) {
1517: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1518: ISSort(is[i]);
1519: start += count[i];
1520: }
1522: PetscFree(count);
1523: PetscFree(indices);
1524: ISDestroy(&isnumb);
1525: ISDestroy(&ispart);
1526: }
1527: *iis = is;
1528: return(0);
1529: }
1531: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1532: {
1533: PetscErrorCode ierr;
1536: MatSubdomainsCreateCoalesce(A,N,n,iis);
1537: return(0);
1538: }
1540: /*@C
1541: PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1542: Schwarz preconditioner for any problem based on its matrix.
1544: Collective
1546: Input Parameters:
1547: + A - The global matrix operator
1548: - N - the number of global subdomains requested
1550: Output Parameters:
1551: + n - the number of subdomains created on this processor
1552: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1554: Level: advanced
1556: Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1557: When N < size, the subdomains straddle processor boundaries and are no longer local.
1558: The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL). The overlapping
1559: outer subdomains will be generated automatically from these according to the requested amount of
1560: overlap; this is currently supported only for local subdomains.
1562: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1563: @*/
1564: PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1565: {
1566: PetscMPIInt size;
1567: PetscErrorCode ierr;
1573: if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1574: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1575: if (N >= size) {
1576: *n = N/size + (N%size);
1577: PCGASMCreateLocalSubdomains(A,*n,iis);
1578: } else {
1579: PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1580: }
1581: return(0);
1582: }
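/*
   A minimal usage sketch (assuming a PC pc that has already been set to PCGASM, a parallel matrix
   A, and with error checking omitted): create nonoverlapping subdomains from the matrix, hand them
   to the preconditioner, and let the requested overlap generate the outer subdomains.

      PetscInt n;
      IS       *iis;
      PCGASMCreateSubdomains(A,16,&n,&iis);      request 16 global subdomains
      PCGASMSetSubdomains(pc,n,iis,NULL);        outer subdomains generated from the overlap
      PCGASMSetOverlap(pc,1);
      PCGASMDestroySubdomains(n,&iis,NULL);      safe once the subdomains have been set on the PC
*/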
1584: /*@C
1585: PCGASMDestroySubdomains - Destroys the index sets created with
1586: PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1587: called after setting subdomains with PCGASMSetSubdomains().
1589: Collective
1591: Input Parameters:
1592: + n - the number of index sets
1593: . iis - the array of inner subdomains
1594: - ois - the array of outer subdomains, can be NULL
1596: Level: intermediate
1598: Notes:
1599: this is merely a convenience subroutine that walks each list,
1600: destroys each IS on the list, and then frees the list. At the end the
1601: list pointers are set to NULL.
1603: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1604: @*/
1605: PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1606: {
1607: PetscInt i;
1611: if (n <= 0) return(0);
1612: if (ois) {
1614: if (*ois) {
1616: for (i=0; i<n; i++) {
1617: ISDestroy(&(*ois)[i]);
1618: }
1619: PetscFree((*ois));
1620: }
1621: }
1622: if (iis) {
1624: if (*iis) {
1626: for (i=0; i<n; i++) {
1627: ISDestroy(&(*iis)[i]);
1628: }
1629: PetscFree((*iis));
1630: }
1631: }
1632: return(0);
1633: }
1635: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1636: { \
1637: PetscInt first_row = first/M, last_row = last/M+1; \
1638: /* \
1639: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1640: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1641: subdomain). \
1642: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1643: of the intersection. \
1644: */ \
1645: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1646: *ylow_loc = PetscMax(first_row,ylow); \
1647: /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1648: *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1649: /* yhigh_loc is the grid row above the last local subdomain element */ \
1650: *yhigh_loc = PetscMin(last_row,yhigh); \
1651: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1652: *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1653: /* Now compute the size of the local subdomain n. */ \
1654: *n = 0; \
1655: if (*ylow_loc < *yhigh_loc) { \
1656: PetscInt width = xright-xleft; \
1657: *n += width*(*yhigh_loc-*ylow_loc-1); \
1658: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1659: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1660: } \
1661: }
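/*
   Worked example for the macro above: on a grid with M = 10 columns, take the subdomain box
   xleft = 2, xright = 6, ylow = 1, yhigh = 4 and the local ownership range [first,last) = [25,47),
   so first_row = 2 and last_row = 5. Then ylow_loc = 2, xleft_loc = max(25%10,2) = 5, yhigh_loc = 4
   and xright_loc = 6 (since yhigh_loc != last_row), giving n = 4*(4-2-1) + min(6-2,4) - min(5-2,4) = 5,
   which matches the five owned cells: index 25 in grid row 2 and indices 32..35 in grid row 3.
*/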
1663: /*@
1664: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1665: preconditioner for a two-dimensional problem on a regular grid.
1667: Collective
1669: Input Parameters:
1670: + pc - the preconditioner context
1671: . M - the global number of grid points in the x direction
1672: . N - the global number of grid points in the y direction
1673: . Mdomains - the global number of subdomains in the x direction
1674: . Ndomains - the global number of subdomains in the y direction
1675: . dof - degrees of freedom per node
1676: - overlap - overlap in mesh lines
1678: Output Parameters:
1679: + nsub - the number of local subdomains created
1680: . iis - array of index sets defining inner (nonoverlapping) subdomains
1681: - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1683: Level: advanced
1685: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1686: @*/
1687: PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1688: {
1690: PetscMPIInt size, rank;
1691: PetscInt i, j;
1692: PetscInt maxheight, maxwidth;
1693: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1694: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1695: PetscInt x[2][2], y[2][2], n[2];
1696: PetscInt first, last;
1697: PetscInt nidx, *idx;
1698: PetscInt ii,jj,s,q,d;
1699: PetscInt k,kk;
1700: PetscMPIInt color;
1701: MPI_Comm comm, subcomm;
1702: IS **xis = NULL, **is = ois, **is_local = iis;
1705: PetscObjectGetComm((PetscObject)pc, &comm);
1706: MPI_Comm_size(comm, &size);
1707: MPI_Comm_rank(comm, &rank);
1708: MatGetOwnershipRange(pc->pmat, &first, &last);
1709: if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1710: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1712: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1713: s = 0;
1714: ystart = 0;
1715: for (j=0; j<Ndomains; ++j) {
1716: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1717: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1718: /* Vertical domain limits with an overlap. */
1719: ylow = PetscMax(ystart - overlap,0);
1720: yhigh = PetscMin(ystart + maxheight + overlap,N);
1721: xstart = 0;
1722: for (i=0; i<Mdomains; ++i) {
1723: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1724: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1725: /* Horizontal domain limits with an overlap. */
1726: xleft = PetscMax(xstart - overlap,0);
1727: xright = PetscMin(xstart + maxwidth + overlap,M);
1728: /*
1729: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1730: */
1731: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1732: if (nidx) ++s;
1733: xstart += maxwidth;
1734: } /* for (i = 0; i < Mdomains; ++i) */
1735: ystart += maxheight;
1736: } /* for (j = 0; j < Ndomains; ++j) */
1738: /* Now we can allocate the necessary number of ISs. */
1739: *nsub = s;
1740: PetscMalloc1(*nsub,is);
1741: PetscMalloc1(*nsub,is_local);
1742: s = 0;
1743: ystart = 0;
1744: for (j=0; j<Ndomains; ++j) {
1745: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1746: if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1747: /* Vertical domain limits with an overlap. */
1748: y[0][0] = PetscMax(ystart - overlap,0);
1749: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1750: /* Vertical domain limits without an overlap. */
1751: y[1][0] = ystart;
1752: y[1][1] = PetscMin(ystart + maxheight,N);
1753: xstart = 0;
1754: for (i=0; i<Mdomains; ++i) {
1755: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1756: if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1757: /* Horizontal domain limits with an overlap. */
1758: x[0][0] = PetscMax(xstart - overlap,0);
1759: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1760: /* Horizontal domain limits without an overlap. */
1761: x[1][0] = xstart;
1762: x[1][1] = PetscMin(xstart+maxwidth,M);
1763: /*
1764: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1765: Do this twice: first for the domains with overlaps, and once without.
1766: During the first pass create the subcommunicators, and use them on the second pass as well.
1767: */
1768: for (q = 0; q < 2; ++q) {
1769: PetscBool split = PETSC_FALSE;
1770: /*
1771: Domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1772: according to whether the domain with or without an overlap is considered.
1773: */
1774: xleft = x[q][0]; xright = x[q][1];
1775: ylow = y[q][0]; yhigh = y[q][1];
1776: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1777: nidx *= dof;
1778: n[q] = nidx;
1779: /*
1780: Based on the counted number of indices in the local domain *with an overlap*,
1781: construct a subcommunicator of all the processors supporting this domain.
1782: Observe that a domain with an overlap might have nontrivial local support,
1783: while the domain without an overlap might not. Hence, the decision to participate
1784: in the subcommunicator must be based on the domain with an overlap.
1785: */
1786: if (q == 0) {
1787: if (nidx) color = 1;
1788: else color = MPI_UNDEFINED;
1789: MPI_Comm_split(comm, color, rank, &subcomm);
1790: split = PETSC_TRUE;
1791: }
1792: /*
1793: Proceed only if the number of local indices *with an overlap* is nonzero.
1794: */
1795: if (n[0]) {
1796: if (q == 0) xis = is;
1797: if (q == 1) {
1798: /*
1799: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1800: Moreover, if the overlap is zero, the two ISs are identical.
1801: */
1802: if (overlap == 0) {
1803: (*is_local)[s] = (*is)[s];
1804: PetscObjectReference((PetscObject)(*is)[s]);
1805: continue;
1806: } else {
1807: xis = is_local;
1808: subcomm = ((PetscObject)(*is)[s])->comm;
1809: }
1810: } /* if (q == 1) */
1811: idx = NULL;
1812: PetscMalloc1(nidx,&idx);
1813: if (nidx) {
1814: k = 0;
1815: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1816: PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1817: PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1818: kk = dof*(M*jj + x0);
1819: for (ii=x0; ii<x1; ++ii) {
1820: for (d = 0; d < dof; ++d) {
1821: idx[k++] = kk++;
1822: }
1823: }
1824: }
1825: }
1826: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1827: if (split) {
1828: MPI_Comm_free(&subcomm);
1829: }
1830: }/* if (n[0]) */
1831: }/* for (q = 0; q < 2; ++q) */
1832: if (n[0]) ++s;
1833: xstart += maxwidth;
1834: } /* for (i = 0; i < Mdomains; ++i) */
1835: ystart += maxheight;
1836: } /* for (j = 0; j < Ndomains; ++j) */
1837: return(0);
1838: }
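/*
   A minimal usage sketch (assuming a PC pc already set to PCGASM for a 64x64 grid with one degree
   of freedom per node; error checking omitted): tile the grid with a 4x4 array of subdomains with
   one mesh line of overlap and pass both index set families to the preconditioner.

      PetscInt nsub;
      IS       *iis,*ois;
      PCGASMCreateSubdomains2D(pc,64,64,4,4,1,1,&nsub,&iis,&ois);
      PCGASMSetSubdomains(pc,nsub,iis,ois);
      PCGASMDestroySubdomains(nsub,&iis,&ois);
*/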
1840: /*@C
1841: PCGASMGetSubdomains - Gets the subdomains supported on this processor
1842: for the additive Schwarz preconditioner.
1844: Not Collective
1846: Input Parameter:
1847: . pc - the preconditioner context
1849: Output Parameters:
1850: + n - the number of subdomains for this processor (default value = 1)
1851: . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1852: - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1854: Notes:
1855: The user is responsible for destroying the ISs and freeing the returned arrays.
1856: The IS numbering is in the parallel, global numbering of the vector.
1858: Level: advanced
1860: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1861: PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1862: @*/
1863: PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1864: {
1865: PC_GASM *osm;
1867: PetscBool match;
1868: PetscInt i;
1872: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1873: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1874: osm = (PC_GASM*)pc->data;
1875: if (n) *n = osm->n;
1876: if (iis) {
1877: PetscMalloc1(osm->n, iis);
1878: }
1879: if (ois) {
1880: PetscMalloc1(osm->n, ois);
1881: }
1882: if (iis || ois) {
1883: for (i = 0; i < osm->n; ++i) {
1884: if (iis) (*iis)[i] = osm->iis[i];
1885: if (ois) (*ois)[i] = osm->ois[i];
1886: }
1887: }
1888: return(0);
1889: }
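/*
   A minimal usage sketch (assuming a set-up PC pc of type PCGASM; error checking omitted): query
   the subdomains supported on this process, inspect them, and free the returned arrays afterwards
   (see the Notes above for ownership of the index sets themselves).

      PetscInt n;
      IS       *iis,*ois;
      PCGASMGetSubdomains(pc,&n,&iis,&ois);
      if (n > 0) ISView(iis[0],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)iis[0])));
      PetscFree(iis);
      PetscFree(ois);
*/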
1891: /*@C
1892: PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1893: only) for the additive Schwarz preconditioner.
1895: Not Collective
1897: Input Parameter:
1898: . pc - the preconditioner context
1900: Output Parameters:
1901: + n - the number of matrices for this processor (default value = 1)
1902: - mat - the matrices
1904: Notes:
1905: Matrices returned by this routine have the same communicators as the index sets (IS)
1906: used to define subdomains in PCGASMSetSubdomains().
1907: Level: advanced
1909: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1910: PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1911: @*/
1912: PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1913: {
1914: PC_GASM *osm;
1916: PetscBool match;
1922: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1923: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1924: if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1925: osm = (PC_GASM*)pc->data;
1926: if (n) *n = osm->n;
1927: if (mat) *mat = osm->pmat;
1928: return(0);
1929: }
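/*
   A minimal usage sketch (assuming a KSP ksp whose PC is of type PCGASM; error checking omitted):
   the submatrices exist only after setup, so set up first and then view the first local subdomain
   matrix on its own communicator.

      PetscInt n;
      Mat      *submats;
      PC       pc;
      KSPSetUp(ksp);
      KSPGetPC(ksp,&pc);
      PCGASMGetSubmatrices(pc,&n,&submats);
      if (n > 0) MatView(submats[0],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)submats[0])));
*/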
1931: /*@
1932: PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1933: Logically Collective
1935: Input Parameters:
1936: + pc - the preconditioner
1937: - flg - boolean indicating whether to use subdomains defined by the DM
1939: Options Database Key:
1940: . -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM, if possible
1942: Level: intermediate
1944: Notes:
1945: PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1946: so calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1947: automatically turns the use of DM-defined subdomains off.
1949: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1950: PCGASMCreateSubdomains2D()
1951: @*/
1952: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1953: {
1954: PC_GASM *osm = (PC_GASM*)pc->data;
1956: PetscBool match;
1961: if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot be called on an already set up PC.");
1962: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1963: if (match) {
1964: if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1965: osm->dm_subdomains = flg;
1966: }
1967: }
1968: return(0);
1969: }
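/*
   A minimal usage sketch (assuming a KSP ksp with a DM attached via KSPSetDM(); error checking
   omitted): request DM-defined subdomains before the preconditioner is set up.

      PC pc;
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
      KSPSetUp(ksp);

   The same behavior can be requested from the options database with -pc_gasm_dm_subdomains.
*/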
1971: /*@
1972: PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1973: Not Collective
1975: Input Parameter:
1976: . pc - the preconditioner
1978: Output Parameter:
1979: . flg - boolean indicating whether to use subdomains defined by the DM
1981: Level: intermediate
1983: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1984: PCGASMCreateSubdomains2D()
1985: @*/
1986: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1987: {
1988: PC_GASM *osm = (PC_GASM*)pc->data;
1990: PetscBool match;
1995: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1996: if (match) {
1997: if (flg) *flg = osm->dm_subdomains;
1998: }
1999: return(0);
2000: }