Actual source code: gasm.c
1: /*
2: This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3: In this version each processor may intersect multiple subdomains and any subdomain may
4: intersect multiple processors. Intersections of subdomains with processors are called *local
5: subdomains*.
7: N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8: n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCSetUp_GASM())
9: nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10: */
11: #include <petsc/private/pcimpl.h>
12: #include <petscdm.h>
14: typedef struct {
15: PetscInt N,n,nmax;
16: PetscInt overlap; /* overlap requested by user */
17: PCGASMType type; /* use reduced interpolation, restriction or both */
18: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19: PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are the same */
20: PetscBool sort_indices; /* flag to sort subdomain indices */
21: PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22: PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23: PetscBool hierarchicalpartitioning;
24: IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25: IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26: KSP *ksp; /* linear solvers for each subdomain */
27: Mat *pmat; /* subdomain block matrices */
28: Vec gx,gy; /* Merged work vectors */
29: Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30: VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31: VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32: VecScatter pctoouter;
33: IS permutationIS;
34: Mat permutationP;
35: Mat pcmat;
36: Vec pcx,pcy;
37: } PC_GASM;
39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40: {
41: PC_GASM *osm = (PC_GASM*)pc->data;
42: PetscInt i;
44: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
45: PetscMalloc2(osm->n,numbering,osm->n,permutation);
46: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
47: for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
48: PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
49: return 0;
50: }
52: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
53: {
54: PC_GASM *osm = (PC_GASM*)pc->data;
55: PetscInt j,nidx;
56: const PetscInt *idx;
57: PetscViewer sviewer;
58: char *cidx;
61: /* Inner subdomains. */
62: ISGetLocalSize(osm->iis[i], &nidx);
63: /*
64: No more than 15 characters per index plus a space.
65: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
66: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
67: For nidx == 0, the whole string consists of 16 '\0' characters.
68: */
69: #define len 16*(nidx+1)+1
70: PetscMalloc1(len, &cidx);
71: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
72: #undef len
73: ISGetIndices(osm->iis[i], &idx);
74: for (j = 0; j < nidx; ++j) {
75: PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
76: }
77: ISRestoreIndices(osm->iis[i],&idx);
78: PetscViewerDestroy(&sviewer);
79: PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
80: PetscViewerFlush(viewer);
81: PetscViewerASCIIPushSynchronized(viewer);
82: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
83: PetscViewerFlush(viewer);
84: PetscViewerASCIIPopSynchronized(viewer);
85: PetscViewerASCIIPrintf(viewer, "\n");
86: PetscViewerFlush(viewer);
87: PetscFree(cidx);
88: /* Outer subdomains. */
89: ISGetLocalSize(osm->ois[i], &nidx);
90: /*
91: No more than 15 characters per index plus a space.
92: PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
93: in case nidx == 0. That will take care of the space for the trailing '\0' as well.
94: For nidx == 0, the whole string consists of 16 '\0' characters.
95: */
96: #define len 16*(nidx+1)+1
97: PetscMalloc1(len, &cidx);
98: PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
99: #undef len
100: ISGetIndices(osm->ois[i], &idx);
101: for (j = 0; j < nidx; ++j) {
102: PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
103: }
104: PetscViewerDestroy(&sviewer);
105: ISRestoreIndices(osm->ois[i],&idx);
106: PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
107: PetscViewerFlush(viewer);
108: PetscViewerASCIIPushSynchronized(viewer);
109: PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
110: PetscViewerFlush(viewer);
111: PetscViewerASCIIPopSynchronized(viewer);
112: PetscViewerASCIIPrintf(viewer, "\n");
113: PetscViewerFlush(viewer);
114: PetscFree(cidx);
115: return 0;
116: }
118: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
119: {
120: PC_GASM *osm = (PC_GASM*)pc->data;
121: const char *prefix;
122: char fname[PETSC_MAX_PATH_LEN+1];
123: PetscInt l, d, count;
124: PetscBool found;
125: PetscViewer viewer, sviewer = NULL;
126: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
128: PCGetOptionsPrefix(pc,&prefix);
129: PetscOptionsHasName(NULL,prefix,"-pc_gasm_print_subdomains",&found);
130: if (!found) return 0;
131: PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);
132: if (!found) PetscStrcpy(fname,"stdout");
133: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
134: /*
135: Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
136: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
137: */
138: PetscObjectName((PetscObject)viewer);
139: l = 0;
140: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
141: for (count = 0; count < osm->N; ++count) {
142: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
143: if (l<osm->n) {
144: d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
145: if (numbering[d] == count) {
146: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
147: PCGASMSubdomainView_Private(pc,d,sviewer);
148: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
149: ++l;
150: }
151: }
152: MPI_Barrier(PetscObjectComm((PetscObject)pc));
153: }
154: PetscFree2(numbering,permutation);
155: PetscViewerDestroy(&viewer);
156: return 0;
157: }
159: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
160: {
161: PC_GASM *osm = (PC_GASM*)pc->data;
162: const char *prefix;
163: PetscMPIInt rank, size;
164: PetscInt bsz;
165: PetscBool iascii,view_subdomains=PETSC_FALSE;
166: PetscViewer sviewer;
167: PetscInt count, l;
168: char overlap[256] = "user-defined overlap";
169: char gsubdomains[256] = "unknown total number of subdomains";
170: char msubdomains[256] = "unknown max number of local subdomains";
171: PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
173: MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
174: MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);
176: if (osm->overlap >= 0) {
177: PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
178: }
179: if (osm->N != PETSC_DETERMINE) {
180: PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
181: }
182: if (osm->nmax != PETSC_DETERMINE) {
183: PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
184: }
186: PCGetOptionsPrefix(pc,&prefix);
187: PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);
189: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
190: if (iascii) {
191: /*
192: Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
193: the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
194: collectively on the comm.
195: */
196: PetscObjectName((PetscObject)viewer);
197: PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
198: PetscViewerASCIIPrintf(viewer," %s\n",overlap);
199: PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);
200: PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);
201: PetscViewerASCIIPushSynchronized(viewer);
202: PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
203: PetscViewerFlush(viewer);
204: PetscViewerASCIIPopSynchronized(viewer);
205: /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
206: PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");
207: PetscViewerASCIIPushTab(viewer);
208: PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");
209: /* Make sure that everybody waits for the banner to be printed. */
210: MPI_Barrier(PetscObjectComm((PetscObject)viewer));
211: /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
212: PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
213: l = 0;
214: for (count = 0; count < osm->N; ++count) {
215: PetscMPIInt srank, ssize;
216: if (l<osm->n) {
217: PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
218: if (numbering[d] == count) {
219: MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
220: MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
221: PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
222: ISGetLocalSize(osm->ois[d],&bsz);
223: PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
224: PetscViewerFlush(sviewer);
225: if (view_subdomains) {
226: PCGASMSubdomainView_Private(pc,d,sviewer);
227: }
228: if (!pc->setupcalled) {
229: PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");
230: } else {
231: KSPView(osm->ksp[d],sviewer);
232: }
233: PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");
234: PetscViewerFlush(sviewer);
235: PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
236: ++l;
237: }
238: }
239: MPI_Barrier(PetscObjectComm((PetscObject)pc));
240: }
241: PetscFree2(numbering,permutation);
242: PetscViewerASCIIPopTab(viewer);
243: PetscViewerFlush(viewer);
244: /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
245: PetscViewerASCIIPopSynchronized(viewer);
246: }
247: return 0;
248: }
250: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
252: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
253: {
254: PC_GASM *osm = (PC_GASM*)pc->data;
255: MatPartitioning part;
256: MPI_Comm comm;
257: PetscMPIInt size;
258: PetscInt nlocalsubdomains,fromrows_localsize;
259: IS partitioning,fromrows,isn;
260: Vec outervec;
262: PetscObjectGetComm((PetscObject)pc,&comm);
263: MPI_Comm_size(comm,&size);
264: /* Hierarchical partitioning is only needed when more than one subdomain
265: * is requested and the total number of subdomains is smaller than the
266: * number of MPI ranks (so that several ranks share each subdomain);
267: * it is skipped in the remaining cases below.
268: */
269: if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return 0;
271: nlocalsubdomains = size/osm->N;
272: osm->n = 1;
273: MatPartitioningCreate(comm,&part);
274: MatPartitioningSetAdjacency(part,pc->pmat);
275: MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
276: MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
277: MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
278: MatPartitioningSetFromOptions(part);
279: /* get new processor owner number of each vertex */
280: MatPartitioningApply(part,&partitioning);
281: ISBuildTwoSided(partitioning,NULL,&fromrows);
282: ISPartitioningToNumbering(partitioning,&isn);
283: ISDestroy(&isn);
284: ISGetLocalSize(fromrows,&fromrows_localsize);
285: MatPartitioningDestroy(&part);
286: MatCreateVecs(pc->pmat,&outervec,NULL);
287: VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
288: VecDuplicate(osm->pcx,&(osm->pcy));
289: VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
290: MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
291: PetscObjectReference((PetscObject)fromrows);
292: osm->permutationIS = fromrows;
293: osm->pcmat = pc->pmat;
294: PetscObjectReference((PetscObject)osm->permutationP);
295: pc->pmat = osm->permutationP;
296: VecDestroy(&outervec);
297: ISDestroy(&fromrows);
298: ISDestroy(&partitioning);
299: osm->n = PETSC_DETERMINE;
300: return 0;
301: }
303: static PetscErrorCode PCSetUp_GASM(PC pc)
304: {
305: PC_GASM *osm = (PC_GASM*)pc->data;
306: PetscInt i,nInnerIndices,nTotalInnerIndices;
307: PetscMPIInt rank, size;
308: MatReuse scall = MAT_REUSE_MATRIX;
309: KSP ksp;
310: PC subpc;
311: const char *prefix,*pprefix;
312: Vec x,y;
313: PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
314: const PetscInt *oidxi; /* Indices of the i-th local outer subdomain. */
315: PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
316: PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
317: IS gois; /* Disjoint union of the global indices of the outer subdomains. */
318: IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
319: PetscScalar *gxarray, *gyarray;
320: PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
321: PetscInt num_subdomains = 0;
322: DM *subdomain_dm = NULL;
323: char **subdomain_names = NULL;
324: PetscInt *numbering;
326: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
327: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
328: if (!pc->setupcalled) {
329: /* use a hierarchical partitioning */
330: if (osm->hierarchicalpartitioning) {
331: PCGASMSetHierarchicalPartitioning(pc);
332: }
333: if (osm->n == PETSC_DETERMINE) {
334: if (osm->N != PETSC_DETERMINE) {
335: /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
336: PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
337: } else if (osm->dm_subdomains && pc->dm) {
338: /* try pc->dm next, if allowed */
339: PetscInt d;
340: IS *inner_subdomain_is, *outer_subdomain_is;
341: DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
342: if (num_subdomains) {
343: PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
344: }
345: for (d = 0; d < num_subdomains; ++d) {
346: if (inner_subdomain_is) ISDestroy(&inner_subdomain_is[d]);
347: if (outer_subdomain_is) ISDestroy(&outer_subdomain_is[d]);
348: }
349: PetscFree(inner_subdomain_is);
350: PetscFree(outer_subdomain_is);
351: } else {
352: /* still no subdomains; use one per processor */
353: osm->nmax = osm->n = 1;
354: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
355: osm->N = size;
356: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
357: }
358: }
359: if (!osm->iis) {
360: /*
361: osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
362: We create the requisite number of local inner subdomains and then expand them into
363: outer subdomains, if necessary.
364: */
365: PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
366: }
367: if (!osm->ois) {
368: /*
369: Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
370: has been requested, copy the inner subdomains over so they can be modified.
371: */
372: PetscMalloc1(osm->n,&osm->ois);
373: for (i=0; i<osm->n; ++i) {
374: if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
375: ISDuplicate(osm->iis[i],(osm->ois)+i);
376: ISCopy(osm->iis[i],osm->ois[i]);
377: } else {
378: PetscObjectReference((PetscObject)((osm->iis)[i]));
379: osm->ois[i] = osm->iis[i];
380: }
381: }
382: if (osm->overlap>0 && osm->N>1) {
383: /* Extend the "overlapping" regions by a number of steps */
384: MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
385: }
386: }
388: /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
389: if (osm->nmax == PETSC_DETERMINE) {
390: PetscMPIInt inwork,outwork;
391: /* determine the maximum number of local subdomains over all processes */
392: inwork = osm->n;
393: MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
394: osm->nmax = outwork;
395: }
396: if (osm->N == PETSC_DETERMINE) {
397: /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
398: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
399: }
401: if (osm->sort_indices) {
402: for (i=0; i<osm->n; i++) {
403: ISSort(osm->ois[i]);
404: ISSort(osm->iis[i]);
405: }
406: }
407: PCGetOptionsPrefix(pc,&prefix);
408: PCGASMPrintSubdomains(pc);
410: /*
411: Merge the ISs, create merged vectors and restrictions.
412: */
413: /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
414: on = 0;
415: for (i=0; i<osm->n; i++) {
416: ISGetLocalSize(osm->ois[i],&oni);
417: on += oni;
418: }
419: PetscMalloc1(on, &oidx);
420: on = 0;
421: /* Merge local indices together */
422: for (i=0; i<osm->n; i++) {
423: ISGetLocalSize(osm->ois[i],&oni);
424: ISGetIndices(osm->ois[i],&oidxi);
425: PetscArraycpy(oidx+on,oidxi,oni);
426: ISRestoreIndices(osm->ois[i],&oidxi);
427: on += oni;
428: }
429: ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
430: nTotalInnerIndices = 0;
431: for (i=0; i<osm->n; i++) {
432: ISGetLocalSize(osm->iis[i],&nInnerIndices);
433: nTotalInnerIndices += nInnerIndices;
434: }
435: VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
436: VecDuplicate(x,&y);
438: VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
439: VecDuplicate(osm->gx,&osm->gy);
440: VecGetOwnershipRange(osm->gx, &gostart, NULL);
441: ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
442: /* gois may contain indices that are not owned by this process */
443: VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
444: PetscMalloc1(osm->n,&numbering);
445: PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
446: VecDestroy(&x);
447: ISDestroy(&gois);
449: /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
450: {
451: PetscInt ini; /* Number of indices in the i-th local inner subdomain. */
452: PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
453: PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
454: PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
455: IS giis; /* IS for the disjoint union of inner subdomains. */
456: IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
457: PetscScalar *array;
458: const PetscInt *indices;
459: PetscInt k;
460: on = 0;
461: for (i=0; i<osm->n; i++) {
462: ISGetLocalSize(osm->ois[i],&oni);
463: on += oni;
464: }
465: PetscMalloc1(on, &iidx);
466: PetscMalloc1(on, &ioidx);
467: VecGetArray(y,&array);
468: /* mark each inner-subdomain entry with its global subdomain number so that overlapping (outer-only) entries can be detected after the scatter */
469: in = 0;
470: for (i=0; i<osm->n; i++) {
471: ISGetLocalSize(osm->iis[i],&ini);
472: for (k = 0; k < ini; ++k) {
473: array[in+k] = numbering[i];
474: }
475: in += ini;
476: }
477: VecRestoreArray(y,&array);
478: VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
479: VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
480: VecGetOwnershipRange(osm->gy,&gostart, NULL);
481: VecGetArray(osm->gy,&array);
482: on = 0;
483: in = 0;
484: for (i=0; i<osm->n; i++) {
485: ISGetLocalSize(osm->ois[i],&oni);
486: ISGetIndices(osm->ois[i],&indices);
487: for (k=0; k<oni; k++) {
488: /* skip overlapping indices to get inner domain */
489: if (PetscRealPart(array[on+k]) != numbering[i]) continue;
490: iidx[in] = indices[k];
491: ioidx[in++] = gostart+on+k;
492: }
493: ISRestoreIndices(osm->ois[i], &indices);
494: on += oni;
495: }
496: VecRestoreArray(osm->gy,&array);
497: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
498: ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
499: VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
500: VecDestroy(&y);
501: ISDestroy(&giis);
502: ISDestroy(&giois);
503: }
504: ISDestroy(&goid);
505: PetscFree(numbering);
507: /* Create the subdomain work vectors. */
508: PetscMalloc1(osm->n,&osm->x);
509: PetscMalloc1(osm->n,&osm->y);
510: VecGetArray(osm->gx, &gxarray);
511: VecGetArray(osm->gy, &gyarray);
512: for (i=0, on=0; i<osm->n; ++i, on += oni) {
513: PetscInt oNi;
514: ISGetLocalSize(osm->ois[i],&oni);
515: /* the global size of the subdomain is taken over its subcommunicator */
516: ISGetSize(osm->ois[i],&oNi);
517: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
518: VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
519: }
520: VecRestoreArray(osm->gx, &gxarray);
521: VecRestoreArray(osm->gy, &gyarray);
522: /* Create the subdomain solvers */
523: PetscMalloc1(osm->n,&osm->ksp);
524: for (i=0; i<osm->n; i++) {
525: char subprefix[PETSC_MAX_PATH_LEN+1];
526: KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
527: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
528: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
529: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
530: KSPSetType(ksp,KSPPREONLY);
531: KSPGetPC(ksp,&subpc); /* Why do we need this here? */
532: if (subdomain_dm) {
533: KSPSetDM(ksp,subdomain_dm[i]);
534: DMDestroy(subdomain_dm+i);
535: }
536: PCGetOptionsPrefix(pc,&prefix);
537: KSPSetOptionsPrefix(ksp,prefix);
538: if (subdomain_names && subdomain_names[i]) {
539: PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
540: KSPAppendOptionsPrefix(ksp,subprefix);
541: PetscFree(subdomain_names[i]);
542: }
543: KSPAppendOptionsPrefix(ksp,"sub_");
544: osm->ksp[i] = ksp;
545: }
546: PetscFree(subdomain_dm);
547: PetscFree(subdomain_names);
548: scall = MAT_INITIAL_MATRIX;
549: } else { /* if (pc->setupcalled) */
550: /*
551: Destroy the submatrices from the previous iteration
552: */
553: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
554: MatDestroyMatrices(osm->n,&osm->pmat);
555: scall = MAT_INITIAL_MATRIX;
556: }
557: if (osm->permutationIS) {
558: MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
559: PetscObjectReference((PetscObject)osm->permutationP);
560: osm->pcmat = pc->pmat;
561: pc->pmat = osm->permutationP;
562: }
563: }
565: /*
566: Extract the submatrices.
567: */
568: if (size > 1) {
569: MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
570: } else {
571: MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
572: }
573: if (scall == MAT_INITIAL_MATRIX) {
574: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
575: for (i=0; i<osm->n; i++) {
576: PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
577: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
578: }
579: }
581: /* Return control to the user so that the submatrices can be modified (e.g., to apply
582: different boundary conditions for the submatrices than for the global problem) */
583: PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);
585: /*
586: Loop over submatrices putting them into local ksps
587: */
588: for (i=0; i<osm->n; i++) {
589: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
590: KSPGetOptionsPrefix(osm->ksp[i],&prefix);
591: MatSetOptionsPrefix(osm->pmat[i],prefix);
592: if (!pc->setupcalled) {
593: KSPSetFromOptions(osm->ksp[i]);
594: }
595: }
596: if (osm->pcmat) {
597: MatDestroy(&pc->pmat);
598: pc->pmat = osm->pcmat;
599: osm->pcmat = NULL;
600: }
601: return 0;
602: }
604: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
605: {
606: PC_GASM *osm = (PC_GASM*)pc->data;
607: PetscInt i;
609: for (i=0; i<osm->n; i++) {
610: KSPSetUp(osm->ksp[i]);
611: }
612: return 0;
613: }
615: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
616: {
617: PC_GASM *osm = (PC_GASM*)pc->data;
618: PetscInt i;
619: Vec x,y;
620: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
622: if (osm->pctoouter) {
623: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
624: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
625: x = osm->pcx;
626: y = osm->pcy;
627: } else {
628: x = xin;
629: y = yout;
630: }
631: /*
632: support for limiting the restriction or interpolation only to the inner
633: subdomain values (leaving the other values 0).
634: */
635: if (!(osm->type & PC_GASM_RESTRICT)) {
636: /* have to zero the work RHS since scatter may leave some slots empty */
637: VecZeroEntries(osm->gx);
638: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
639: } else {
640: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
641: }
642: VecZeroEntries(osm->gy);
643: if (!(osm->type & PC_GASM_RESTRICT)) {
644: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
645: } else {
646: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
647: }
648: /* do the subdomain solves */
649: for (i=0; i<osm->n; ++i) {
650: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
651: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
652: }
653: /* do we need to zero y? */
654: VecZeroEntries(y);
655: if (!(osm->type & PC_GASM_INTERPOLATE)) {
656: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
657: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
658: } else {
659: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
660: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
661: }
662: if (osm->pctoouter) {
663: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
664: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
665: }
666: return 0;
667: }
669: static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
670: {
671: PC_GASM *osm = (PC_GASM*)pc->data;
672: Mat X,Y,O=NULL,Z,W;
673: Vec x,y;
674: PetscInt i,m,M,N;
675: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
678: MatGetSize(Xin,NULL,&N);
679: if (osm->pctoouter) {
680: VecGetLocalSize(osm->pcx,&m);
681: VecGetSize(osm->pcx,&M);
682: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);
683: for (i = 0; i < N; ++i) {
684: MatDenseGetColumnVecRead(Xin,i,&x);
685: MatDenseGetColumnVecWrite(O,i,&y);
686: VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
687: VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
688: MatDenseRestoreColumnVecWrite(O,i,&y);
689: MatDenseRestoreColumnVecRead(Xin,i,&x);
690: }
691: X = Y = O;
692: } else {
693: X = Xin;
694: Y = Yout;
695: }
696: /*
697: support for limiting the restriction or interpolation only to the inner
698: subdomain values (leaving the other values 0).
699: */
700: VecGetLocalSize(osm->x[0],&m);
701: VecGetSize(osm->x[0],&M);
702: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);
703: for (i = 0; i < N; ++i) {
704: MatDenseGetColumnVecRead(X,i,&x);
705: MatDenseGetColumnVecWrite(Z,i,&y);
706: if (!(osm->type & PC_GASM_RESTRICT)) {
707: /* have to zero the work RHS since scatter may leave some slots empty */
708: VecZeroEntries(y);
709: VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);
710: VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);
711: } else {
712: VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);
713: VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);
714: }
715: MatDenseRestoreColumnVecWrite(Z,i,&y);
716: MatDenseRestoreColumnVecRead(X,i,&x);
717: }
718: MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);
719: MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
720: MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);
721: MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);
722: /* do the subdomain solve */
723: KSPMatSolve(osm->ksp[0],Z,W);
724: KSPCheckSolve(osm->ksp[0],pc,NULL);
725: MatDestroy(&Z);
726: /* do we need to zero y? */
727: MatZeroEntries(Y);
728: for (i = 0; i < N; ++i) {
729: MatDenseGetColumnVecWrite(Y,i,&y);
730: MatDenseGetColumnVecRead(W,i,&x);
731: if (!(osm->type & PC_GASM_INTERPOLATE)) {
732: VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);
733: VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);
734: } else {
735: VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);
736: VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);
737: }
738: MatDenseRestoreColumnVecRead(W,i,&x);
739: if (osm->pctoouter) {
740: MatDenseGetColumnVecWrite(Yout,i,&x);
741: VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
742: VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
743: MatDenseRestoreColumnVecRead(Yout,i,&x);
744: }
745: MatDenseRestoreColumnVecWrite(Y,i,&y);
746: }
747: MatDestroy(&W);
748: MatDestroy(&O);
749: return 0;
750: }
752: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
753: {
754: PC_GASM *osm = (PC_GASM*)pc->data;
755: PetscInt i;
756: Vec x,y;
757: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
759: if (osm->pctoouter) {
760: VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
761: VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
762: x = osm->pcx;
763: y = osm->pcy;
764: }else{
765: x = xin;
766: y = yout;
767: }
768: /*
769: Support for limiting the restriction or interpolation to only local
770: subdomain values (leaving the other values 0).
772: Note: these are reversed from PCApply_GASM() because we are applying the
773: transpose of the three terms
774: */
775: if (!(osm->type & PC_GASM_INTERPOLATE)) {
776: /* have to zero the work RHS since scatter may leave some slots empty */
777: VecZeroEntries(osm->gx);
778: VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
779: } else {
780: VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
781: }
782: VecZeroEntries(osm->gy);
783: if (!(osm->type & PC_GASM_INTERPOLATE)) {
784: VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
785: } else {
786: VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
787: }
788: /* do the local solves */
789: for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
790: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
791: KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
792: }
793: VecZeroEntries(y);
794: if (!(osm->type & PC_GASM_RESTRICT)) {
795: VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
796: VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
797: } else {
798: VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
799: VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
800: }
801: if (osm->pctoouter) {
802: VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
803: VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
804: }
805: return 0;
806: }
808: static PetscErrorCode PCReset_GASM(PC pc)
809: {
810: PC_GASM *osm = (PC_GASM*)pc->data;
811: PetscInt i;
813: if (osm->ksp) {
814: for (i=0; i<osm->n; i++) {
815: KSPReset(osm->ksp[i]);
816: }
817: }
818: if (osm->pmat) {
819: if (osm->n > 0) {
820: PetscMPIInt size;
821: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
822: if (size > 1) {
823: /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
824: MatDestroyMatrices(osm->n,&osm->pmat);
825: } else {
826: MatDestroySubMatrices(osm->n,&osm->pmat);
827: }
828: }
829: }
830: if (osm->x) {
831: for (i=0; i<osm->n; i++) {
832: VecDestroy(&osm->x[i]);
833: VecDestroy(&osm->y[i]);
834: }
835: }
836: VecDestroy(&osm->gx);
837: VecDestroy(&osm->gy);
839: VecScatterDestroy(&osm->gorestriction);
840: VecScatterDestroy(&osm->girestriction);
841: if (!osm->user_subdomains) {
842: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
843: osm->N = PETSC_DETERMINE;
844: osm->nmax = PETSC_DETERMINE;
845: }
846: if (osm->pctoouter) {
847: VecScatterDestroy(&(osm->pctoouter));
848: }
849: if (osm->permutationIS) {
850: ISDestroy(&(osm->permutationIS));
851: }
852: if (osm->pcx) {
853: VecDestroy(&(osm->pcx));
854: }
855: if (osm->pcy) {
856: VecDestroy(&(osm->pcy));
857: }
858: if (osm->permutationP) {
859: MatDestroy(&(osm->permutationP));
860: }
861: if (osm->pcmat) {
862: MatDestroy(&osm->pcmat);
863: }
864: return 0;
865: }
867: static PetscErrorCode PCDestroy_GASM(PC pc)
868: {
869: PC_GASM *osm = (PC_GASM*)pc->data;
870: PetscInt i;
872: PCReset_GASM(pc);
873: /* PCReset() will not destroy the subdomains if user_subdomains is true. */
874: PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
875: if (osm->ksp) {
876: for (i=0; i<osm->n; i++) {
877: KSPDestroy(&osm->ksp[i]);
878: }
879: PetscFree(osm->ksp);
880: }
881: PetscFree(osm->x);
882: PetscFree(osm->y);
883: PetscFree(pc->data);
884: return 0;
885: }
887: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
888: {
889: PC_GASM *osm = (PC_GASM*)pc->data;
890: PetscInt blocks,ovl;
891: PetscBool flg;
892: PCGASMType gasmtype;
894: PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
895: PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
896: PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
897: if (flg) {
898: PCGASMSetTotalSubdomains(pc,blocks);
899: }
900: PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
901: if (flg) {
902: PCGASMSetOverlap(pc,ovl);
903: osm->dm_subdomains = PETSC_FALSE;
904: }
905: flg = PETSC_FALSE;
906: PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
907: if (flg) PCGASMSetType(pc,gasmtype);
908: PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
909: PetscOptionsTail();
910: return 0;
911: }
913: /*------------------------------------------------------------------------------------*/
915: /*@
916: PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
917: communicator.
918: Logically collective on pc
920: Input Parameters:
921: + pc - the preconditioner
922: - N - total number of subdomains
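
   A minimal usage sketch (not part of this file; the KSP ksp is assumed to have been created by the caller with its operators already set):
.vb
   PC pc;
   KSPGetPC(ksp,&pc);
   PCSetType(pc,PCGASM);
   PCGASMSetTotalSubdomains(pc,16);   /* distribute 16 subdomains across the communicator */
.ve
   The same effect can be obtained at runtime with -pc_type gasm -pc_gasm_total_subdomains 16.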
924: Level: beginner
926: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
927: PCGASMCreateSubdomains2D()
928: @*/
929: PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
930: {
931: PC_GASM *osm = (PC_GASM*)pc->data;
932: PetscMPIInt size,rank;
937: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
938: osm->ois = osm->iis = NULL;
940: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
941: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
942: osm->N = N;
943: osm->n = PETSC_DETERMINE;
944: osm->nmax = PETSC_DETERMINE;
945: osm->dm_subdomains = PETSC_FALSE;
946: return 0;
947: }
949: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
950: {
951: PC_GASM *osm = (PC_GASM*)pc->data;
952: PetscInt i;
957: PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
958: osm->iis = osm->ois = NULL;
959: osm->n = n;
960: osm->N = PETSC_DETERMINE;
961: osm->nmax = PETSC_DETERMINE;
962: if (ois) {
963: PetscMalloc1(n,&osm->ois);
964: for (i=0; i<n; i++) {
965: PetscObjectReference((PetscObject)ois[i]);
966: osm->ois[i] = ois[i];
967: }
968: /*
969: Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
970: it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
971: */
972: osm->overlap = -1;
973: /* inner subdomains must be provided */
975: }/* end if */
976: if (iis) {
977: PetscMalloc1(n,&osm->iis);
978: for (i=0; i<n; i++) {
979: PetscObjectReference((PetscObject)iis[i]);
980: osm->iis[i] = iis[i];
981: }
982: if (!ois) {
983: osm->ois = NULL;
984: /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */
985: }
986: }
987: if (PetscDefined(USE_DEBUG)) {
988: PetscInt j,rstart,rend,*covered,lsize;
989: const PetscInt *indices;
990: /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
991: MatGetOwnershipRange(pc->pmat,&rstart,&rend);
992: PetscCalloc1(rend-rstart,&covered);
993: /* check if the current processor owns indices from others */
994: for (i=0; i<n; i++) {
995: ISGetIndices(osm->iis[i],&indices);
996: ISGetLocalSize(osm->iis[i],&lsize);
997: for (j=0; j<lsize; j++) {
998: if (indices[j]<rstart || indices[j]>=rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %D from another process",indices[j]);
999: else if (covered[indices[j]-rstart]==1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %D",indices[j]);
1000: else covered[indices[j]-rstart] = 1;
1001: }
1002: ISRestoreIndices(osm->iis[i],&indices);
1003: }
1004: /* check if we miss any indices */
1005: for (i=rstart; i<rend; i++) {
1006: if (!covered[i-rstart]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local entry %D not covered by inner subdomains",i);
1007: }
1008: PetscFree(covered);
1009: }
1010: if (iis) osm->user_subdomains = PETSC_TRUE;
1011: return 0;
1012: }
1014: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1015: {
1016: PC_GASM *osm = (PC_GASM*)pc->data;
1020: if (!pc->setupcalled) osm->overlap = ovl;
1021: return 0;
1022: }
1024: static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
1025: {
1026: PC_GASM *osm = (PC_GASM*)pc->data;
1028: osm->type = type;
1029: osm->type_set = PETSC_TRUE;
1030: return 0;
1031: }
1033: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1034: {
1035: PC_GASM *osm = (PC_GASM*)pc->data;
1037: osm->sort_indices = doSort;
1038: return 0;
1039: }
1041: /*
1042: FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1043: In particular, it would upset the global subdomain number calculation.
1044: */
1045: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1046: {
1047: PC_GASM *osm = (PC_GASM*)pc->data;
1051: if (n) *n = osm->n;
1052: if (first) {
1053: MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1054: *first -= osm->n;
1055: }
1056: if (ksp) {
1057: /* Assume that local solves are now different; not necessarily
1058: true, though! This flag is used only for PCView_GASM() */
1059: *ksp = osm->ksp;
1060: osm->same_subdomain_solvers = PETSC_FALSE;
1061: }
1062: return 0;
1063: } /* PCGASMGetSubKSP_GASM() */
1065: /*@C
1066: PCGASMSetSubdomains - Sets the subdomains for this processor
1067: for the additive Schwarz preconditioner.
1069: Collective on pc
1071: Input Parameters:
1072: + pc - the preconditioner object
1073: . n - the number of subdomains for this processor
1074: . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1075: - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1077: Notes:
1078: The IS indices use the parallel, global numbering of the vector entries.
1079: Inner subdomains are those where the correction is applied.
1080: Outer subdomains are those where the residual necessary to obtain the
1081: corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1082: Both inner and outer subdomains can extend over several processors.
1083: This processor's portion of a subdomain is known as a local subdomain.
1085: Inner subdomains can not overlap with each other, can not contain indices owned by other processes,
1086: and together must cover the entire locally owned range of matrix rows. The index sets on each
1087: process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1088: on another MPI process.
1090: By default the GASM preconditioner uses 1 (local) subdomain per processor.
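
   A usage sketch with two local inner subdomains per process (the index sets inner[0],inner[1] are hypothetical and must be built by the caller, e.g. with ISCreateGeneral(), so that together they cover the locally owned rows exactly once):
.vb
   IS inner[2];
   /* ... fill inner[0] and inner[1] with global row indices ... */
   PCSetType(pc,PCGASM);
   PCGASMSetSubdomains(pc,2,inner,NULL);   /* outer subdomains are derived from the inner ones */
   PCGASMSetOverlap(pc,1);                 /* and extended by one level of overlap */
.ve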
1092: Level: advanced
1094: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1095: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1096: @*/
1097: PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1098: {
1099: PC_GASM *osm = (PC_GASM*)pc->data;
1102: PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1103: osm->dm_subdomains = PETSC_FALSE;
1104: return 0;
1105: }
1107: /*@
1108: PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1109: additive Schwarz preconditioner. Either all or no processors in the
1110: pc communicator must call this routine.
1112: Logically Collective on pc
1114: Input Parameters:
1115: + pc - the preconditioner context
1116: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1118: Options Database Key:
1119: . -pc_gasm_overlap <overlap> - Sets overlap
1121: Notes:
1122: By default the GASM preconditioner uses 1 subdomain per processor. To use
1123: multiple subdomains per processor or "straddling" subdomains that intersect
1124: multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1126: The overlap defaults to 0, so if one desires that no additional
1127: overlap be computed beyond what may have been set with a call to
1128: PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1129: not explicitly set the subdomains in application code, then all overlap would be computed
1130: internally by PETSc, and using an overlap of 0 would result in a GASM
1131: variant that is equivalent to the block Jacobi preconditioner.
1133: Note that one can define initial index sets with any overlap via
1134: PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1135: PETSc to extend that overlap further, if desired.
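
   A usage sketch (pc is assumed to be a PC created by the caller), equivalent to the runtime options -pc_type gasm -pc_gasm_total_subdomains 8 -pc_gasm_overlap 2:
.vb
   PCSetType(pc,PCGASM);
   PCGASMSetTotalSubdomains(pc,8);
   PCGASMSetOverlap(pc,2);   /* extend each generated subdomain by 2 levels of overlap */
.ve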
1137: Level: intermediate
1139: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1140: PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1141: @*/
1142: PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1143: {
1144: PC_GASM *osm = (PC_GASM*)pc->data;
1148: PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1149: osm->dm_subdomains = PETSC_FALSE;
1150: return 0;
1151: }
1153: /*@
1154: PCGASMSetType - Sets the type of restriction and interpolation used
1155: for local problems in the additive Schwarz method.
1157: Logically Collective on PC
1159: Input Parameters:
1160: + pc - the preconditioner context
1161: - type - variant of GASM, one of
1162: .vb
1163: PC_GASM_BASIC - full interpolation and restriction
1164: PC_GASM_RESTRICT - full restriction, local processor interpolation
1165: PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1166: PC_GASM_NONE - local processor restriction and interpolation
1167: .ve
1169: Options Database Key:
1170: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
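
   A usage sketch (pc is assumed to already be of type PCGASM), equivalent to -pc_gasm_type basic:
.vb
   PCGASMSetType(pc,PC_GASM_BASIC);   /* full restriction and full interpolation */
.ve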
1172: Level: intermediate
1174: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1175: PCGASMCreateSubdomains2D()
1176: @*/
1177: PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1178: {
1181: PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1182: return 0;
1183: }
1185: /*@
1186: PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1188: Logically Collective on PC
1190: Input Parameters:
1191: + pc - the preconditioner context
1192: - doSort - sort the subdomain indices
1194: Level: intermediate
1196: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1197: PCGASMCreateSubdomains2D()
1198: @*/
1199: PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1200: {
1203: PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1204: return 0;
1205: }
1207: /*@C
1208: PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1209: this processor.
1211: Collective on PC iff first_local is requested
1213: Input Parameter:
1214: . pc - the preconditioner context
1216: Output Parameters:
1217: + n_local - the number of blocks on this processor or NULL
1218: . first_local - the global number of the first block on this processor or NULL,
1219: all processors must request or all must pass NULL
1220: - ksp - the array of KSP contexts
1222: Note:
1223: After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
1225: Currently for some matrix implementations only 1 block per processor
1226: is supported.
1228: You must call KSPSetUp() before calling PCGASMGetSubKSP().
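
   A usage sketch (ksp is assumed to be the outer KSP and pc its PCGASM preconditioner, both created by the caller):
.vb
   PetscInt i,n_local,first;
   KSP      *subksp;
   PC       subpc;
   KSPSetUp(ksp);                                  /* required before PCGASMGetSubKSP() */
   PCGASMGetSubKSP(pc,&n_local,&first,&subksp);
   for (i=0; i<n_local; i++) {
     KSPGetPC(subksp[i],&subpc);
     PCSetType(subpc,PCLU);                        /* e.g., use a direct factorization on each block */
   }
.ve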
1230: Level: advanced
1232: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1233: PCGASMCreateSubdomains2D(),
1234: @*/
1235: PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1236: {
1238: PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1239: return 0;
1240: }
1242: /* -------------------------------------------------------------------------------------*/
1243: /*MC
1244: PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1245: its own KSP object.
1247: Options Database Keys:
1248: + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors
1249: . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1250: . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1251: . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1252: - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1254: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1255: will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1256: -pc_gasm_type basic to use the standard GASM.
1258: Notes:
1259: Blocks can be shared by multiple processes.
1261: To set options on the solvers for each block append -sub_ to all the KSP and PC
1262: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1264: To set the options on the solvers separately for each block call PCGASMGetSubKSP()
1265: and set the options directly on the resulting KSP object (you can access its PC
1266: with KSPGetPC())
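
   For example, the following runtime options (one possible configuration, not prescribed by this file) request 8 subdomains,
   each extended by one level of overlap and solved with a direct LU factorization:
.vb
   -ksp_type gmres -pc_type gasm -pc_gasm_total_subdomains 8 -pc_gasm_overlap 1
   -sub_ksp_type preonly -sub_pc_type lu
.ve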
1268: Level: beginner
1270: References:
1271: + * - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1272: Courant Institute, New York University Technical report
1273: - * - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1274: Cambridge University Press.
1276: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1277: PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1278: PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1280: M*/
1282: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1283: {
1284: PC_GASM *osm;
1286: PetscNewLog(pc,&osm);
1288: osm->N = PETSC_DETERMINE;
1289: osm->n = PETSC_DECIDE;
1290: osm->nmax = PETSC_DETERMINE;
1291: osm->overlap = 0;
1292: osm->ksp = NULL;
1293: osm->gorestriction = NULL;
1294: osm->girestriction = NULL;
1295: osm->pctoouter = NULL;
1296: osm->gx = NULL;
1297: osm->gy = NULL;
1298: osm->x = NULL;
1299: osm->y = NULL;
1300: osm->pcx = NULL;
1301: osm->pcy = NULL;
1302: osm->permutationIS = NULL;
1303: osm->permutationP = NULL;
1304: osm->pcmat = NULL;
1305: osm->ois = NULL;
1306: osm->iis = NULL;
1307: osm->pmat = NULL;
1308: osm->type = PC_GASM_RESTRICT;
1309: osm->same_subdomain_solvers = PETSC_TRUE;
1310: osm->sort_indices = PETSC_TRUE;
1311: osm->dm_subdomains = PETSC_FALSE;
1312: osm->hierarchicalpartitioning = PETSC_FALSE;
1314: pc->data = (void*)osm;
1315: pc->ops->apply = PCApply_GASM;
1316: pc->ops->matapply = PCMatApply_GASM;
1317: pc->ops->applytranspose = PCApplyTranspose_GASM;
1318: pc->ops->setup = PCSetUp_GASM;
1319: pc->ops->reset = PCReset_GASM;
1320: pc->ops->destroy = PCDestroy_GASM;
1321: pc->ops->setfromoptions = PCSetFromOptions_GASM;
1322: pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1323: pc->ops->view = PCView_GASM;
1324: pc->ops->applyrichardson = NULL;
1326: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1327: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1328: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1329: PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1330: PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1331: return 0;
1332: }
1334: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1335: {
1336: MatPartitioning mpart;
1337: const char *prefix;
1338: PetscInt i,j,rstart,rend,bs;
1339: PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1340: Mat Ad = NULL, adj;
1341: IS ispart,isnumb,*is;
1345: /* Get prefix, row distribution, and block size */
1346: MatGetOptionsPrefix(A,&prefix);
1347: MatGetOwnershipRange(A,&rstart,&rend);
1348: MatGetBlockSize(A,&bs);
1351: /* Get diagonal block from matrix if possible */
1352: MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1353: if (hasop) {
1354: MatGetDiagonalBlock(A,&Ad);
1355: }
1356: if (Ad) {
1357: PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1358: if (!isbaij) PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);
1359: }
1360: if (Ad && nloc > 1) {
1361: PetscBool match,done;
1362: /* Try to setup a good matrix partitioning if available */
1363: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1364: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1365: MatPartitioningSetFromOptions(mpart);
1366: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1367: if (!match) {
1368: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1369: }
1370: if (!match) { /* assume a "good" partitioner is available */
1371: PetscInt na;
1372: const PetscInt *ia,*ja;
1373: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1374: if (done) {
1375: /* Build adjacency matrix by hand. Unfortunately a call to
1376: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1377: remove the block-aij structure and we cannot expect
1378: MatPartitioning to split vertices as we need */
1379: PetscInt i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1380: const PetscInt *row;
1381: nnz = 0;
1382: for (i=0; i<na; i++) { /* count number of nonzeros */
1383: len = ia[i+1] - ia[i];
1384: row = ja + ia[i];
1385: for (j=0; j<len; j++) {
1386: if (row[j] == i) { /* don't count diagonal */
1387: len--; break;
1388: }
1389: }
1390: nnz += len;
1391: }
1392: PetscMalloc1(na+1,&iia);
1393: PetscMalloc1(nnz,&jja);
1394: nnz = 0;
1395: iia[0] = 0;
1396: for (i=0; i<na; i++) { /* fill adjacency */
1397: cnt = 0;
1398: len = ia[i+1] - ia[i];
1399: row = ja + ia[i];
1400: for (j=0; j<len; j++) {
1401: if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1402: }
1403: nnz += cnt;
1404: iia[i+1] = nnz;
1405: }
1406: /* Partitioning of the adjacency matrix */
1407: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1408: MatPartitioningSetAdjacency(mpart,adj);
1409: MatPartitioningSetNParts(mpart,nloc);
1410: MatPartitioningApply(mpart,&ispart);
1411: ISPartitioningToNumbering(ispart,&isnumb);
1412: MatDestroy(&adj);
1413: foundpart = PETSC_TRUE;
1414: }
1415: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1416: }
1417: MatPartitioningDestroy(&mpart);
1418: }
1419: PetscMalloc1(nloc,&is);
1420: if (!foundpart) {
1422: /* Partitioning by contiguous chunks of rows */
1424: PetscInt mbs = (rend-rstart)/bs;
1425: PetscInt start = rstart;
1426: for (i=0; i<nloc; i++) {
1427: PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1428: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1429: start += count;
1430: }
1432: } else {
1434: /* Partitioning by adjacency of diagonal block */
1436: const PetscInt *numbering;
1437: PetscInt *count,nidx,*indices,*newidx,start=0;
1438: /* Get node count in each partition */
1439: PetscMalloc1(nloc,&count);
1440: ISPartitioningCount(ispart,nloc,count);
1441: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1442: for (i=0; i<nloc; i++) count[i] *= bs;
1443: }
1444: /* Build indices from node numbering */
1445: ISGetLocalSize(isnumb,&nidx);
1446: PetscMalloc1(nidx,&indices);
1447: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1448: ISGetIndices(isnumb,&numbering);
1449: PetscSortIntWithPermutation(nidx,numbering,indices);
1450: ISRestoreIndices(isnumb,&numbering);
1451: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1452: PetscMalloc1(nidx*bs,&newidx);
1453: for (i=0; i<nidx; i++) {
1454: for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1455: }
1456: PetscFree(indices);
1457: nidx *= bs;
1458: indices = newidx;
1459: }
1460: /* Shift to get global indices */
1461: for (i=0; i<nidx; i++) indices[i] += rstart;
1463: /* Build the index sets for each block */
1464: for (i=0; i<nloc; i++) {
1465: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1466: ISSort(is[i]);
1467: start += count[i];
1468: }
1470: PetscFree(count);
1471: PetscFree(indices);
1472: ISDestroy(&isnumb);
1473: ISDestroy(&ispart);
1474: }
1475: *iis = is;
1476: return 0;
1477: }
1479: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1480: {
1481: MatSubdomainsCreateCoalesce(A,N,n,iis);
1482: return 0;
1483: }
1485: /*@C
1486: PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1487: Schwarz preconditioner for any problem based on its matrix.
1489: Collective
1491: Input Parameters:
1492: + A - The global matrix operator
1493: - N - the number of global subdomains requested
1495: Output Parameters:
1496: + n - the number of subdomains created on this processor
1497: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1499: Level: advanced
1501: Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1502: When N < size, the subdomains straddle processor boundaries and are no longer local.
1503: The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL). The overlapping
1504: outer subdomains will be automatically generated from these according to the requested amount of
1505: overlap; this is currently supported only with local subdomains.
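
   A usage sketch (A and pc are assumed to be created by the caller, with pc of type PCGASM):
.vb
   PetscInt n;
   IS       *iis;
   PCGASMCreateSubdomains(A,16,&n,&iis);   /* 16 nonoverlapping subdomains over A's communicator */
   PCGASMSetSubdomains(pc,n,iis,NULL);
   PCGASMDestroySubdomains(n,&iis,NULL);   /* the PC holds its own references to the index sets */
.ve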
1507: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1508: @*/
1509: PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1510: {
1511: PetscMPIInt size;
1517: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1518: if (N >= size) {
1519: *n = N/size + (N%size);
1520: PCGASMCreateLocalSubdomains(A,*n,iis);
1521: } else {
1522: PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1523: }
1524: return 0;
1525: }
1527: /*@C
1528: PCGASMDestroySubdomains - Destroys the index sets created with
1529:    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1530: called after setting subdomains with PCGASMSetSubdomains().
1532: Collective
1534: Input Parameters:
1535: + n - the number of index sets
1536: .  iis - the array of inner subdomains
1537: - ois - the array of outer subdomains, can be NULL
1539: Level: intermediate
1541: Notes:
1542: this is merely a convenience subroutine that walks each list,
1543: destroys each IS on the list, and then frees the list. At the end the
1544: list pointers are set to NULL.
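   For example, to release subdomains previously created by PCGASMCreateSubdomains2D() and handed to
   PCGASMSetSubdomains() (a sketch; the names nsub, iis and ois are illustrative):
.vb
   PCGASMDestroySubdomains(nsub,&iis,&ois);
.ve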
1546: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1547: @*/
1548: PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1549: {
1550: PetscInt i;
1552: if (n <= 0) return 0;
1553: if (ois) {
1555: if (*ois) {
1557: for (i=0; i<n; i++) {
1558: ISDestroy(&(*ois)[i]);
1559: }
1560: PetscFree((*ois));
1561: }
1562: }
1563: if (iis) {
1565: if (*iis) {
1567: for (i=0; i<n; i++) {
1568: ISDestroy(&(*iis)[i]);
1569: }
1570: PetscFree((*iis));
1571: }
1572: }
1573: return 0;
1574: }
1576: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1577: { \
1578: PetscInt first_row = first/M, last_row = last/M+1; \
1579: /* \
1580: Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1581: of the bounding box of the intersection of the subdomain with the local ownership range (local \
1582: subdomain). \
1583: Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1584: of the intersection. \
1585: */ \
1586: /* ylow_loc is the grid row containing the first element of the local subdomain */ \
1587: *ylow_loc = PetscMax(first_row,ylow); \
1588: /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1589: *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1590: /* yhigh_loc is the grid row above the last local subdomain element */ \
1591: *yhigh_loc = PetscMin(last_row,yhigh); \
1592: /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1593: *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1594: /* Now compute the size of the local subdomain n. */ \
1595: *n = 0; \
1596: if (*ylow_loc < *yhigh_loc) { \
1597: PetscInt width = xright-xleft; \
1598: *n += width*(*yhigh_loc-*ylow_loc-1); \
1599: *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1600: *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1601: } \
1602: }
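/*
   Worked example (illustration only): on a grid with M = 10, take the subdomain xleft = 2, xright = 8,
   ylow = 1, yhigh = 5 and the ownership range [first,last) = [30,70). Then first_row = 3 and last_row = 8,
   so ylow_loc = 3, yhigh_loc = 5, xleft_loc = 2 and xright_loc = 8, giving n = 6*(5-3-1) + 6 - 0 = 12,
   i.e. grid rows 3 and 4 restricted to columns [2,8).
*/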
1604: /*@
1605: PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1606: preconditioner for a two-dimensional problem on a regular grid.
1608: Collective
1610: Input Parameters:
1611: + pc - the preconditioner context
1612: . M - the global number of grid points in the x direction
1613: . N - the global number of grid points in the y direction
1614: . Mdomains - the global number of subdomains in the x direction
1615: . Ndomains - the global number of subdomains in the y direction
1616: . dof - degrees of freedom per node
1617: - overlap - overlap in mesh lines
1619: Output Parameters:
1620: +  nsub - the number of local subdomains created
1621: . iis - array of index sets defining inner (nonoverlapping) subdomains
1622: - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1624: Level: advanced
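   Example (a sketch that splits a 128x128 grid into a 4x4 array of subdomains with one dof per node and
   overlap 2; error checking omitted):
.vb
   PetscInt nsub;
   IS       *iis,*ois;
   PCGASMCreateSubdomains2D(pc,128,128,4,4,1,2,&nsub,&iis,&ois);
   PCGASMSetSubdomains(pc,nsub,iis,ois);
   PCGASMDestroySubdomains(nsub,&iis,&ois);
.ve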
1626: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1627: @*/
1628: PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1629: {
1630: PetscMPIInt size, rank;
1631: PetscInt i, j;
1632: PetscInt maxheight, maxwidth;
1633: PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1634: PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1635: PetscInt x[2][2], y[2][2], n[2];
1636: PetscInt first, last;
1637: PetscInt nidx, *idx;
1638: PetscInt ii,jj,s,q,d;
1639: PetscInt k,kk;
1640: PetscMPIInt color;
1641: MPI_Comm comm, subcomm;
1642: IS **xis = NULL, **is = ois, **is_local = iis;
1644: PetscObjectGetComm((PetscObject)pc, &comm);
1645: MPI_Comm_size(comm, &size);
1646: MPI_Comm_rank(comm, &rank);
1647: MatGetOwnershipRange(pc->pmat, &first, &last);
1649: "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1651: /* Determine the number of domains with nonzero intersections with the local ownership range. */
1652: s = 0;
1653: ystart = 0;
1654: for (j=0; j<Ndomains; ++j) {
1655: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1657: /* Vertical domain limits with an overlap. */
1658: ylow = PetscMax(ystart - overlap,0);
1659: yhigh = PetscMin(ystart + maxheight + overlap,N);
1660: xstart = 0;
1661: for (i=0; i<Mdomains; ++i) {
1662: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1664: /* Horizontal domain limits with an overlap. */
1665: xleft = PetscMax(xstart - overlap,0);
1666: xright = PetscMin(xstart + maxwidth + overlap,M);
1667: /*
1668: Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1669: */
1670: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1671: if (nidx) ++s;
1672: xstart += maxwidth;
1673: } /* for (i = 0; i < Mdomains; ++i) */
1674: ystart += maxheight;
1675: } /* for (j = 0; j < Ndomains; ++j) */
1677: /* Now we can allocate the necessary number of ISs. */
1678: *nsub = s;
1679: PetscMalloc1(*nsub,is);
1680: PetscMalloc1(*nsub,is_local);
1681: s = 0;
1682: ystart = 0;
1683: for (j=0; j<Ndomains; ++j) {
1684: maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1686: /* Vertical domain limits with an overlap. */
1687: y[0][0] = PetscMax(ystart - overlap,0);
1688: y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1689: /* Vertical domain limits without an overlap. */
1690: y[1][0] = ystart;
1691: y[1][1] = PetscMin(ystart + maxheight,N);
1692: xstart = 0;
1693: for (i=0; i<Mdomains; ++i) {
1694: maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1696: /* Horizontal domain limits with an overlap. */
1697: x[0][0] = PetscMax(xstart - overlap,0);
1698: x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1699: /* Horizontal domain limits without an overlap. */
1700: x[1][0] = xstart;
1701: x[1][1] = PetscMin(xstart+maxwidth,M);
1702: /*
1703: Determine whether this domain intersects this processor's ownership range of pc->pmat.
1704: Do this twice: first for the domains with overlaps, and once without.
1705: During the first pass create the subcommunicators, and use them on the second pass as well.
1706: */
1707: for (q = 0; q < 2; ++q) {
1708: PetscBool split = PETSC_FALSE;
1709: /*
1710:           Domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1711:           according to whether the domain with or without the overlap is being considered.
1712: */
1713: xleft = x[q][0]; xright = x[q][1];
1714: ylow = y[q][0]; yhigh = y[q][1];
1715: PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1716: nidx *= dof;
1717: n[q] = nidx;
1718: /*
1719: Based on the counted number of indices in the local domain *with an overlap*,
1720: construct a subcommunicator of all the processors supporting this domain.
1721: Observe that a domain with an overlap might have nontrivial local support,
1722: while the domain without an overlap might not. Hence, the decision to participate
1723: in the subcommunicator must be based on the domain with an overlap.
1724: */
1725: if (q == 0) {
1726: if (nidx) color = 1;
1727: else color = MPI_UNDEFINED;
1728: MPI_Comm_split(comm, color, rank, &subcomm);
1729: split = PETSC_TRUE;
1730: }
1731: /*
1732: Proceed only if the number of local indices *with an overlap* is nonzero.
1733: */
1734: if (n[0]) {
1735: if (q == 0) xis = is;
1736: if (q == 1) {
1737: /*
1738: The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1739: Moreover, if the overlap is zero, the two ISs are identical.
1740: */
1741: if (overlap == 0) {
1742: (*is_local)[s] = (*is)[s];
1743: PetscObjectReference((PetscObject)(*is)[s]);
1744: continue;
1745: } else {
1746: xis = is_local;
1747: subcomm = ((PetscObject)(*is)[s])->comm;
1748: }
1749: } /* if (q == 1) */
1750: idx = NULL;
1751: PetscMalloc1(nidx,&idx);
1752: if (nidx) {
1753: k = 0;
1754: for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1755: PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1756: PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1757: kk = dof*(M*jj + x0);
1758: for (ii=x0; ii<x1; ++ii) {
1759: for (d = 0; d < dof; ++d) {
1760: idx[k++] = kk++;
1761: }
1762: }
1763: }
1764: }
1765: ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1766: if (split) {
1767: MPI_Comm_free(&subcomm);
1768: }
1769: }/* if (n[0]) */
1770: }/* for (q = 0; q < 2; ++q) */
1771: if (n[0]) ++s;
1772: xstart += maxwidth;
1773: } /* for (i = 0; i < Mdomains; ++i) */
1774: ystart += maxheight;
1775: } /* for (j = 0; j < Ndomains; ++j) */
1776: return 0;
1777: }
1779: /*@C
1780: PCGASMGetSubdomains - Gets the subdomains supported on this processor
1781: for the additive Schwarz preconditioner.
1783: Not Collective
1785: Input Parameter:
1786: . pc - the preconditioner context
1788: Output Parameters:
1789: + n - the number of subdomains for this processor (default value = 1)
1790: . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1791: - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1793: Notes:
1794: The user is responsible for destroying the ISs and freeing the returned arrays.
1795: The IS numbering is in the parallel, global numbering of the vector.
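   Example (a minimal sketch that inspects the inner subdomains after the preconditioner has been set up
   and frees the returned arrays; error checking omitted):
.vb
   PetscInt n,i,len;
   IS       *iis,*ois;
   PCGASMGetSubdomains(pc,&n,&iis,&ois);
   for (i=0; i<n; i++) {
     ISGetLocalSize(iis[i],&len);
     PetscPrintf(PETSC_COMM_SELF,"subdomain %D: %D local inner indices\n",i,len);
   }
   PetscFree(iis);
   PetscFree(ois);
.ve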
1797: Level: advanced
1799: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1800: PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1801: @*/
1802: PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1803: {
1804: PC_GASM *osm;
1805: PetscBool match;
1806: PetscInt i;
1809: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1811: osm = (PC_GASM*)pc->data;
1812: if (n) *n = osm->n;
1813: if (iis) {
1814: PetscMalloc1(osm->n, iis);
1815: }
1816: if (ois) {
1817: PetscMalloc1(osm->n, ois);
1818: }
1819: if (iis || ois) {
1820: for (i = 0; i < osm->n; ++i) {
1821: if (iis) (*iis)[i] = osm->iis[i];
1822: if (ois) (*ois)[i] = osm->ois[i];
1823: }
1824: }
1825: return 0;
1826: }
1828: /*@C
1829: PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1830: only) for the additive Schwarz preconditioner.
1832: Not Collective
1834: Input Parameter:
1835: . pc - the preconditioner context
1837: Output Parameters:
1838: + n - the number of matrices for this processor (default value = 1)
1839: - mat - the matrices
1841: Notes:
1842:    Matrices returned by this routine have the same communicators as the index sets (IS)
1843:    used to define the subdomains in PCGASMSetSubdomains().
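   Example (a sketch; valid only after PCSetUp() has been called on pc, error checking omitted):
.vb
   PetscInt n;
   Mat      *mats;
   PCGASMGetSubmatrices(pc,&n,&mats);
   /* mats points into the preconditioner's internal storage: do not free the array or destroy the matrices */
.ve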
1844: Level: advanced
1846: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1847: PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1848: @*/
1849: PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1850: {
1851: PC_GASM *osm;
1852: PetscBool match;
1858: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1860: osm = (PC_GASM*)pc->data;
1861: if (n) *n = osm->n;
1862: if (mat) *mat = osm->pmat;
1863: return 0;
1864: }
1866: /*@
1867: PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1868: Logically Collective
1870: Input Parameters:
1871: + pc - the preconditioner
1872: - flg - boolean indicating whether to use subdomains defined by the DM
1874: Options Database Key:
1875: .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM whenever possible
1877: Level: intermediate
1879: Notes:
1880:    PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1881:    so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or calling either of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1882:    automatically turns off the use of DM-defined subdomains.
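   Example (a sketch, assuming ksp is a KSP whose DM can define a domain decomposition; error checking omitted):
.vb
   KSPGetPC(ksp,&pc);
   PCSetType(pc,PCGASM);
   PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
   KSPSetUp(ksp);
.ve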
1884: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1885: PCGASMCreateSubdomains2D()
1886: @*/
1887: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1888: {
1889: PC_GASM *osm = (PC_GASM*)pc->data;
1890: PetscBool match;
1895: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1896: if (match) {
1897: if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1898: osm->dm_subdomains = flg;
1899: }
1900: }
1901: return 0;
1902: }
1904: /*@
1905: PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1906: Not Collective
1908: Input Parameter:
1909: . pc - the preconditioner
1911: Output Parameter:
1912: . flg - boolean indicating whether to use subdomains defined by the DM
1914: Level: intermediate
1916: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1917: PCGASMCreateSubdomains2D()
1918: @*/
1919: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1920: {
1921: PC_GASM *osm = (PC_GASM*)pc->data;
1922: PetscBool match;
1926: PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1927: if (match) {
1928: if (flg) *flg = osm->dm_subdomains;
1929: }
1930: return 0;
1931: }