Actual source code: gasm.c

  1: /*
  2:   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  3:   In this version each processor may intersect multiple subdomains and any subdomain may
  4:   intersect multiple processors.  Intersections of subdomains with processors are called *local
  5:   subdomains*.

  7:        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly via PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
  8:        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
  9:        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
 10: */
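/*
  Illustration (assumed configuration, not taken from the source): with 4 MPI ranks and two
  subdomains that each straddle 2 ranks, N = 2; every rank intersects exactly one subdomain,
  so n = 1 on each rank and nmax = 1.
*/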
 11: #include <petsc/private/pcimpl.h>
 12: #include <petscdm.h>

 14: typedef struct {
 15:   PetscInt    N,n,nmax;
 16:   PetscInt    overlap;                  /* overlap requested by user */
 17:   PCGASMType  type;                     /* use reduced interpolation, restriction or both */
 18:   PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  19:   PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are the same */
 20:   PetscBool   sort_indices;             /* flag to sort subdomain indices */
 21:   PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
 22:   PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
 23:   PetscBool   hierarchicalpartitioning;
 24:   IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
 25:   IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
 26:   KSP         *ksp;                     /* linear solvers for each subdomain */
 27:   Mat         *pmat;                    /* subdomain block matrices */
 28:   Vec         gx,gy;                    /* Merged work vectors */
  29:   Vec         *x,*y;                    /* Split work vectors; their storage aliases pieces of the merged vectors above. */
 30:   VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
 31:   VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
 32:   VecScatter  pctoouter;
 33:   IS          permutationIS;
 34:   Mat         permutationP;
 35:   Mat         pcmat;
 36:   Vec         pcx,pcy;
 37: } PC_GASM;

 39: static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
 40: {
 41:   PC_GASM        *osm = (PC_GASM*)pc->data;
 42:   PetscInt       i;

 44:   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
 45:   PetscMalloc2(osm->n,numbering,osm->n,permutation);
 46:   PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
 47:   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
 48:   PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
 49:   return 0;
 50: }

 52: static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
 53: {
 54:   PC_GASM        *osm = (PC_GASM*)pc->data;
 55:   PetscInt       j,nidx;
 56:   const PetscInt *idx;
 57:   PetscViewer    sviewer;
 58:   char           *cidx;

 61:   /* Inner subdomains. */
 62:   ISGetLocalSize(osm->iis[i], &nidx);
 63:   /*
 64:    No more than 15 characters per index plus a space.
 65:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 66:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
  67:    For nidx == 0, the whole string is 16 '\0' characters.
 68:    */
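  /*
   Worked example (illustrative): nidx == 3 gives len = 16*(3+1)+1 = 65 bytes, enough for three
   indices of up to 15 digits each followed by a space, plus the trailing '\0'.
   */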
 69: #define len  16*(nidx+1)+1
 70:   PetscMalloc1(len, &cidx);
 71:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
 72: #undef len
 73:   ISGetIndices(osm->iis[i], &idx);
 74:   for (j = 0; j < nidx; ++j) {
 75:     PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
 76:   }
 77:   ISRestoreIndices(osm->iis[i],&idx);
 78:   PetscViewerDestroy(&sviewer);
 79:   PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
 80:   PetscViewerFlush(viewer);
 81:   PetscViewerASCIIPushSynchronized(viewer);
 82:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
 83:   PetscViewerFlush(viewer);
 84:   PetscViewerASCIIPopSynchronized(viewer);
 85:   PetscViewerASCIIPrintf(viewer, "\n");
 86:   PetscViewerFlush(viewer);
 87:   PetscFree(cidx);
 88:   /* Outer subdomains. */
 89:   ISGetLocalSize(osm->ois[i], &nidx);
 90:   /*
 91:    No more than 15 characters per index plus a space.
 92:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 93:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
  94:    For nidx == 0, the whole string is 16 '\0' characters.
 95:    */
 96: #define len  16*(nidx+1)+1
 97:   PetscMalloc1(len, &cidx);
 98:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);
 99: #undef len
100:   ISGetIndices(osm->ois[i], &idx);
101:   for (j = 0; j < nidx; ++j) {
102:     PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
103:   }
104:   PetscViewerDestroy(&sviewer);
105:   ISRestoreIndices(osm->ois[i],&idx);
106:   PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
107:   PetscViewerFlush(viewer);
108:   PetscViewerASCIIPushSynchronized(viewer);
109:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
110:   PetscViewerFlush(viewer);
111:   PetscViewerASCIIPopSynchronized(viewer);
112:   PetscViewerASCIIPrintf(viewer, "\n");
113:   PetscViewerFlush(viewer);
114:   PetscFree(cidx);
115:   return 0;
116: }

118: static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
119: {
120:   PC_GASM        *osm = (PC_GASM*)pc->data;
121:   const char     *prefix;
122:   char           fname[PETSC_MAX_PATH_LEN+1];
123:   PetscInt       l, d, count;
124:   PetscBool      found;
125:   PetscViewer    viewer, sviewer = NULL;
126:   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

128:   PCGetOptionsPrefix(pc,&prefix);
129:   PetscOptionsHasName(NULL,prefix,"-pc_gasm_print_subdomains",&found);
130:   if (!found) return 0;
131:   PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);
132:   if (!found) PetscStrcpy(fname,"stdout");
133:   PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
134:   /*
135:    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
136:    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
137:   */
138:   PetscObjectName((PetscObject)viewer);
139:   l    = 0;
140:   PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
141:   for (count = 0; count < osm->N; ++count) {
142:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
143:     if (l<osm->n) {
144:       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
145:       if (numbering[d] == count) {
146:         PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
147:         PCGASMSubdomainView_Private(pc,d,sviewer);
148:         PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
149:         ++l;
150:       }
151:     }
152:     MPI_Barrier(PetscObjectComm((PetscObject)pc));
153:   }
154:   PetscFree2(numbering,permutation);
155:   PetscViewerDestroy(&viewer);
156:   return 0;
157: }

159: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
160: {
161:   PC_GASM        *osm = (PC_GASM*)pc->data;
162:   const char     *prefix;
163:   PetscMPIInt    rank, size;
164:   PetscInt       bsz;
165:   PetscBool      iascii,view_subdomains=PETSC_FALSE;
166:   PetscViewer    sviewer;
167:   PetscInt       count, l;
168:   char           overlap[256]     = "user-defined overlap";
169:   char           gsubdomains[256] = "unknown total number of subdomains";
170:   char           msubdomains[256] = "unknown max number of local subdomains";
171:   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

173:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
174:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);

176:   if (osm->overlap >= 0) {
177:     PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
178:   }
179:   if (osm->N != PETSC_DETERMINE) {
180:     PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
181:   }
182:   if (osm->nmax != PETSC_DETERMINE) {
183:     PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
184:   }

186:   PCGetOptionsPrefix(pc,&prefix);
187:   PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);

189:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
190:   if (iascii) {
191:     /*
192:      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
193:      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
194:      collectively on the comm.
195:      */
196:     PetscObjectName((PetscObject)viewer);
197:     PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
198:     PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);
199:     PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);
200:     PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);
201:     PetscViewerASCIIPushSynchronized(viewer);
202:     PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
203:     PetscViewerFlush(viewer);
204:     PetscViewerASCIIPopSynchronized(viewer);
205:     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
206:     PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");
207:     PetscViewerASCIIPushTab(viewer);
208:     PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");
209:     /* Make sure that everybody waits for the banner to be printed. */
210:     MPI_Barrier(PetscObjectComm((PetscObject)viewer));
211:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
212:     PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
213:     l = 0;
214:     for (count = 0; count < osm->N; ++count) {
215:       PetscMPIInt srank, ssize;
216:       if (l<osm->n) {
217:         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
218:         if (numbering[d] == count) {
219:           MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
220:           MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
221:           PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
222:           ISGetLocalSize(osm->ois[d],&bsz);
223:           PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
224:           PetscViewerFlush(sviewer);
225:           if (view_subdomains) {
226:             PCGASMSubdomainView_Private(pc,d,sviewer);
227:           }
228:           if (!pc->setupcalled) {
229:             PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");
230:           } else {
231:             KSPView(osm->ksp[d],sviewer);
232:           }
233:           PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");
234:           PetscViewerFlush(sviewer);
235:           PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
236:           ++l;
237:         }
238:       }
239:       MPI_Barrier(PetscObjectComm((PetscObject)pc));
240:     }
241:     PetscFree2(numbering,permutation);
242:     PetscViewerASCIIPopTab(viewer);
243:     PetscViewerFlush(viewer);
244:     /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
245:     PetscViewerASCIIPopSynchronized(viewer);
246:   }
247:   return 0;
248: }

250: PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);

252: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
253: {
254:    PC_GASM              *osm = (PC_GASM*)pc->data;
255:    MatPartitioning       part;
256:    MPI_Comm              comm;
257:    PetscMPIInt           size;
258:    PetscInt              nlocalsubdomains,fromrows_localsize;
259:    IS                    partitioning,fromrows,isn;
260:    Vec                   outervec;

262:    PetscObjectGetComm((PetscObject)pc,&comm);
263:    MPI_Comm_size(comm,&size);
 264:    /* We do not need a hierarchical partitioning when
 265:     * the total number of subdomains is consistent with
 266:     * the number of MPI tasks;
 267:     * in the following cases we therefore skip it.
 268:     * */
269:    if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return 0;
271:    nlocalsubdomains = size/osm->N;
272:    osm->n           = 1;
273:    MatPartitioningCreate(comm,&part);
274:    MatPartitioningSetAdjacency(part,pc->pmat);
275:    MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
276:    MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
277:    MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
278:    MatPartitioningSetFromOptions(part);
279:    /* get new processor owner number of each vertex */
280:    MatPartitioningApply(part,&partitioning);
281:    ISBuildTwoSided(partitioning,NULL,&fromrows);
282:    ISPartitioningToNumbering(partitioning,&isn);
283:    ISDestroy(&isn);
284:    ISGetLocalSize(fromrows,&fromrows_localsize);
285:    MatPartitioningDestroy(&part);
286:    MatCreateVecs(pc->pmat,&outervec,NULL);
287:    VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
288:    VecDuplicate(osm->pcx,&(osm->pcy));
289:    VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
290:    MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
291:    PetscObjectReference((PetscObject)fromrows);
292:    osm->permutationIS = fromrows;
293:    osm->pcmat =  pc->pmat;
294:    PetscObjectReference((PetscObject)osm->permutationP);
295:    pc->pmat = osm->permutationP;
296:    VecDestroy(&outervec);
297:    ISDestroy(&fromrows);
298:    ISDestroy(&partitioning);
299:    osm->n           = PETSC_DETERMINE;
300:    return 0;
301: }
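/*
  Illustrative invocation (assumed command line, not from the source): on 8 ranks with
  -pc_gasm_total_subdomains 2, the routine above requests 2 coarse parts (one per subdomain) and
  8/2 = 4 fine parts within each, so every subdomain ends up spread over 4 ranks:

     mpiexec -n 8 ./app -pc_type gasm -pc_gasm_total_subdomains 2 -pc_gasm_use_hierachical_partitioning -ksp_view
*/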

303: static PetscErrorCode PCSetUp_GASM(PC pc)
304: {
305:   PC_GASM        *osm = (PC_GASM*)pc->data;
306:   PetscInt       i,nInnerIndices,nTotalInnerIndices;
307:   PetscMPIInt    rank, size;
308:   MatReuse       scall = MAT_REUSE_MATRIX;
309:   KSP            ksp;
310:   PC             subpc;
311:   const char     *prefix,*pprefix;
312:   Vec            x,y;
313:   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
 314:   const PetscInt *oidxi;    /* Indices of the i-th local outer subdomain.                          */
315:   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
316:   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
 317:   IS             gois;      /* Disjoint union of the global indices of outer subdomains.          */
318:   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
319:   PetscScalar    *gxarray, *gyarray;
320:   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
321:   PetscInt       num_subdomains    = 0;
322:   DM             *subdomain_dm     = NULL;
323:   char           **subdomain_names = NULL;
324:   PetscInt       *numbering;

326:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
327:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
328:   if (!pc->setupcalled) {
329:         /* use a hierarchical partitioning */
330:     if (osm->hierarchicalpartitioning) {
331:       PCGASMSetHierarchicalPartitioning(pc);
332:     }
333:     if (osm->n == PETSC_DETERMINE) {
334:       if (osm->N != PETSC_DETERMINE) {
335:            /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
336:            PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
337:       } else if (osm->dm_subdomains && pc->dm) {
338:         /* try pc->dm next, if allowed */
339:         PetscInt  d;
340:         IS       *inner_subdomain_is, *outer_subdomain_is;
341:         DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
342:         if (num_subdomains) {
343:           PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
344:         }
345:         for (d = 0; d < num_subdomains; ++d) {
346:           if (inner_subdomain_is) ISDestroy(&inner_subdomain_is[d]);
347:           if (outer_subdomain_is) ISDestroy(&outer_subdomain_is[d]);
348:         }
349:         PetscFree(inner_subdomain_is);
350:         PetscFree(outer_subdomain_is);
351:       } else {
352:         /* still no subdomains; use one per processor */
353:         osm->nmax = osm->n = 1;
354:         MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
355:         osm->N    = size;
356:         PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
357:       }
358:     }
359:     if (!osm->iis) {
360:       /*
361:        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
362:        We create the requisite number of local inner subdomains and then expand them into
 363:        outer subdomains, if necessary.
364:        */
365:       PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
366:     }
367:     if (!osm->ois) {
368:       /*
369:             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
370:             has been requested, copy the inner subdomains over so they can be modified.
371:       */
372:       PetscMalloc1(osm->n,&osm->ois);
373:       for (i=0; i<osm->n; ++i) {
374:         if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
375:           ISDuplicate(osm->iis[i],(osm->ois)+i);
376:           ISCopy(osm->iis[i],osm->ois[i]);
377:         } else {
378:           PetscObjectReference((PetscObject)((osm->iis)[i]));
379:           osm->ois[i] = osm->iis[i];
380:         }
381:       }
382:       if (osm->overlap>0 && osm->N>1) {
383:         /* Extend the "overlapping" regions by a number of steps */
384:         MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
385:       }
386:     }

388:     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
389:     if (osm->nmax == PETSC_DETERMINE) {
390:       PetscMPIInt inwork,outwork;
391:       /* determine global number of subdomains and the max number of local subdomains */
392:       inwork     = osm->n;
393:       MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
394:       osm->nmax  = outwork;
395:     }
396:     if (osm->N == PETSC_DETERMINE) {
397:       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
398:       PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
399:     }

401:     if (osm->sort_indices) {
402:       for (i=0; i<osm->n; i++) {
403:         ISSort(osm->ois[i]);
404:         ISSort(osm->iis[i]);
405:       }
406:     }
407:     PCGetOptionsPrefix(pc,&prefix);
408:     PCGASMPrintSubdomains(pc);

410:     /*
411:        Merge the ISs, create merged vectors and restrictions.
412:      */
413:     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
414:     on = 0;
415:     for (i=0; i<osm->n; i++) {
416:       ISGetLocalSize(osm->ois[i],&oni);
417:       on  += oni;
418:     }
419:     PetscMalloc1(on, &oidx);
420:     on   = 0;
421:     /* Merge local indices together */
422:     for (i=0; i<osm->n; i++) {
423:       ISGetLocalSize(osm->ois[i],&oni);
424:       ISGetIndices(osm->ois[i],&oidxi);
425:       PetscArraycpy(oidx+on,oidxi,oni);
426:       ISRestoreIndices(osm->ois[i],&oidxi);
427:       on  += oni;
428:     }
429:     ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
430:     nTotalInnerIndices = 0;
431:     for (i=0; i<osm->n; i++) {
432:       ISGetLocalSize(osm->iis[i],&nInnerIndices);
433:       nTotalInnerIndices += nInnerIndices;
434:     }
435:     VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
436:     VecDuplicate(x,&y);

438:     VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
439:     VecDuplicate(osm->gx,&osm->gy);
440:     VecGetOwnershipRange(osm->gx, &gostart, NULL);
441:     ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
 442:     /* gois may contain indices that are not locally owned */
443:     VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
444:     PetscMalloc1(osm->n,&numbering);
445:     PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
446:     VecDestroy(&x);
447:     ISDestroy(&gois);

449:     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
450:     {
 451:       PetscInt        ini;           /* Number of indices in the i-th local inner subdomain. */
452:       PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
453:       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
454:       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
455:       IS              giis;          /* IS for the disjoint union of inner subdomains. */
456:       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
457:       PetscScalar    *array;
458:       const PetscInt *indices;
459:       PetscInt        k;
460:       on = 0;
461:       for (i=0; i<osm->n; i++) {
462:         ISGetLocalSize(osm->ois[i],&oni);
463:         on  += oni;
464:       }
465:       PetscMalloc1(on, &iidx);
466:       PetscMalloc1(on, &ioidx);
467:       VecGetArray(y,&array);
 468:       /* mark each entry of y with its subdomain's global number so that overlap regions can be identified after scattering */
469:       in   = 0;
470:       for (i=0; i<osm->n; i++) {
471:         ISGetLocalSize(osm->iis[i],&ini);
472:         for (k = 0; k < ini; ++k) {
473:           array[in+k] = numbering[i];
474:         }
475:         in += ini;
476:       }
477:       VecRestoreArray(y,&array);
478:       VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
479:       VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
480:       VecGetOwnershipRange(osm->gy,&gostart, NULL);
481:       VecGetArray(osm->gy,&array);
482:       on  = 0;
483:       in  = 0;
484:       for (i=0; i<osm->n; i++) {
485:         ISGetLocalSize(osm->ois[i],&oni);
486:         ISGetIndices(osm->ois[i],&indices);
487:         for (k=0; k<oni; k++) {
488:           /*  skip overlapping indices to get inner domain */
489:           if (PetscRealPart(array[on+k]) != numbering[i]) continue;
490:           iidx[in]    = indices[k];
491:           ioidx[in++] = gostart+on+k;
492:         }
493:         ISRestoreIndices(osm->ois[i], &indices);
494:         on += oni;
495:       }
496:       VecRestoreArray(osm->gy,&array);
497:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
498:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
499:       VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
500:       VecDestroy(&y);
501:       ISDestroy(&giis);
502:       ISDestroy(&giois);
503:     }
504:     ISDestroy(&goid);
505:     PetscFree(numbering);

507:     /* Create the subdomain work vectors. */
508:     PetscMalloc1(osm->n,&osm->x);
509:     PetscMalloc1(osm->n,&osm->y);
510:     VecGetArray(osm->gx, &gxarray);
511:     VecGetArray(osm->gy, &gyarray);
512:     for (i=0, on=0; i<osm->n; ++i, on += oni) {
513:       PetscInt oNi;
514:       ISGetLocalSize(osm->ois[i],&oni);
515:       /* on a sub communicator */
516:       ISGetSize(osm->ois[i],&oNi);
517:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
518:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
519:     }
520:     VecRestoreArray(osm->gx, &gxarray);
521:     VecRestoreArray(osm->gy, &gyarray);
522:     /* Create the subdomain solvers */
523:     PetscMalloc1(osm->n,&osm->ksp);
524:     for (i=0; i<osm->n; i++) {
525:       char subprefix[PETSC_MAX_PATH_LEN+1];
526:       KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
527:       KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
528:       PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
529:       PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
530:       KSPSetType(ksp,KSPPREONLY);
531:       KSPGetPC(ksp,&subpc); /* Why do we need this here? */
532:       if (subdomain_dm) {
533:         KSPSetDM(ksp,subdomain_dm[i]);
534:         DMDestroy(subdomain_dm+i);
535:       }
536:       PCGetOptionsPrefix(pc,&prefix);
537:       KSPSetOptionsPrefix(ksp,prefix);
538:       if (subdomain_names && subdomain_names[i]) {
539:         PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
540:         KSPAppendOptionsPrefix(ksp,subprefix);
541:         PetscFree(subdomain_names[i]);
542:       }
543:       KSPAppendOptionsPrefix(ksp,"sub_");
544:       osm->ksp[i] = ksp;
545:     }
546:     PetscFree(subdomain_dm);
547:     PetscFree(subdomain_names);
548:     scall = MAT_INITIAL_MATRIX;
549:   } else { /* if (pc->setupcalled) */
550:     /*
551:        Destroy the submatrices from the previous iteration
552:     */
553:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
554:       MatDestroyMatrices(osm->n,&osm->pmat);
555:       scall = MAT_INITIAL_MATRIX;
556:     }
557:     if (osm->permutationIS) {
558:       MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
559:       PetscObjectReference((PetscObject)osm->permutationP);
560:       osm->pcmat = pc->pmat;
561:       pc->pmat   = osm->permutationP;
562:     }
563:   }

565:   /*
566:      Extract the submatrices.
567:   */
568:   if (size > 1) {
569:     MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
570:   } else {
571:     MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
572:   }
573:   if (scall == MAT_INITIAL_MATRIX) {
574:     PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
575:     for (i=0; i<osm->n; i++) {
576:       PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
577:       PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
578:     }
579:   }

581:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
582:      different boundary conditions for the submatrices than for the global problem) */
583:   PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);

585:   /*
586:      Loop over submatrices putting them into local ksps
587:   */
588:   for (i=0; i<osm->n; i++) {
589:     KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
590:     KSPGetOptionsPrefix(osm->ksp[i],&prefix);
591:     MatSetOptionsPrefix(osm->pmat[i],prefix);
592:     if (!pc->setupcalled) {
593:       KSPSetFromOptions(osm->ksp[i]);
594:     }
595:   }
596:   if (osm->pcmat) {
597:     MatDestroy(&pc->pmat);
598:     pc->pmat   = osm->pcmat;
599:     osm->pcmat = NULL;
600:   }
601:   return 0;
602: }

604: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
605: {
606:   PC_GASM        *osm = (PC_GASM*)pc->data;
607:   PetscInt       i;

609:   for (i=0; i<osm->n; i++) {
610:     KSPSetUp(osm->ksp[i]);
611:   }
612:   return 0;
613: }

615: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
616: {
617:   PC_GASM        *osm = (PC_GASM*)pc->data;
618:   PetscInt       i;
619:   Vec            x,y;
620:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

622:   if (osm->pctoouter) {
623:     VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
624:     VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
625:     x = osm->pcx;
626:     y = osm->pcy;
627:   } else {
628:     x = xin;
629:     y = yout;
630:   }
631:   /*
632:      support for limiting the restriction or interpolation only to the inner
633:      subdomain values (leaving the other values 0).
634:   */
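  /*
     In operator form (a rough summary of the branches below): with the default PC_GASM_RESTRICT
     type the action is y = sum_i Rtilde_i^T A_i^{-1} R_i x, where R_i restricts x onto the i-th
     outer subdomain, A_i is the corresponding submatrix, and Rtilde_i^T adds back only the
     inner-subdomain portion of each local solve.
  */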
635:   if (!(osm->type & PC_GASM_RESTRICT)) {
636:     /* have to zero the work RHS since scatter may leave some slots empty */
637:     VecZeroEntries(osm->gx);
638:     VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
639:   } else {
640:     VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
641:   }
642:   VecZeroEntries(osm->gy);
643:   if (!(osm->type & PC_GASM_RESTRICT)) {
644:     VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
645:   } else {
646:     VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
647:   }
648:   /* do the subdomain solves */
649:   for (i=0; i<osm->n; ++i) {
650:     KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
651:     KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
652:   }
653:   /* do we need to zero y? */
654:   VecZeroEntries(y);
655:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
656:     VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
657:     VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
658:   } else {
659:     VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
660:     VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
661:   }
662:   if (osm->pctoouter) {
663:     VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
664:     VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
665:   }
666:   return 0;
667: }

669: static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
670: {
671:   PC_GASM        *osm = (PC_GASM*)pc->data;
672:   Mat            X,Y,O=NULL,Z,W;
673:   Vec            x,y;
674:   PetscInt       i,m,M,N;
675:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

678:   MatGetSize(Xin,NULL,&N);
679:   if (osm->pctoouter) {
680:     VecGetLocalSize(osm->pcx,&m);
681:     VecGetSize(osm->pcx,&M);
682:     MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);
683:     for (i = 0; i < N; ++i) {
684:       MatDenseGetColumnVecRead(Xin,i,&x);
685:       MatDenseGetColumnVecWrite(O,i,&y);
686:       VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
687:       VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);
688:       MatDenseRestoreColumnVecWrite(O,i,&y);
689:       MatDenseRestoreColumnVecRead(Xin,i,&x);
690:     }
691:     X = Y = O;
692:   } else {
693:     X = Xin;
694:     Y = Yout;
695:   }
696:   /*
697:      support for limiting the restriction or interpolation only to the inner
698:      subdomain values (leaving the other values 0).
699:   */
700:   VecGetLocalSize(osm->x[0],&m);
701:   VecGetSize(osm->x[0],&M);
702:   MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);
703:   for (i = 0; i < N; ++i) {
704:     MatDenseGetColumnVecRead(X,i,&x);
705:     MatDenseGetColumnVecWrite(Z,i,&y);
706:     if (!(osm->type & PC_GASM_RESTRICT)) {
707:       /* have to zero the work RHS since scatter may leave some slots empty */
708:       VecZeroEntries(y);
709:       VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);
710:       VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);
711:     } else {
712:       VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);
713:       VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);
714:     }
715:     MatDenseRestoreColumnVecWrite(Z,i,&y);
716:     MatDenseRestoreColumnVecRead(X,i,&x);
717:   }
718:   MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);
719:   MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
720:   MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);
721:   MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);
722:   /* do the subdomain solve */
723:   KSPMatSolve(osm->ksp[0],Z,W);
724:   KSPCheckSolve(osm->ksp[0],pc,NULL);
725:   MatDestroy(&Z);
726:   /* do we need to zero y? */
727:   MatZeroEntries(Y);
728:   for (i = 0; i < N; ++i) {
729:     MatDenseGetColumnVecWrite(Y,i,&y);
730:     MatDenseGetColumnVecRead(W,i,&x);
731:     if (!(osm->type & PC_GASM_INTERPOLATE)) {
732:       VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);
733:       VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);
734:     } else {
735:       VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);
736:       VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);
737:     }
738:     MatDenseRestoreColumnVecRead(W,i,&x);
739:     if (osm->pctoouter) {
740:       MatDenseGetColumnVecWrite(Yout,i,&x);
741:       VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
742:       VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);
743:       MatDenseRestoreColumnVecRead(Yout,i,&x);
744:     }
745:     MatDenseRestoreColumnVecWrite(Y,i,&y);
746:   }
747:   MatDestroy(&W);
748:   MatDestroy(&O);
749:   return 0;
750: }

752: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
753: {
754:   PC_GASM        *osm = (PC_GASM*)pc->data;
755:   PetscInt       i;
756:   Vec            x,y;
757:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

759:   if (osm->pctoouter) {
760:    VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
761:    VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
762:    x = osm->pcx;
763:    y = osm->pcy;
764:   }else{
765:         x = xin;
766:         y = yout;
767:   }
768:   /*
769:      Support for limiting the restriction or interpolation to only local
770:      subdomain values (leaving the other values 0).

772:      Note: these are reversed from the PCApply_GASM() because we are applying the
773:      transpose of the three terms
774:   */
775:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
776:     /* have to zero the work RHS since scatter may leave some slots empty */
777:     VecZeroEntries(osm->gx);
778:     VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
779:   } else {
780:     VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
781:   }
782:   VecZeroEntries(osm->gy);
783:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
784:     VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
785:   } else {
786:     VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
787:   }
788:   /* do the local solves */
789:   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
790:     KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
791:     KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
792:   }
793:   VecZeroEntries(y);
794:   if (!(osm->type & PC_GASM_RESTRICT)) {
795:     VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
796:     VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
797:   } else {
798:     VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
799:     VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
800:   }
801:   if (osm->pctoouter) {
802:    VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
803:    VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
804:   }
805:   return 0;
806: }

808: static PetscErrorCode PCReset_GASM(PC pc)
809: {
810:   PC_GASM        *osm = (PC_GASM*)pc->data;
811:   PetscInt       i;

813:   if (osm->ksp) {
814:     for (i=0; i<osm->n; i++) {
815:       KSPReset(osm->ksp[i]);
816:     }
817:   }
818:   if (osm->pmat) {
819:     if (osm->n > 0) {
820:       PetscMPIInt size;
821:       MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
822:       if (size > 1) {
823:         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
824:         MatDestroyMatrices(osm->n,&osm->pmat);
825:       } else {
826:         MatDestroySubMatrices(osm->n,&osm->pmat);
827:       }
828:     }
829:   }
830:   if (osm->x) {
831:     for (i=0; i<osm->n; i++) {
832:       VecDestroy(&osm->x[i]);
833:       VecDestroy(&osm->y[i]);
834:     }
835:   }
836:   VecDestroy(&osm->gx);
837:   VecDestroy(&osm->gy);

839:   VecScatterDestroy(&osm->gorestriction);
840:   VecScatterDestroy(&osm->girestriction);
841:   if (!osm->user_subdomains) {
842:     PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
843:     osm->N    = PETSC_DETERMINE;
844:     osm->nmax = PETSC_DETERMINE;
845:   }
846:   if (osm->pctoouter) {
847:         VecScatterDestroy(&(osm->pctoouter));
848:   }
849:   if (osm->permutationIS) {
850:         ISDestroy(&(osm->permutationIS));
851:   }
852:   if (osm->pcx) {
853:         VecDestroy(&(osm->pcx));
854:   }
855:   if (osm->pcy) {
856:         VecDestroy(&(osm->pcy));
857:   }
858:   if (osm->permutationP) {
859:     MatDestroy(&(osm->permutationP));
860:   }
861:   if (osm->pcmat) {
862:         MatDestroy(&osm->pcmat);
863:   }
864:   return 0;
865: }

867: static PetscErrorCode PCDestroy_GASM(PC pc)
868: {
869:   PC_GASM        *osm = (PC_GASM*)pc->data;
870:   PetscInt       i;

872:   PCReset_GASM(pc);
 873:   /* PCReset() will not destroy the subdomains if user_subdomains is true. */
874:   PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
875:   if (osm->ksp) {
876:     for (i=0; i<osm->n; i++) {
877:       KSPDestroy(&osm->ksp[i]);
878:     }
879:     PetscFree(osm->ksp);
880:   }
881:   PetscFree(osm->x);
882:   PetscFree(osm->y);
883:   PetscFree(pc->data);
884:   return 0;
885: }

887: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
888: {
889:   PC_GASM        *osm = (PC_GASM*)pc->data;
890:   PetscInt       blocks,ovl;
891:   PetscBool      flg;
892:   PCGASMType     gasmtype;

894:   PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
895:   PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
896:   PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
897:   if (flg) {
898:     PCGASMSetTotalSubdomains(pc,blocks);
899:   }
900:   PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
901:   if (flg) {
902:     PCGASMSetOverlap(pc,ovl);
903:     osm->dm_subdomains = PETSC_FALSE;
904:   }
905:   flg  = PETSC_FALSE;
906:   PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
907:   if (flg) PCGASMSetType(pc,gasmtype);
908:   PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
909:   PetscOptionsTail();
910:   return 0;
911: }

913: /*------------------------------------------------------------------------------------*/

915: /*@
916:     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
917:                                communicator.
918:     Logically collective on pc

920:     Input Parameters:
921: +   pc  - the preconditioner
922: -   N   - total number of subdomains

924:     Level: beginner

926: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
927:           PCGASMCreateSubdomains2D()
928: @*/
929: PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
930: {
931:   PC_GASM        *osm = (PC_GASM*)pc->data;
932:   PetscMPIInt    size,rank;


937:   PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
938:   osm->ois = osm->iis = NULL;

940:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
941:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
942:   osm->N    = N;
943:   osm->n    = PETSC_DETERMINE;
944:   osm->nmax = PETSC_DETERMINE;
945:   osm->dm_subdomains = PETSC_FALSE;
946:   return 0;
947: }
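/*
  Minimal usage sketch (assumes an assembled Mat A and conforming Vecs b and x; not taken from a
  PETSc example): request 16 subdomains across the communicator and let PCSetUp() construct them.

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetTotalSubdomains(pc,16);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
*/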

949: static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
950: {
951:   PC_GASM         *osm = (PC_GASM*)pc->data;
952:   PetscInt        i;


957:   PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
958:   osm->iis  = osm->ois = NULL;
959:   osm->n    = n;
960:   osm->N    = PETSC_DETERMINE;
961:   osm->nmax = PETSC_DETERMINE;
962:   if (ois) {
963:     PetscMalloc1(n,&osm->ois);
964:     for (i=0; i<n; i++) {
965:       PetscObjectReference((PetscObject)ois[i]);
966:       osm->ois[i] = ois[i];
967:     }
968:     /*
969:        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
970:        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
971:     */
972:     osm->overlap = -1;
973:     /* inner subdomains must be provided  */
975:   }/* end if */
976:   if (iis) {
977:     PetscMalloc1(n,&osm->iis);
978:     for (i=0; i<n; i++) {
979:       PetscObjectReference((PetscObject)iis[i]);
980:       osm->iis[i] = iis[i];
981:     }
982:     if (!ois) {
983:       osm->ois = NULL;
 984:       /* if the user does not provide outer indices, we will create the corresponding outer indices using osm->overlap = 1 in PCSetUp_GASM */
985:     }
986:   }
987:   if (PetscDefined(USE_DEBUG)) {
988:     PetscInt        j,rstart,rend,*covered,lsize;
989:     const PetscInt  *indices;
990:     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
991:     MatGetOwnershipRange(pc->pmat,&rstart,&rend);
992:     PetscCalloc1(rend-rstart,&covered);
993:     /* check if the current processor owns indices from others */
994:     for (i=0; i<n; i++) {
995:       ISGetIndices(osm->iis[i],&indices);
996:       ISGetLocalSize(osm->iis[i],&lsize);
 997:       for (j=0; j<lsize; j++) {
 998:         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %D from other processors",indices[j]);
 999:         else if (covered[indices[j]-rstart] == 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %D",indices[j]);
1000:         else covered[indices[j]-rstart] = 1;
1001:       }
1002:       ISRestoreIndices(osm->iis[i],&indices);
1003:     }
1004:     /* check if we miss any indices */
1005:     for (i=rstart; i<rend; i++) {
1006:       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local entity %D was not covered by inner subdomains",i);
1007:     }
1008:     PetscFree(covered);
1009:   }
1010:   if (iis)  osm->user_subdomains = PETSC_TRUE;
1011:   return 0;
1012: }

1014: static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1015: {
1016:   PC_GASM *osm = (PC_GASM*)pc->data;

1020:   if (!pc->setupcalled) osm->overlap = ovl;
1021:   return 0;
1022: }

1024: static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1025: {
1026:   PC_GASM *osm = (PC_GASM*)pc->data;

1028:   osm->type     = type;
1029:   osm->type_set = PETSC_TRUE;
1030:   return 0;
1031: }

1033: static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1034: {
1035:   PC_GASM *osm = (PC_GASM*)pc->data;

1037:   osm->sort_indices = doSort;
1038:   return 0;
1039: }

1041: /*
1042:    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1043:         In particular, it would upset the global subdomain number calculation.
1044: */
1045: static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1046: {
1047:   PC_GASM        *osm = (PC_GASM*)pc->data;


1051:   if (n) *n = osm->n;
1052:   if (first) {
1053:     MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1054:     *first -= osm->n;
1055:   }
1056:   if (ksp) {
1057:     /* Assume that local solves are now different; not necessarily
1058:        true, though!  This flag is used only for PCView_GASM() */
1059:     *ksp                        = osm->ksp;
1060:     osm->same_subdomain_solvers = PETSC_FALSE;
1061:   }
1062:   return 0;
1063: } /* PCGASMGetSubKSP_GASM() */

1065: /*@C
1066:     PCGASMSetSubdomains - Sets the subdomains for this processor
1067:     for the additive Schwarz preconditioner.

1069:     Collective on pc

1071:     Input Parameters:
1072: +   pc  - the preconditioner object
1073: .   n   - the number of subdomains for this processor
1074: .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1075: -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

1077:     Notes:
1078:     The IS indices use the parallel, global numbering of the vector entries.
1079:     Inner subdomains are those where the correction is applied.
1080:     Outer subdomains are those where the residual necessary to obtain the
1081:     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1082:     Both inner and outer subdomains can extend over several processors.
1083:     This processor's portion of a subdomain is known as a local subdomain.

1085:     Inner subdomains cannot overlap with each other, must not contain any entities owned by remote
1086:     processors, and have to cover the entire local portion owned by the current processor. The index
1087:     sets on each process should be ordered such that the ith local subdomain is connected to the ith
1088:     remote subdomain on another MPI process.

1090:     By default the GASM preconditioner uses 1 (local) subdomain per processor.

1092:     Level: advanced

1094: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1095:           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1096: @*/
1097: PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1098: {
1099:   PC_GASM *osm = (PC_GASM*)pc->data;

1102:   PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1103:   osm->dm_subdomains = PETSC_FALSE;
1104:   return 0;
1105: }
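/*
  Usage sketch (illustrative; assumes a Mat A and a PC pc of type PCGASM): supply one inner
  subdomain per process consisting of the locally owned rows, and pass NULL for the outer
  subdomains so that PCSetUp_GASM() builds them from the requested overlap.

     IS       iis[1];
     PetscInt rstart,rend;
     MatGetOwnershipRange(A,&rstart,&rend);
     ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&iis[0]);
     PCGASMSetSubdomains(pc,1,iis,NULL);
     ISDestroy(&iis[0]);
*/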

1107: /*@
1108:     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1109:     additive Schwarz preconditioner.  Either all or no processors in the
1110:     pc communicator must call this routine.

1112:     Logically Collective on pc

1114:     Input Parameters:
1115: +   pc  - the preconditioner context
1116: -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

1118:     Options Database Key:
1119: .   -pc_gasm_overlap <overlap> - Sets overlap

1121:     Notes:
1122:     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1123:     multiple subdomains per processor or "straddling" subdomains that intersect
1124:     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).

1126:     The overlap defaults to 0, so if one desires that no additional
1127:     overlap be computed beyond what may have been set with a call to
1128:     PCGASMSetSubdomains(), then ovl must be set to 0.  In particular, if one does
1129:     not explicitly set the subdomains in application code, then all overlap would be computed
1130:     internally by PETSc, and using an overlap of 0 would result in a GASM
1131:     variant that is equivalent to the block Jacobi preconditioner.

1133:     Note that one can define initial index sets with any overlap via
1134:     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1135:     PETSc to extend that overlap further, if desired.

1137:     Level: intermediate

1139: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1140:           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1141: @*/
1142: PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1143: {
1144:   PC_GASM *osm = (PC_GASM*)pc->data;

1148:   PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1149:   osm->dm_subdomains = PETSC_FALSE;
1150:   return 0;
1151: }
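/*
  Usage sketch (illustrative): extend each subdomain by two levels of matrix-graph overlap; the
  same effect can be obtained with the option -pc_gasm_overlap 2.

     PCSetType(pc,PCGASM);
     PCGASMSetOverlap(pc,2);
*/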

1153: /*@
1154:     PCGASMSetType - Sets the type of restriction and interpolation used
1155:     for local problems in the additive Schwarz method.

1157:     Logically Collective on PC

1159:     Input Parameters:
1160: +   pc  - the preconditioner context
1161: -   type - variant of GASM, one of
1162: .vb
1163:       PC_GASM_BASIC       - full interpolation and restriction
1164:       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1165:       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1166:       PC_GASM_NONE        - local processor restriction and interpolation
1167: .ve

1169:     Options Database Key:
1170: .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

1172:     Level: intermediate

1174: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1175:           PCGASMCreateSubdomains2D()
1176: @*/
1177: PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1178: {
1181:   PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1182:   return 0;
1183: }

1185: /*@
1186:     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

1188:     Logically Collective on PC

1190:     Input Parameters:
1191: +   pc  - the preconditioner context
1192: -   doSort - sort the subdomain indices

1194:     Level: intermediate

1196: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1197:           PCGASMCreateSubdomains2D()
1198: @*/
1199: PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1200: {
1203:   PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1204:   return 0;
1205: }

1207: /*@C
1208:    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1209:    this processor.

1211:    Collective on PC iff first_local is requested

1213:    Input Parameter:
1214: .  pc - the preconditioner context

1216:    Output Parameters:
1217: +  n_local - the number of blocks on this processor or NULL
1218: .  first_local - the global number of the first block on this processor or NULL,
1219:                  all processors must request or all must pass NULL
1220: -  ksp - the array of KSP contexts

1222:    Note:
1223:    After PCGASMGetSubKSP() the array of KSP contexts is not to be freed

1225:    Currently for some matrix implementations only 1 block per processor
1226:    is supported.

1228:    You must call KSPSetUp() before calling PCGASMGetSubKSP().

1230:    Level: advanced

1232: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1233:           PCGASMCreateSubdomains2D()
1234: @*/
1235: PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1236: {
1238:   PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1239:   return 0;
1240: }
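/*
  Usage sketch (illustrative; assumes the default of one single-rank subdomain per process, so that
  PCILU is applicable): fetch the local solvers after KSPSetUp() and configure each one directly.

     KSP      *subksp;
     PC        subpc;
     PetscInt  nlocal,i;
     KSPSetUp(ksp);
     PCGASMGetSubKSP(pc,&nlocal,NULL,&subksp);
     for (i=0; i<nlocal; i++) {
       KSPSetType(subksp[i],KSPGMRES);
       KSPGetPC(subksp[i],&subpc);
       PCSetType(subpc,PCILU);
     }
*/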

1242: /* -------------------------------------------------------------------------------------*/
1243: /*MC
1244:    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1245:            its own KSP object.

1247:    Options Database Keys:
1248: +  -pc_gasm_total_subdomains <n>  - Sets the total number of subdomains to be distributed among the processors
1249: .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1250: .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1251: .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1252: -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

1254:      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1255:       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1256:       -pc_gasm_type basic to use the standard GASM.

1258:    Notes:
1259:     Blocks can be shared by multiple processes.

1261:      To set options on the solvers for each block, append -sub_ to all the KSP and PC
1262:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

1264:      To set the options on the solvers separately for each block, call PCGASMGetSubKSP()
1265:          and set the options directly on the resulting KSP object (you can access its PC
1266:          with KSPGetPC())

1268:    Level: beginner

1270:     References:
1271: +   * - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1272:      Courant Institute, New York University Technical report
1273: -   * - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1274:     Cambridge University Press.

1276: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1277:            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1278:            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()

1280: M*/
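/*
  Illustrative command line (the executable name ./app is an assumption): one subdomain per rank,
  one level of overlap, and ILU(1) subdomain solves, mirroring the option names listed above.

     mpiexec -n 8 ./app -ksp_type gmres -pc_type gasm -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1 -ksp_view
*/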

1282: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1283: {
1284:   PC_GASM        *osm;

1286:   PetscNewLog(pc,&osm);

1288:   osm->N                        = PETSC_DETERMINE;
1289:   osm->n                        = PETSC_DECIDE;
1290:   osm->nmax                     = PETSC_DETERMINE;
1291:   osm->overlap                  = 0;
1292:   osm->ksp                      = NULL;
1293:   osm->gorestriction            = NULL;
1294:   osm->girestriction            = NULL;
1295:   osm->pctoouter                = NULL;
1296:   osm->gx                       = NULL;
1297:   osm->gy                       = NULL;
1298:   osm->x                        = NULL;
1299:   osm->y                        = NULL;
1300:   osm->pcx                      = NULL;
1301:   osm->pcy                      = NULL;
1302:   osm->permutationIS            = NULL;
1303:   osm->permutationP             = NULL;
1304:   osm->pcmat                    = NULL;
1305:   osm->ois                      = NULL;
1306:   osm->iis                      = NULL;
1307:   osm->pmat                     = NULL;
1308:   osm->type                     = PC_GASM_RESTRICT;
1309:   osm->same_subdomain_solvers   = PETSC_TRUE;
1310:   osm->sort_indices             = PETSC_TRUE;
1311:   osm->dm_subdomains            = PETSC_FALSE;
1312:   osm->hierarchicalpartitioning = PETSC_FALSE;

1314:   pc->data                 = (void*)osm;
1315:   pc->ops->apply           = PCApply_GASM;
1316:   pc->ops->matapply        = PCMatApply_GASM;
1317:   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1318:   pc->ops->setup           = PCSetUp_GASM;
1319:   pc->ops->reset           = PCReset_GASM;
1320:   pc->ops->destroy         = PCDestroy_GASM;
1321:   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1322:   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1323:   pc->ops->view            = PCView_GASM;
1324:   pc->ops->applyrichardson = NULL;

1326:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1327:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1328:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1329:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1330:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1331:   return 0;
1332: }

1334: PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1335: {
1336:   MatPartitioning mpart;
1337:   const char      *prefix;
1338:   PetscInt        i,j,rstart,rend,bs;
1339:   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1340:   Mat             Ad     = NULL, adj;
1341:   IS              ispart,isnumb,*is;


1345:   /* Get prefix, row distribution, and block size */
1346:   MatGetOptionsPrefix(A,&prefix);
1347:   MatGetOwnershipRange(A,&rstart,&rend);
1348:   MatGetBlockSize(A,&bs);

1351:   /* Get diagonal block from matrix if possible */
1352:   MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1353:   if (hasop) {
1354:     MatGetDiagonalBlock(A,&Ad);
1355:   }
1356:   if (Ad) {
1357:     PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1358:     if (!isbaij) PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);
1359:   }
1360:   if (Ad && nloc > 1) {
1361:     PetscBool  match,done;
1362:     /* Try to setup a good matrix partitioning if available */
1363:     MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1364:     PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1365:     MatPartitioningSetFromOptions(mpart);
1366:     PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1367:     if (!match) {
1368:       PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1369:     }
1370:     if (!match) { /* assume a "good" partitioner is available */
1371:       PetscInt       na;
1372:       const PetscInt *ia,*ja;
1373:       MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1374:       if (done) {
1375:         /* Build adjacency matrix by hand. Unfortunately a call to
1376:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1377:            remove the block-aij structure and we cannot expect
1378:            MatPartitioning to split vertices as we need */
1379:         PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1380:         const PetscInt *row;
1381:         nnz = 0;
1382:         for (i=0; i<na; i++) { /* count number of nonzeros */
1383:           len = ia[i+1] - ia[i];
1384:           row = ja + ia[i];
1385:           for (j=0; j<len; j++) {
1386:             if (row[j] == i) { /* don't count diagonal */
1387:               len--; break;
1388:             }
1389:           }
1390:           nnz += len;
1391:         }
1392:         PetscMalloc1(na+1,&iia);
1393:         PetscMalloc1(nnz,&jja);
1394:         nnz    = 0;
1395:         iia[0] = 0;
1396:         for (i=0; i<na; i++) { /* fill adjacency */
1397:           cnt = 0;
1398:           len = ia[i+1] - ia[i];
1399:           row = ja + ia[i];
1400:           for (j=0; j<len; j++) {
1401:             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1402:           }
1403:           nnz += cnt;
1404:           iia[i+1] = nnz;
1405:         }
1406:         /* Partitioning of the adjacency matrix */
1407:         MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1408:         MatPartitioningSetAdjacency(mpart,adj);
1409:         MatPartitioningSetNParts(mpart,nloc);
1410:         MatPartitioningApply(mpart,&ispart);
1411:         ISPartitioningToNumbering(ispart,&isnumb);
1412:         MatDestroy(&adj);
1413:         foundpart = PETSC_TRUE;
1414:       }
1415:       MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1416:     }
1417:     MatPartitioningDestroy(&mpart);
1418:   }
1419:   PetscMalloc1(nloc,&is);
1420:   if (!foundpart) {

1422:     /* Partitioning by contiguous chunks of rows */

1424:     PetscInt mbs   = (rend-rstart)/bs;
1425:     PetscInt start = rstart;
1426:     for (i=0; i<nloc; i++) {
1427:       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1428:       ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1429:       start += count;
1430:     }

1432:   } else {

1434:     /* Partitioning by adjacency of diagonal block  */

1436:     const PetscInt *numbering;
1437:     PetscInt       *count,nidx,*indices,*newidx,start=0;
1438:     /* Get node count in each partition */
1439:     PetscMalloc1(nloc,&count);
1440:     ISPartitioningCount(ispart,nloc,count);
1441:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1442:       for (i=0; i<nloc; i++) count[i] *= bs;
1443:     }
1444:     /* Build indices from node numbering */
1445:     ISGetLocalSize(isnumb,&nidx);
1446:     PetscMalloc1(nidx,&indices);
1447:     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1448:     ISGetIndices(isnumb,&numbering);
1449:     PetscSortIntWithPermutation(nidx,numbering,indices);
1450:     ISRestoreIndices(isnumb,&numbering);
1451:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1452:       PetscMalloc1(nidx*bs,&newidx);
1453:       for (i=0; i<nidx; i++) {
1454:         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1455:       }
1456:       PetscFree(indices);
1457:       nidx   *= bs;
1458:       indices = newidx;
1459:     }
1460:     /* Shift to get global indices */
1461:     for (i=0; i<nidx; i++) indices[i] += rstart;

1463:     /* Build the index sets for each block */
1464:     for (i=0; i<nloc; i++) {
1465:       ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1466:       ISSort(is[i]);
1467:       start += count[i];
1468:     }

1470:     PetscFree(count);
1471:     PetscFree(indices);
1472:     ISDestroy(&isnumb);
1473:     ISDestroy(&ispart);
1474:   }
1475:   *iis = is;
1476:   return 0;
1477: }
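
/*
   Illustrative sketch (added for exposition): split the locally owned rows of a matrix into 4 purely
   local subdomains, either with a MatPartitioning (when one is available) or by the contiguous-chunk
   fallback above, and hand them to a PCGASM.  A and pc are assumed to be an assembled Mat and a PC of
   type PCGASM created elsewhere.

     PetscInt n = 4;
     IS       *iis;

     PCGASMCreateLocalSubdomains(A,n,&iis);
     PCGASMSetSubdomains(pc,n,iis,NULL);      // outer subdomains are generated from the requested overlap
     PCGASMDestroySubdomains(n,&iis,NULL);    // the PC keeps its own references
*/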

1479: PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1480: {
1481:   MatSubdomainsCreateCoalesce(A,N,n,iis);
1482:   return 0;
1483: }

1485: /*@C
1486:    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1487:    Schwarz preconditioner for any problem based on its matrix.

1489:    Collective

1491:    Input Parameters:
1492: +  A       - The global matrix operator
1493: -  N       - the number of global subdomains requested

1495:    Output Parameters:
1496: +  n   - the number of subdomains created on this processor
1497: -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)

1499:    Level: advanced

1501:    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1502:          When N < size, the subdomains straddle processor boundaries and are no longer local.
1503:          The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL).  The overlapping
1504:          outer subdomains will be automatically generated from these according to the requested amount of
1505:          overlap; this is currently supported only with local subdomains.

1507: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1508: @*/
1509: PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1510: {
1511:   PetscMPIInt     size;


1517:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1518:   if (N >= size) {
1519:     *n = N/size + (N%size);
1520:     PCGASMCreateLocalSubdomains(A,*n,iis);
1521:   } else {
1522:     PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1523:   }
1524:   return 0;
1525: }
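
/*
   Illustrative sketch (added for exposition): request N global subdomains for the matrix of an existing
   PCGASM preconditioner and install them.  If N is at least the communicator size each rank receives local
   subdomains; if N is smaller, coalesced "straddling" subdomains are created as described above.  A and pc
   are assumed to exist already.

     PetscInt N = 8, n;
     IS       *iis;

     PCGASMCreateSubdomains(A,N,&n,&iis);     // n = number of subdomains on this rank
     PCGASMSetSubdomains(pc,n,iis,NULL);
     PCGASMDestroySubdomains(n,&iis,NULL);
*/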

1527: /*@C
1528:    PCGASMDestroySubdomains - Destroys the index sets created with
1529:    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1530:    called after setting subdomains with PCGASMSetSubdomains().

1532:    Collective

1534:    Input Parameters:
1535: +  n   - the number of index sets
1536: .  iis - the array of inner subdomains,
1537: -  ois - the array of outer subdomains, can be NULL

1539:    Level: intermediate

1541:    Notes:
1542:     this is merely a convenience subroutine that walks each list,
1543:    destroys each IS on the list, and then frees the list. At the end the
1544:    list pointers are set to NULL.

1546: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1547: @*/
1548: PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1549: {
1550:   PetscInt       i;

1552:   if (n <= 0) return 0;
1553:   if (ois) {
1555:     if (*ois) {
1557:       for (i=0; i<n; i++) {
1558:         ISDestroy(&(*ois)[i]);
1559:       }
1560:       PetscFree((*ois));
1561:     }
1562:   }
1563:   if (iis) {
1565:     if (*iis) {
1567:       for (i=0; i<n; i++) {
1568:         ISDestroy(&(*iis)[i]);
1569:       }
1570:       PetscFree((*iis));
1571:     }
1572:   }
1573:   return 0;
1574: }

1576: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1577:   {                                                                                                       \
1578:     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1579:     /*                                                                                                    \
1580:      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1581:      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1582:      subdomain).                                                                                          \
1583:      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1584:      of the intersection.                                                                                 \
1585:     */                                                                                                    \
1586:     /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1587:     *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1588:     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1589:     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
1590:     /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1591:     *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1592:     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */        \
1593:     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
1594:     /* Now compute the size of the local subdomain n. */ \
1595:     *n = 0;                                               \
1596:     if (*ylow_loc < *yhigh_loc) {                           \
1597:       PetscInt width = xright-xleft;                     \
1598:       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1599:       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1600:       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1601:     } \
1602:   }
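
/*
   Worked example (added for exposition) of PCGASMLocalSubdomainBounds2D on a 6x6 grid (M = N = 6)
   split between two ranks, where rank 0 owns global rows [first,last) = [0,18), i.e. grid rows 0..2.
   For an overlapping subdomain with xleft = 0, xright = 3, ylow = 0, yhigh = 4:

     first_row = 0/6 = 0,   last_row = 18/6 + 1 = 4
     ylow_loc  = max(0,0) = 0          xleft_loc  = max(0%6,0)   = 0
     yhigh_loc = min(4,4) = 4          xright_loc = min(3,18%6)  = 0

     n = width*(yhigh_loc - ylow_loc - 1)        = 3*3 = 9
       + min(max(xright_loc - xleft,0),width)    = 0    (partial last row lies above rank 0's range)
       - min(max(xleft_loc  - xleft,0),width)    = 0    (no offset into the first row)
       = 9

   which matches the 3 rows x 3 columns of the subdomain that actually lie in rank 0's ownership range.
*/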

1604: /*@
1605:    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1606:    preconditioner for a two-dimensional problem on a regular grid.

1608:    Collective

1610:    Input Parameters:
1611: +  pc       - the preconditioner context
1612: .  M        - the global number of grid points in the x direction
1613: .  N        - the global number of grid points in the y direction
1614: .  Mdomains - the global number of subdomains in the x direction
1615: .  Ndomains - the global number of subdomains in the y direction
1616: .  dof      - degrees of freedom per node
1617: -  overlap  - overlap in mesh lines

1619:    Output Parameters:
1620: +  nsub - the number of local subdomains created
1621: .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1622: -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains

1624:    Level: advanced

1626: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1627: @*/
1628: PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1629: {
1630:   PetscMPIInt    size, rank;
1631:   PetscInt       i, j;
1632:   PetscInt       maxheight, maxwidth;
1633:   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1634:   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1635:   PetscInt       x[2][2], y[2][2], n[2];
1636:   PetscInt       first, last;
1637:   PetscInt       nidx, *idx;
1638:   PetscInt       ii,jj,s,q,d;
1639:   PetscInt       k,kk;
1640:   PetscMPIInt    color;
1641:   MPI_Comm       comm, subcomm;
1642:   IS             **xis = NULL, **is = ois, **is_local = iis;

1644:   PetscObjectGetComm((PetscObject)pc, &comm);
1645:   MPI_Comm_size(comm, &size);
1646:   MPI_Comm_rank(comm, &rank);
1647:   MatGetOwnershipRange(pc->pmat, &first, &last);

1651:   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1652:   s      = 0;
1653:   ystart = 0;
1654:   for (j=0; j<Ndomains; ++j) {
1655:     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1657:     /* Vertical domain limits with an overlap. */
1658:     ylow   = PetscMax(ystart - overlap,0);
1659:     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1660:     xstart = 0;
1661:     for (i=0; i<Mdomains; ++i) {
1662:       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1664:       /* Horizontal domain limits with an overlap. */
1665:       xleft  = PetscMax(xstart - overlap,0);
1666:       xright = PetscMin(xstart + maxwidth + overlap,M);
1667:       /*
1668:          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1669:       */
1670:       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1671:       if (nidx) ++s;
1672:       xstart += maxwidth;
1673:     } /* for (i = 0; i < Mdomains; ++i) */
1674:     ystart += maxheight;
1675:   } /* for (j = 0; j < Ndomains; ++j) */

1677:   /* Now we can allocate the necessary number of ISs. */
1678:   *nsub  = s;
1679:   PetscMalloc1(*nsub,is);
1680:   PetscMalloc1(*nsub,is_local);
1681:   s      = 0;
1682:   ystart = 0;
1683:   for (j=0; j<Ndomains; ++j) {
1684:     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1686:     /* Vertical domain limits with an overlap. */
1687:     y[0][0] = PetscMax(ystart - overlap,0);
1688:     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1689:     /* Vertical domain limits without an overlap. */
1690:     y[1][0] = ystart;
1691:     y[1][1] = PetscMin(ystart + maxheight,N);
1692:     xstart  = 0;
1693:     for (i=0; i<Mdomains; ++i) {
1694:       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1696:       /* Horizontal domain limits with an overlap. */
1697:       x[0][0] = PetscMax(xstart - overlap,0);
1698:       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1699:       /* Horizontal domain limits without an overlap. */
1700:       x[1][0] = xstart;
1701:       x[1][1] = PetscMin(xstart+maxwidth,M);
1702:       /*
1703:          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1704:          Do this twice: first for the domains with overlaps, and once without.
1705:          During the first pass create the subcommunicators, and use them on the second pass as well.
1706:       */
1707:       for (q = 0; q < 2; ++q) {
1708:         PetscBool split = PETSC_FALSE;
1709:         /*
1710:           The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1711:           according to whether the domain with an overlap or without is considered.
1712:         */
1713:         xleft = x[q][0]; xright = x[q][1];
1714:         ylow  = y[q][0]; yhigh  = y[q][1];
1715:         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1716:         nidx *= dof;
1717:         n[q]  = nidx;
1718:         /*
1719:          Based on the counted number of indices in the local domain *with an overlap*,
1720:          construct a subcommunicator of all the processors supporting this domain.
1721:          Observe that a domain with an overlap might have nontrivial local support,
1722:          while the domain without an overlap might not.  Hence, the decision to participate
1723:          in the subcommunicator must be based on the domain with an overlap.
1724:          */
1725:         if (q == 0) {
1726:           if (nidx) color = 1;
1727:           else color = MPI_UNDEFINED;
1728:           MPI_Comm_split(comm, color, rank, &subcomm);
1729:           split = PETSC_TRUE;
1730:         }
1731:         /*
1732:          Proceed only if the number of local indices *with an overlap* is nonzero.
1733:          */
1734:         if (n[0]) {
1735:           if (q == 0) xis = is;
1736:           if (q == 1) {
1737:             /*
1738:              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1739:              Moreover, if the overlap is zero, the two ISs are identical.
1740:              */
1741:             if (overlap == 0) {
1742:               (*is_local)[s] = (*is)[s];
1743:               PetscObjectReference((PetscObject)(*is)[s]);
1744:               continue;
1745:             } else {
1746:               xis     = is_local;
1747:               subcomm = ((PetscObject)(*is)[s])->comm;
1748:             }
1749:           } /* if (q == 1) */
1750:           idx  = NULL;
1751:           PetscMalloc1(nidx,&idx);
1752:           if (nidx) {
1753:             k = 0;
1754:             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1755:               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1756:               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1757:               kk = dof*(M*jj + x0);
1758:               for (ii=x0; ii<x1; ++ii) {
1759:                 for (d = 0; d < dof; ++d) {
1760:                   idx[k++] = kk++;
1761:                 }
1762:               }
1763:             }
1764:           }
1765:           ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1766:           if (split) {
1767:             MPI_Comm_free(&subcomm);
1768:           }
1769:         }/* if (n[0]) */
1770:       }/* for (q = 0; q < 2; ++q) */
1771:       if (n[0]) ++s;
1772:       xstart += maxwidth;
1773:     } /* for (i = 0; i < Mdomains; ++i) */
1774:     ystart += maxheight;
1775:   } /* for (j = 0; j < Ndomains; ++j) */
1776:   return 0;
1777: }
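
/*
   Illustrative sketch (added for exposition): build 2x2 overlapping subdomains for a structured M x N
   grid problem already attached to pc (dof degrees of freedom per node, one mesh line of overlap) and
   install them.  M, N and dof are assumed to be known from the application.

     PetscInt nsub;
     IS       *iis,*ois;

     PCGASMCreateSubdomains2D(pc,M,N,2,2,dof,1,&nsub,&iis,&ois);
     PCGASMSetSubdomains(pc,nsub,iis,ois);
     PCGASMDestroySubdomains(nsub,&iis,&ois);
*/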

1779: /*@C
1780:     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1781:     for the additive Schwarz preconditioner.

1783:     Not Collective

1785:     Input Parameter:
1786: .   pc - the preconditioner context

1788:     Output Parameters:
1789: +   n   - the number of subdomains for this processor (default value = 1)
1790: .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1791: -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)

1793:     Notes:
1794:     The user is responsible for destroying the ISs and freeing the returned arrays.
1795:     The IS numbering is in the parallel, global numbering of the vector.

1797:     Level: advanced

1799: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1800:           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1801: @*/
1802: PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1803: {
1804:   PC_GASM        *osm;
1805:   PetscBool      match;
1806:   PetscInt       i;

1809:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1811:   osm = (PC_GASM*)pc->data;
1812:   if (n) *n = osm->n;
1813:   if (iis) {
1814:     PetscMalloc1(osm->n, iis);
1815:   }
1816:   if (ois) {
1817:     PetscMalloc1(osm->n, ois);
1818:   }
1819:   if (iis || ois) {
1820:     for (i = 0; i < osm->n; ++i) {
1821:       if (iis) (*iis)[i] = osm->iis[i];
1822:       if (ois) (*ois)[i] = osm->ois[i];
1823:     }
1824:   }
1825:   return 0;
1826: }
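
/*
   Illustrative sketch (added for exposition): inspect the subdomains currently held by a set-up PCGASM.
   The arrays returned here are allocated by the routine and are freed by the caller; pc is assumed to
   be a PC of type PCGASM that has already been set up.

     PetscInt n,i;
     IS       *iis,*ois;

     PCGASMGetSubdomains(pc,&n,&iis,&ois);
     for (i=0; i<n; i++) {
       PetscInt ni,no;
       ISGetLocalSize(iis[i],&ni);
       ISGetLocalSize(ois[i],&no);
       PetscPrintf(PETSC_COMM_SELF,"subdomain %D: %D inner, %D outer indices\n",i,ni,no);
     }
     PetscFree(iis);
     PetscFree(ois);
*/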

1828: /*@C
1829:     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1830:     only) for the additive Schwarz preconditioner.

1832:     Not Collective

1834:     Input Parameter:
1835: .   pc - the preconditioner context

1837:     Output Parameters:
1838: +   n   - the number of matrices for this processor (default value = 1)
1839: -   mat - the matrices

1841:    Notes:
1842:     The matrices returned by this routine have the same communicators as the index sets (IS)
1843:     used to define the subdomains in PCGASMSetSubdomains().

1844:     Level: advanced

1846: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1847:           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1848: @*/
1849: PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1850: {
1851:   PC_GASM        *osm;
1852:   PetscBool      match;

1858:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1860:   osm = (PC_GASM*)pc->data;
1861:   if (n) *n = osm->n;
1862:   if (mat) *mat = osm->pmat;
1863:   return 0;
1864: }
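
/*
   Illustrative sketch (added for exposition): look at the subdomain matrices after PCSetUp() has
   created them; pc is assumed to be a set-up PCGASM.  The returned array points at internal storage
   and is not freed by the caller.

     PetscInt n,i;
     Mat      *submat;

     PCGASMGetSubmatrices(pc,&n,&submat);
     for (i=0; i<n; i++) {
       PetscInt m,nn;
       MatGetSize(submat[i],&m,&nn);
       PetscPrintf(PETSC_COMM_SELF,"subdomain matrix %D is %D x %D\n",i,m,nn);
     }
*/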

1866: /*@
1867:     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

1868:     Logically Collective

1870:     Input Parameters:
1871: +   pc  - the preconditioner
1872: -   flg - boolean indicating whether to use subdomains defined by the DM

1874:     Options Database Key:
1875: .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM if possible

1877:     Level: intermediate

1879:     Notes:
1880:     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1881:     so setting nontrivial subdomain ISs with PCGASMSetSubdomains(), or calling PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1882:     automatically turns the use of DM-defined subdomains off.

1884: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1885:           PCGASMCreateSubdomains2D()
1886: @*/
1887: PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1888: {
1889:   PC_GASM        *osm = (PC_GASM*)pc->data;
1890:   PetscBool      match;

1895:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1896:   if (match) {
1897:     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1898:       osm->dm_subdomains = flg;
1899:     }
1900:   }
1901:   return 0;
1902: }
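
/*
   Illustrative sketch (added for exposition): let a DM attached to the KSP define the GASM subdomains.
   This should be requested before PCSetUp() and before any explicit subdomain settings, which take
   precedence; dm is assumed to be a DM describing the problem (e.g. a DMDA) that also provides the
   operators (e.g. via KSPSetComputeOperators()), and b, x are assumed Vecs.

     KSP ksp;
     PC  pc;

     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetDM(ksp,dm);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);   // equivalently: -pc_gasm_dm_subdomains
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
*/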

1904: /*@
1905:     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

1906:     Not Collective

1908:     Input Parameter:
1909: .   pc  - the preconditioner

1911:     Output Parameter:
1912: .   flg - boolean indicating whether to use subdomains defined by the DM

1914:     Level: intermediate

1916: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1917:           PCGASMCreateSubdomains2D()
1918: @*/
1919: PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1920: {
1921:   PC_GASM        *osm = (PC_GASM*)pc->data;
1922:   PetscBool      match;

1926:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1927:   if (match) {
1928:     if (flg) *flg = osm->dm_subdomains;
1929:   }
1930:   return 0;
1931: }