Actual source code: gasm.c

petsc-3.11.4 2019-09-28
  1: /*
  2:   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  3:   In this version each processor may intersect multiple subdomains and any subdomain may
  4:   intersect multiple processors.  Intersections of subdomains with processors are called *local
  5:   subdomains*.

  7:        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
  8:        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
  9:        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
 10: */
 11:  #include <petsc/private/pcimpl.h>
 12:  #include <petscdm.h>

 14: typedef struct {
 15:   PetscInt    N,n,nmax;
 16:   PetscInt    overlap;                  /* overlap requested by user */
 17:   PCGASMType  type;                     /* use reduced interpolation, restriction or both */
 18:   PetscBool   type_set;                 /* whether the user set this value (so it won't be changed for symmetric problems) */
 19:   PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are the same */
 20:   PetscBool   sort_indices;             /* flag to sort subdomain indices */
 21:   PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
 22:   PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
 23:   PetscBool   hierarchicalpartitioning;
 24:   IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
 25:   IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
 26:   KSP         *ksp;                     /* linear solvers for each subdomain */
 27:   Mat         *pmat;                    /* subdomain block matrices */
 28:   Vec         gx,gy;                    /* Merged work vectors */
 29:   Vec         *x,*y;                    /* Split work vectors; their storage aliases pieces of the merged vectors above. */
 30:   VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
 31:   VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
 32:   VecScatter  pctoouter;
 33:   IS          permutationIS;
 34:   Mat         permutationP;
 35:   Mat         pcmat;
 36:   Vec         pcx,pcy;
 37: } PC_GASM;

 39: static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
 40: {
 41:   PC_GASM        *osm = (PC_GASM*)pc->data;
 42:   PetscInt       i;

 46:   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
 47:   PetscMalloc2(osm->n,numbering,osm->n,permutation);
 48:   PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
 49:   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
 50:   PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
 51:   return(0);
 52: }

 54: static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
 55: {
 56:   PC_GASM        *osm = (PC_GASM*)pc->data;
 57:   PetscInt       j,nidx;
 58:   const PetscInt *idx;
 59:   PetscViewer    sviewer;
 60:   char           *cidx;

 64:   if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
 65:   /* Inner subdomains. */
 66:   ISGetLocalSize(osm->iis[i], &nidx);
 67:   /*
 68:    No more than 15 characters per index plus a space.
 69:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 70:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
 71:    For nidx == 0, the whole string consists of 16 '\0' characters.
 72:    */
 73:   PetscMalloc1(16*(nidx+1)+1, &cidx);
 74:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
 75:   ISGetIndices(osm->iis[i], &idx);
 76:   for (j = 0; j < nidx; ++j) {
 77:     PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
 78:   }
 79:   ISRestoreIndices(osm->iis[i],&idx);
 80:   PetscViewerDestroy(&sviewer);
 81:   PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
 82:   PetscViewerFlush(viewer);
 83:   PetscViewerASCIIPushSynchronized(viewer);
 84:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
 85:   PetscViewerFlush(viewer);
 86:   PetscViewerASCIIPopSynchronized(viewer);
 87:   PetscViewerASCIIPrintf(viewer, "\n");
 88:   PetscViewerFlush(viewer);
 89:   PetscFree(cidx);
 90:   /* Outer subdomains. */
 91:   ISGetLocalSize(osm->ois[i], &nidx);
 92:   /*
 93:    No more than 15 characters per index plus a space.
 94:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 95:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
 96:    For nidx == 0, the whole string consists of 16 '\0' characters.
 97:    */
 98:   PetscMalloc1(16*(nidx+1)+1, &cidx);
 99:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
100:   ISGetIndices(osm->ois[i], &idx);
101:   for (j = 0; j < nidx; ++j) {
102:     PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
103:   }
104:   PetscViewerDestroy(&sviewer);
105:   ISRestoreIndices(osm->ois[i],&idx);
106:   PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
107:   PetscViewerFlush(viewer);
108:   PetscViewerASCIIPushSynchronized(viewer);
109:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
110:   PetscViewerFlush(viewer);
111:   PetscViewerASCIIPopSynchronized(viewer);
112:   PetscViewerASCIIPrintf(viewer, "\n");
113:   PetscViewerFlush(viewer);
114:   PetscFree(cidx);
115:   return(0);
116: }

118: static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
119: {
120:   PC_GASM        *osm = (PC_GASM*)pc->data;
121:   const char     *prefix;
122:   char           fname[PETSC_MAX_PATH_LEN+1];
123:   PetscInt       l, d, count;
124:   PetscBool      doprint,found;
125:   PetscViewer    viewer, sviewer = NULL;
126:   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

130:   PCGetOptionsPrefix(pc,&prefix);
131:   doprint  = PETSC_FALSE;
132:   PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);
133:   if (!doprint) return(0);
134:   PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);
135:   if (!found) { PetscStrcpy(fname,"stdout"); };
136:   PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
137:   /*
138:    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
139:    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
140:   */
141:   PetscObjectName((PetscObject)viewer);
142:   l    = 0;
143:   PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
144:   for (count = 0; count < osm->N; ++count) {
145:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
146:     if (l<osm->n) {
147:       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
148:       if (numbering[d] == count) {
149:         PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
150:         PCGASMSubdomainView_Private(pc,d,sviewer);
151:         PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
152:         ++l;
153:       }
154:     }
155:     MPI_Barrier(PetscObjectComm((PetscObject)pc));
156:   }
157:   PetscFree2(numbering,permutation);
158:   PetscViewerDestroy(&viewer);
159:   return(0);
160: }


163: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
164: {
165:   PC_GASM        *osm = (PC_GASM*)pc->data;
166:   const char     *prefix;
168:   PetscMPIInt    rank, size;
169:   PetscInt       bsz;
170:   PetscBool      iascii,view_subdomains=PETSC_FALSE;
171:   PetscViewer    sviewer;
172:   PetscInt       count, l;
173:   char           overlap[256]     = "user-defined overlap";
174:   char           gsubdomains[256] = "unknown total number of subdomains";
175:   char           msubdomains[256] = "unknown max number of local subdomains";
176:   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

179:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
180:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);

182:   if (osm->overlap >= 0) {
183:     PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
184:   }
185:   if (osm->N != PETSC_DETERMINE) {
186:     PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
187:   }
188:   if (osm->nmax != PETSC_DETERMINE) {
189:     PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
190:   }

192:   PCGetOptionsPrefix(pc,&prefix);
193:   PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);

195:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
196:   if (iascii) {
197:     /*
198:      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
199:      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
200:      collectively on the comm.
201:      */
202:     PetscObjectName((PetscObject)viewer);
203:     PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
204:     PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);
205:     PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);
206:     PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);
207:     PetscViewerASCIIPushSynchronized(viewer);
208:     PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
209:     PetscViewerFlush(viewer);
210:     PetscViewerASCIIPopSynchronized(viewer);
211:     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
212:     PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");
213:     PetscViewerASCIIPushTab(viewer);
214:     PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");
215:     /* Make sure that everybody waits for the banner to be printed. */
216:     MPI_Barrier(PetscObjectComm((PetscObject)viewer));
217:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
218:     PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
219:     l = 0;
220:     for (count = 0; count < osm->N; ++count) {
221:       PetscMPIInt srank, ssize;
222:       if (l<osm->n) {
223:         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
224:         if (numbering[d] == count) {
225:           MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
226:           MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
227:           PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
228:           ISGetLocalSize(osm->ois[d],&bsz);
229:           PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
230:           PetscViewerFlush(sviewer);
231:           if (view_subdomains) {
232:             PCGASMSubdomainView_Private(pc,d,sviewer);
233:           }
234:           if (!pc->setupcalled) {
235:             PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");
236:           } else {
237:             KSPView(osm->ksp[d],sviewer);
238:           }
239:           PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");
240:           PetscViewerFlush(sviewer);
241:           PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
242:           ++l;
243:         }
244:       }
245:       MPI_Barrier(PetscObjectComm((PetscObject)pc));
246:     }
247:     PetscFree2(numbering,permutation);
248:     PetscViewerASCIIPopTab(viewer);
249:   }
250:   PetscViewerFlush(viewer);
251:   PetscViewerASCIIPopSynchronized(viewer);
252:   return(0);
253: }

255: PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);



259: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
260: {
261:    PC_GASM              *osm = (PC_GASM*)pc->data;
262:    MatPartitioning       part;
263:    MPI_Comm              comm;
264:    PetscMPIInt           size;
265:    PetscInt              nlocalsubdomains,fromrows_localsize;
266:    IS                    partitioning,fromrows,isn;
267:    Vec                   outervec;
268:    PetscErrorCode        ierr;

271:    PetscObjectGetComm((PetscObject)pc,&comm);
272:    MPI_Comm_size(comm,&size);
 273:    /* Hierarchical partitioning (HP) is unnecessary when
 274:     * the total number of subdomains is consistent with
 275:     * the number of MPI tasks; in the following cases
 276:     * we therefore skip HP.
 277:     */
278:    if(osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) return(0);
 279:    if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of processors %d\n",osm->N,size);
280:    nlocalsubdomains = size/osm->N;
281:    osm->n           = 1;
282:    MatPartitioningCreate(comm,&part);
283:    MatPartitioningSetAdjacency(part,pc->pmat);
284:    MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
285:    MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
286:    MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
287:    MatPartitioningSetFromOptions(part);
 288:    /* get the new owning process of each vertex */
289:    MatPartitioningApply(part,&partitioning);
290:    ISBuildTwoSided(partitioning,NULL,&fromrows);
291:    ISPartitioningToNumbering(partitioning,&isn);
292:    ISDestroy(&isn);
293:    ISGetLocalSize(fromrows,&fromrows_localsize);
294:    MatPartitioningDestroy(&part);
295:    MatCreateVecs(pc->pmat,&outervec,NULL);
296:    VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
297:    VecDuplicate(osm->pcx,&(osm->pcy));
298:    VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
299:    MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
300:    PetscObjectReference((PetscObject)fromrows);
301:    osm->permutationIS = fromrows;
302:    osm->pcmat =  pc->pmat;
303:    PetscObjectReference((PetscObject)osm->permutationP);
304:    pc->pmat = osm->permutationP;
305:    VecDestroy(&outervec);
306:    ISDestroy(&fromrows);
307:    ISDestroy(&partitioning);
308:    osm->n           = PETSC_DETERMINE;
309:    return(0);
310: }



314: static PetscErrorCode PCSetUp_GASM(PC pc)
315: {
316:   PC_GASM        *osm = (PC_GASM*)pc->data;
318:   PetscBool      symset,flg;
319:   PetscInt       i,nInnerIndices,nTotalInnerIndices;
320:   PetscMPIInt    rank, size;
321:   MatReuse       scall = MAT_REUSE_MATRIX;
322:   KSP            ksp;
323:   PC             subpc;
324:   const char     *prefix,*pprefix;
325:   Vec            x,y;
326:   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
 327:   const PetscInt *oidxi;    /* Indices of the i-th local outer subdomain.                          */
328:   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
329:   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
 330:   IS             gois;      /* Disjoint union of the global indices of outer subdomains.          */
331:   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
332:   PetscScalar    *gxarray, *gyarray;
333:   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
334:   PetscInt       num_subdomains    = 0;
335:   DM             *subdomain_dm     = NULL;
336:   char           **subdomain_names = NULL;
337:   PetscInt       *numbering;


341:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
342:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
343:   if (!pc->setupcalled) {
 344:     /* use a hierarchical partitioning */
 345:     if (osm->hierarchicalpartitioning) {
346:       PCGASMSetHierarchicalPartitioning(pc);
347:     }
348:     if (!osm->type_set) {
349:       MatIsSymmetricKnown(pc->pmat,&symset,&flg);
350:       if (symset && flg) osm->type = PC_GASM_BASIC;
351:     }

353:     if (osm->n == PETSC_DETERMINE) {
354:       if (osm->N != PETSC_DETERMINE) {
 355:         /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
 356:         PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
357:       } else if (osm->dm_subdomains && pc->dm) {
358:         /* try pc->dm next, if allowed */
359:         PetscInt  d;
360:         IS       *inner_subdomain_is, *outer_subdomain_is;
361:         DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
362:         if (num_subdomains) {
363:           PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
364:         }
365:         for (d = 0; d < num_subdomains; ++d) {
366:           if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
367:           if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
368:         }
369:         PetscFree(inner_subdomain_is);
370:         PetscFree(outer_subdomain_is);
371:       } else {
372:         /* still no subdomains; use one per processor */
373:         osm->nmax = osm->n = 1;
374:         MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
375:         osm->N    = size;
376:         PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
377:       }
378:     }
379:     if (!osm->iis) {
380:       /*
381:        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
382:        We create the requisite number of local inner subdomains and then expand them into
 383:        outer subdomains, if necessary.
384:        */
385:       PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
386:     }
387:     if (!osm->ois) {
388:       /*
389:             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
390:             has been requested, copy the inner subdomains over so they can be modified.
391:       */
392:       PetscMalloc1(osm->n,&osm->ois);
393:       for (i=0; i<osm->n; ++i) {
394:         if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
395:           ISDuplicate(osm->iis[i],(osm->ois)+i);
396:           ISCopy(osm->iis[i],osm->ois[i]);
397:         } else {
398:           PetscObjectReference((PetscObject)((osm->iis)[i]));
399:           osm->ois[i] = osm->iis[i];
400:         }
401:       }
402:       if (osm->overlap>0 && osm->N>1) {
 403:         /* Extend the "overlapping" regions by a number of steps */
 404:         MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
405:       }
406:     }

408:     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
409:     if (osm->nmax == PETSC_DETERMINE) {
410:       PetscMPIInt inwork,outwork;
 411:       /* determine the maximum number of local subdomains over all processes */
412:       inwork = osm->n;
413:       MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
414:       osm->nmax  = outwork;
415:     }
416:     if (osm->N == PETSC_DETERMINE) {
417:       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
418:       PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
419:     }


422:     if (osm->sort_indices) {
423:       for (i=0; i<osm->n; i++) {
424:         ISSort(osm->ois[i]);
425:         ISSort(osm->iis[i]);
426:       }
427:     }
428:     PCGetOptionsPrefix(pc,&prefix);
429:     PCGASMPrintSubdomains(pc);

431:     /*
432:        Merge the ISs, create merged vectors and restrictions.
433:      */
434:     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
435:     on = 0;
436:     for (i=0; i<osm->n; i++) {
437:       ISGetLocalSize(osm->ois[i],&oni);
438:       on  += oni;
439:     }
440:     PetscMalloc1(on, &oidx);
441:     on   = 0;
442:     /* Merge local indices together */
443:     for (i=0; i<osm->n; i++) {
444:       ISGetLocalSize(osm->ois[i],&oni);
445:       ISGetIndices(osm->ois[i],&oidxi);
446:       PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);
447:       ISRestoreIndices(osm->ois[i],&oidxi);
448:       on  += oni;
449:     }
450:     ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
451:     nTotalInnerIndices = 0;
452:     for(i=0; i<osm->n; i++){
453:       ISGetLocalSize(osm->iis[i],&nInnerIndices);
454:       nTotalInnerIndices += nInnerIndices;
455:     }
456:     VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
457:     VecDuplicate(x,&y);

459:     VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
460:     VecDuplicate(osm->gx,&osm->gy);
461:     VecGetOwnershipRange(osm->gx, &gostart, NULL);
462:     ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
 463:     /* gois may contain indices that are not locally owned */
464:     VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
465:     PetscMalloc1(osm->n,&numbering);
466:     PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
467:     VecDestroy(&x);
468:     ISDestroy(&gois);

470:     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
471:     {
 472:       PetscInt        ini;           /* Number of indices in the i-th local inner subdomain. */
 473:       PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
474:       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
475:       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
476:       IS              giis;          /* IS for the disjoint union of inner subdomains. */
477:       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
478:       PetscScalar    *array;
479:       const PetscInt *indices;
480:       PetscInt        k;
481:       on = 0;
482:       for (i=0; i<osm->n; i++) {
483:         ISGetLocalSize(osm->ois[i],&oni);
484:         on  += oni;
485:       }
486:       PetscMalloc1(on, &iidx);
487:       PetscMalloc1(on, &ioidx);
488:       VecGetArray(y,&array);
 489:       /* tag each inner-subdomain entry with its global subdomain number so that overlap can be detected after the scatter */
490:       in   = 0;
491:       for (i=0; i<osm->n; i++) {
492:         ISGetLocalSize(osm->iis[i],&ini);
493:         for (k = 0; k < ini; ++k){
494:           array[in+k] = numbering[i];
495:         }
496:         in += ini;
497:       }
498:       VecRestoreArray(y,&array);
499:       VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
500:       VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
501:       VecGetOwnershipRange(osm->gy,&gostart, NULL);
502:       VecGetArray(osm->gy,&array);
503:       on  = 0;
504:       in  = 0;
 505:       for (i=0; i<osm->n; i++) {
 506:         ISGetLocalSize(osm->ois[i],&oni);
 507:         ISGetIndices(osm->ois[i],&indices);
 508:         for (k=0; k<oni; k++) {
 509:           /* skip overlapping indices to get the inner domain */
 510:           if (PetscRealPart(array[on+k]) != numbering[i]) continue;
 511:           iidx[in]    = indices[k];
 512:           ioidx[in++] = gostart+on+k;
 513:         }
 514:         ISRestoreIndices(osm->ois[i], &indices);
 515:         on += oni;
 516:       }
517:       VecRestoreArray(osm->gy,&array);
518:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
519:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
520:       VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
521:       VecDestroy(&y);
522:       ISDestroy(&giis);
523:       ISDestroy(&giois);
524:     }
525:     ISDestroy(&goid);
526:     PetscFree(numbering);

528:     /* Create the subdomain work vectors. */
529:     PetscMalloc1(osm->n,&osm->x);
530:     PetscMalloc1(osm->n,&osm->y);
531:     VecGetArray(osm->gx, &gxarray);
532:     VecGetArray(osm->gy, &gyarray);
533:     for (i=0, on=0; i<osm->n; ++i, on += oni) {
534:       PetscInt oNi;
535:       ISGetLocalSize(osm->ois[i],&oni);
 536:       /* osm->ois[i] lives on a subcommunicator; get its global size there */
537:       ISGetSize(osm->ois[i],&oNi);
538:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
539:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
540:     }
541:     VecRestoreArray(osm->gx, &gxarray);
542:     VecRestoreArray(osm->gy, &gyarray);
543:     /* Create the subdomain solvers */
544:     PetscMalloc1(osm->n,&osm->ksp);
545:     for (i=0; i<osm->n; i++) {
546:       char subprefix[PETSC_MAX_PATH_LEN+1];
547:       KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
548:       KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
549:       PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
550:       PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
551:       KSPSetType(ksp,KSPPREONLY);
552:       KSPGetPC(ksp,&subpc); /* Why do we need this here? */
 553:       if (subdomain_dm) {
 554:         KSPSetDM(ksp,subdomain_dm[i]);
 555:         DMDestroy(subdomain_dm+i);
 556:       }
557:       PCGetOptionsPrefix(pc,&prefix);
558:       KSPSetOptionsPrefix(ksp,prefix);
 559:       if (subdomain_names && subdomain_names[i]) {
 560:         PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
 561:         KSPAppendOptionsPrefix(ksp,subprefix);
 562:         PetscFree(subdomain_names[i]);
 563:       }
564:       KSPAppendOptionsPrefix(ksp,"sub_");
565:       osm->ksp[i] = ksp;
566:     }
567:     PetscFree(subdomain_dm);
568:     PetscFree(subdomain_names);
569:     scall = MAT_INITIAL_MATRIX;

571:   } else { /* if (pc->setupcalled) */
572:     /*
573:        Destroy the submatrices from the previous iteration
574:     */
575:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
576:       MatDestroyMatrices(osm->n,&osm->pmat);
577:       scall = MAT_INITIAL_MATRIX;
578:     }
579:     if(osm->permutationIS){
580:       MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
581:       PetscObjectReference((PetscObject)osm->permutationP);
582:       osm->pcmat = pc->pmat;
583:       pc->pmat   = osm->permutationP;
584:     }

586:   }


589:   /*
590:      Extract out the submatrices.
591:   */
592:   if (size > 1) {
593:     MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
594:   } else {
595:     MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
596:   }
597:   if (scall == MAT_INITIAL_MATRIX) {
598:     PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
599:     for (i=0; i<osm->n; i++) {
600:       PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
601:       PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
602:     }
603:   }

605:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
606:      different boundary conditions for the submatrices than for the global problem) */
607:   PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);

609:   /*
610:      Loop over submatrices putting them into local ksps
611:   */
612:   for (i=0; i<osm->n; i++) {
613:     KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
614:     if (!pc->setupcalled) {
615:       KSPSetFromOptions(osm->ksp[i]);
616:     }
617:   }
618:   if(osm->pcmat){
619:     MatDestroy(&pc->pmat);
620:     pc->pmat   = osm->pcmat;
621:     osm->pcmat = 0;
622:   }
623:   return(0);
624: }

626: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
627: {
628:   PC_GASM        *osm = (PC_GASM*)pc->data;
630:   PetscInt       i;

633:   for (i=0; i<osm->n; i++) {
634:     KSPSetUp(osm->ksp[i]);
635:   }
636:   return(0);
637: }

639: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
640: {
641:   PC_GASM        *osm = (PC_GASM*)pc->data;
643:   PetscInt       i;
644:   Vec            x,y;
645:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

 648:   if (osm->pctoouter) {
 649:     VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
 650:     VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
 651:     x = osm->pcx;
 652:     y = osm->pcy;
 653:   } else {
 654:     x = xin;
 655:     y = yout;
 656:   }
657:   /*
658:      Support for limiting the restriction or interpolation only to the inner
659:      subdomain values (leaving the other values 0).
660:   */
661:   if (!(osm->type & PC_GASM_RESTRICT)) {
662:     /* have to zero the work RHS since scatter may leave some slots empty */
663:     VecZeroEntries(osm->gx);
664:     VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
665:   } else {
666:     VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
667:   }
668:   VecZeroEntries(osm->gy);
669:   if (!(osm->type & PC_GASM_RESTRICT)) {
670:     VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
671:   } else {
672:     VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
673:   }
674:   /* do the subdomain solves */
675:   for (i=0; i<osm->n; ++i) {
676:     KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
677:     KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
678:   }
679:   /* Do we need to zero y ?? */
680:   VecZeroEntries(y);
681:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
682:     VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
683:     VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
684:   } else {
685:     VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
686:     VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
687:   }
688:   if(osm->pctoouter){
689:     VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
690:     VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
691:   }
692:   return(0);
693: }
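
/*
   For reference, a standard way to write what PCApply_GASM() above computes (this note is an
   editorial addition, not part of the original gasm.c): let R_i denote the Boolean restriction
   onto the i-th outer subdomain and Rt_i the same operator with the rows outside the inner
   subdomain zeroed out.  With A_i = R_i A R_i^T (the submatrices extracted in PCSetUp_GASM()),
   the preconditioner application is approximately

       y = sum_i P_i^T A_i^{-1} Q_i x,

   where (Q_i, P_i) = (R_i, R_i) for PC_GASM_BASIC, (R_i, Rt_i) for PC_GASM_RESTRICT (the default),
   (Rt_i, R_i) for PC_GASM_INTERPOLATE, and (Rt_i, Rt_i) for PC_GASM_NONE.
*/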

695: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
696: {
697:   PC_GASM        *osm = (PC_GASM*)pc->data;
699:   PetscInt       i;
700:   Vec            x,y;
701:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

 704:   if (osm->pctoouter) {
 705:     VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
 706:     VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
 707:     x = osm->pcx;
 708:     y = osm->pcy;
 709:   } else {
 710:     x = xin;
 711:     y = yout;
 712:   }
713:   /*
 714:      Support for limiting the restriction or interpolation only to the inner
 715:      subdomain values (leaving the other values 0).

 717:      Note: these are reversed relative to PCApply_GASM() because we are applying the
 718:      transpose of the three terms.
719:   */
720:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
721:     /* have to zero the work RHS since scatter may leave some slots empty */
722:     VecZeroEntries(osm->gx);
723:     VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
724:   } else {
725:     VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
726:   }
727:   VecZeroEntries(osm->gy);
728:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
729:     VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
730:   } else {
731:     VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
732:   }
733:   /* do the local solves */
734:   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
735:     KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
736:     KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);
737:   }
738:   VecZeroEntries(y);
739:   if (!(osm->type & PC_GASM_RESTRICT)) {
740:     VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
741:     VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
742:   } else {
743:     VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
744:     VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
745:   }
746:   if(osm->pctoouter){
747:    VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
748:    VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
749:   }
750:   return(0);
751: }

753: static PetscErrorCode PCReset_GASM(PC pc)
754: {
755:   PC_GASM        *osm = (PC_GASM*)pc->data;
757:   PetscInt       i;

760:   if (osm->ksp) {
761:     for (i=0; i<osm->n; i++) {
762:       KSPReset(osm->ksp[i]);
763:     }
764:   }
765:   if (osm->pmat) {
766:     if (osm->n > 0) {
767:       PetscMPIInt size;
768:       MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
769:       if (size > 1) {
770:         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
771:         MatDestroyMatrices(osm->n,&osm->pmat);
772:       } else {
773:         MatDestroySubMatrices(osm->n,&osm->pmat);
774:       }
775:     }
776:   }
777:   if (osm->x) {
778:     for (i=0; i<osm->n; i++) {
779:       VecDestroy(&osm->x[i]);
780:       VecDestroy(&osm->y[i]);
781:     }
782:   }
783:   VecDestroy(&osm->gx);
784:   VecDestroy(&osm->gy);

786:   VecScatterDestroy(&osm->gorestriction);
787:   VecScatterDestroy(&osm->girestriction);
788:   if (!osm->user_subdomains) {
789:     PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
790:     osm->N    = PETSC_DETERMINE;
791:     osm->nmax = PETSC_DETERMINE;
792:   }
 793:   if (osm->pctoouter) {
 794:     VecScatterDestroy(&(osm->pctoouter));
 795:   }
 796:   if (osm->permutationIS) {
 797:     ISDestroy(&(osm->permutationIS));
 798:   }
 799:   if (osm->pcx) {
 800:     VecDestroy(&(osm->pcx));
 801:   }
 802:   if (osm->pcy) {
 803:     VecDestroy(&(osm->pcy));
 804:   }
 805:   if (osm->permutationP) {
 806:     MatDestroy(&(osm->permutationP));
 807:   }
 808:   if (osm->pcmat) {
 809:     MatDestroy(&osm->pcmat);
 810:   }
811:   return(0);
812: }

814: static PetscErrorCode PCDestroy_GASM(PC pc)
815: {
816:   PC_GASM        *osm = (PC_GASM*)pc->data;
818:   PetscInt       i;

821:   PCReset_GASM(pc);
 822:   /* PCReset() will not destroy the subdomains if user_subdomains is true. */
823:   PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
824:   if (osm->ksp) {
825:     for (i=0; i<osm->n; i++) {
826:       KSPDestroy(&osm->ksp[i]);
827:     }
828:     PetscFree(osm->ksp);
829:   }
830:   PetscFree(osm->x);
831:   PetscFree(osm->y);
832:   PetscFree(pc->data);
833:   return(0);
834: }

836: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
837: {
838:   PC_GASM        *osm = (PC_GASM*)pc->data;
840:   PetscInt       blocks,ovl;
841:   PetscBool      symset,flg;
842:   PCGASMType     gasmtype;

 845:   /* set the type to BASIC if the matrix is known to be symmetric */
846:   if (!osm->type_set && pc->pmat) {
847:     MatIsSymmetricKnown(pc->pmat,&symset,&flg);
848:     if (symset && flg) osm->type = PC_GASM_BASIC;
849:   }
850:   PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
851:   PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
852:   PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
853:   if (flg) {
854:     PCGASMSetTotalSubdomains(pc,blocks);
855:   }
856:   PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
857:   if (flg) {
858:     PCGASMSetOverlap(pc,ovl);
859:     osm->dm_subdomains = PETSC_FALSE;
860:   }
861:   flg  = PETSC_FALSE;
862:   PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
863:   if (flg) {PCGASMSetType(pc,gasmtype);}
864:   PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
865:   PetscOptionsTail();
866:   return(0);
867: }

869: /*------------------------------------------------------------------------------------*/

871: /*@
872:     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
873:                                communicator.
874:     Logically collective on pc

876:     Input Parameters:
877: +   pc  - the preconditioner
878: -   N   - total number of subdomains


881:     Level: beginner

883: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

885: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
886:           PCGASMCreateSubdomains2D()
887: @*/
888: PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
889: {
890:   PC_GASM        *osm = (PC_GASM*)pc->data;
891:   PetscMPIInt    size,rank;

895:   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
896:   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

898:   PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
899:   osm->ois = osm->iis = NULL;

901:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
902:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
903:   osm->N    = N;
904:   osm->n    = PETSC_DETERMINE;
905:   osm->nmax = PETSC_DETERMINE;
906:   osm->dm_subdomains = PETSC_FALSE;
907:   return(0);
908: }
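
/*
   Illustrative usage sketch (an editorial addition, not part of the original gasm.c): letting GASM
   construct the subdomains itself once only the total count is given.  The outer KSP "ksp" and the
   matrix "A" are assumed to have been created elsewhere.

     PC pc;
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetTotalSubdomains(pc,8);    <-- eight subdomains across the whole communicator
     PCGASMSetOverlap(pc,1);            <-- grow each outer subdomain by one layer of overlap
     KSPSetOperators(ksp,A,A);
     KSPSetFromOptions(ksp);

   The same configuration is available from the options database via
   -pc_type gasm -pc_gasm_total_subdomains 8 -pc_gasm_overlap 1.
*/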


911: static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
912: {
913:   PC_GASM         *osm = (PC_GASM*)pc->data;
914:   PetscErrorCode  ierr;
915:   PetscInt        i;

918:   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
919:   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

921:   PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
922:   osm->iis  = osm->ois = NULL;
923:   osm->n    = n;
924:   osm->N    = PETSC_DETERMINE;
925:   osm->nmax = PETSC_DETERMINE;
926:   if (ois) {
927:     PetscMalloc1(n,&osm->ois);
928:     for (i=0; i<n; i++) {
929:       PetscObjectReference((PetscObject)ois[i]);
930:       osm->ois[i] = ois[i];
931:     }
932:     /*
933:        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
934:        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
935:     */
936:     osm->overlap = -1;
937:     /* inner subdomains must be provided  */
938:     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
939:   }/* end if */
940:   if (iis) {
941:     PetscMalloc1(n,&osm->iis);
942:     for (i=0; i<n; i++) {
943:       PetscObjectReference((PetscObject)iis[i]);
944:       osm->iis[i] = iis[i];
945:     }
946:     if (!ois) {
947:       osm->ois = NULL;
 948:       /* if the user does not provide outer index sets, the corresponding outer subdomains will be created in PCSetUp_GASM() by expanding the inner ones by osm->overlap */
949:     }
950:   }
951: #if defined(PETSC_USE_DEBUG)
952:   {
953:     PetscInt        j,rstart,rend,*covered,lsize;
954:     const PetscInt  *indices;
955:     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
956:     MatGetOwnershipRange(pc->pmat,&rstart,&rend);
957:     PetscCalloc1(rend-rstart,&covered);
 958:     /* make sure no index lies outside the local ownership range and no index is repeated */
959:     for (i=0; i<n; i++) {
960:       ISGetIndices(osm->iis[i],&indices);
961:       ISGetLocalSize(osm->iis[i],&lsize);
962:       for (j=0; j<lsize; j++) {
 963:         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot contain an index %D owned by another process", indices[j]);
 964:         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot overlap: index %D appears more than once",indices[j]);
965:         else covered[indices[j]-rstart] = 1;
966:       }
967:     ISRestoreIndices(osm->iis[i],&indices);
968:     }
 969:     /* check that no locally owned index was missed */
 970:     for (i=rstart; i<rend; i++) {
 971:       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local index %D is not covered by the inner subdomains",i);
972:     }
973:     PetscFree(covered);
974:   }
975: #endif
976:   if (iis)  osm->user_subdomains = PETSC_TRUE;
977:   return(0);
978: }


981: static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
982: {
983:   PC_GASM *osm = (PC_GASM*)pc->data;

986:   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
987:   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
988:   if (!pc->setupcalled) osm->overlap = ovl;
989:   return(0);
990: }

992: static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
993: {
994:   PC_GASM *osm = (PC_GASM*)pc->data;

997:   osm->type     = type;
998:   osm->type_set = PETSC_TRUE;
999:   return(0);
1000: }

1002: static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1003: {
1004:   PC_GASM *osm = (PC_GASM*)pc->data;

1007:   osm->sort_indices = doSort;
1008:   return(0);
1009: }

1011: /*
1012:    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1013:         In particular, it would upset the global subdomain number calculation.
1014: */
1015: static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1016: {
1017:   PC_GASM        *osm = (PC_GASM*)pc->data;

1021:   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

1023:   if (n) *n = osm->n;
1024:   if (first) {
1025:     MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1026:     *first -= osm->n;
1027:   }
1028:   if (ksp) {
1029:     /* Assume that local solves are now different; not necessarily
1030:        true, though!  This flag is used only for PCView_GASM() */
1031:     *ksp                        = osm->ksp;
1032:     osm->same_subdomain_solvers = PETSC_FALSE;
1033:   }
1034:   return(0);
1035: } /* PCGASMGetSubKSP_GASM() */

1037: /*@C
1038:     PCGASMSetSubdomains - Sets the subdomains for this processor
1039:     for the additive Schwarz preconditioner.

1041:     Collective on pc

1043:     Input Parameters:
1044: +   pc  - the preconditioner object
1045: .   n   - the number of subdomains for this processor
1046: .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1047: -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

1049:     Notes:
1050:     The IS indices use the parallel, global numbering of the vector entries.
1051:     Inner subdomains are those where the correction is applied.
1052:     Outer subdomains are those where the residual necessary to obtain the
1053:     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1054:     Both inner and outer subdomains can extend over several processors.
1055:     This processor's portion of a subdomain is known as a local subdomain.

1057:     Inner subdomains cannot overlap with each other, must not contain any indices owned by other
1058:     processes, and together have to cover the entire locally owned portion of the matrix. The index
1059:     sets on each process should be ordered such that the ith local subdomain is connected to the ith
1060:     remote subdomain on another MPI process.

1062:     By default the GASM preconditioner uses 1 (local) subdomain per processor.


1065:     Level: advanced

1067: .keywords: PC, GASM, set, subdomains, additive Schwarz

1069: .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1070:           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1071: @*/
1072: PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1073: {
1074:   PC_GASM *osm = (PC_GASM*)pc->data;

1079:   PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1080:   osm->dm_subdomains = PETSC_FALSE;
1081:   return(0);
1082: }
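
/*
   Illustrative usage sketch (an editorial addition, not part of the original gasm.c): supplying
   explicit inner subdomains and letting PCSetUp_GASM() build the outer ones by overlap expansion.
   The outer KSP "ksp" and matrix "A" are assumed to exist; here each process simply splits its own
   rows into two contiguous inner subdomains.

     PC       pc;
     IS       iis[2];
     PetscInt rstart,rend,mid;

     MatGetOwnershipRange(A,&rstart,&rend);
     mid = rstart + (rend - rstart)/2;
     ISCreateStride(PETSC_COMM_SELF,mid-rstart,rstart,1,&iis[0]);
     ISCreateStride(PETSC_COMM_SELF,rend-mid,mid,1,&iis[1]);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetSubdomains(pc,2,iis,NULL);   <-- outer subdomains come from the requested overlap
     PCGASMSetOverlap(pc,1);
     ISDestroy(&iis[0]);                   <-- the PC keeps its own references
     ISDestroy(&iis[1]);
*/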


1085: /*@
1086:     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1087:     additive Schwarz preconditioner.  Either all or no processors in the
1088:     pc communicator must call this routine.

1090:     Logically Collective on pc

1092:     Input Parameters:
1093: +   pc  - the preconditioner context
1094: -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

1096:     Options Database Key:
1097: .   -pc_gasm_overlap <overlap> - Sets overlap

1099:     Notes:
1100:     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1101:     multiple subdomains per processor or "straddling" subdomains that intersect
1102:     multiple processors, use PCGASMSetSubdomains() (or the option -pc_gasm_total_subdomains <n>).

1104:     The overlap defaults to 0, so if one desires that no additional
1105:     overlap be computed beyond what may have been set with a call to
1106:     PCGASMSetSubdomains(), then ovl must be set to 0.  In particular, if one does
1107:     not explicitly set the subdomains in application code, then all overlap would be computed
1108:     internally by PETSc, and using an overlap of 0 would result in a GASM
1109:     variant that is equivalent to the block Jacobi preconditioner.

1111:     Note that one can define initial index sets with any overlap via
1112:     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1113:     PETSc to extend that overlap further, if desired.

1115:     Level: intermediate

1117: .keywords: PC, GASM, set, overlap

1119: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1120:           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1121: @*/
1122: PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1123: {
1125:   PC_GASM *osm = (PC_GASM*)pc->data;

1130:   PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1131:   osm->dm_subdomains = PETSC_FALSE;
1132:   return(0);
1133: }
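
/*
   Illustrative usage sketch (an editorial addition, not part of the original gasm.c): with the
   default one-subdomain-per-process layout, overlap 0 makes GASM act like block Jacobi, while a
   positive overlap grows each outer subdomain by that many layers.  "pc" is assumed to be a PC of
   type PCGASM.

     PCGASMSetOverlap(pc,2);    <-- or, equivalently, -pc_gasm_overlap 2
*/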

1135: /*@
1136:     PCGASMSetType - Sets the type of restriction and interpolation used
1137:     for local problems in the additive Schwarz method.

1139:     Logically Collective on PC

1141:     Input Parameters:
1142: +   pc  - the preconditioner context
1143: -   type - variant of GASM, one of
1144: .vb
1145:       PC_GASM_BASIC       - full interpolation and restriction
1146:       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1147:       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1148:       PC_GASM_NONE        - local processor restriction and interpolation
1149: .ve

1151:     Options Database Key:
1152: .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

1154:     Level: intermediate

1156: .keywords: PC, GASM, set, type

1158: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1159:           PCGASMCreateSubdomains2D()
1160: @*/
1161: PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1162: {

1168:   PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1169:   return(0);
1170: }
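
/*
   Illustrative usage sketch (an editorial addition, not part of the original gasm.c): selecting the
   classical (non-restricted) additive Schwarz variant on an existing PCGASM preconditioner "pc".

     PCGASMSetType(pc,PC_GASM_BASIC);    <-- or, equivalently, -pc_gasm_type basic
*/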

1172: /*@
1173:     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

1175:     Logically Collective on PC

1177:     Input Parameters:
1178: +   pc  - the preconditioner context
1179: -   doSort - sort the subdomain indices

1181:     Level: intermediate

1183: .keywords: PC, GASM, set, type

1185: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1186:           PCGASMCreateSubdomains2D()
1187: @*/
1188: PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1189: {

1195:   PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1196:   return(0);
1197: }

1199: /*@C
1200:    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1201:    this processor.

1203:    Collective on PC iff first_local is requested

1205:    Input Parameter:
1206: .  pc - the preconditioner context

1208:    Output Parameters:
1209: +  n_local - the number of blocks on this processor or NULL
1210: .  first_local - the global number of the first block on this processor or NULL,
1211:                  all processors must request or all must pass NULL
1212: -  ksp - the array of KSP contexts

1214:    Note:
1215:    After PCGASMGetSubKSP() the array of KSP contexts must not be freed by the caller

1217:    Currently for some matrix implementations only 1 block per processor
1218:    is supported.

1220:    You must call KSPSetUp() before calling PCGASMGetSubKSP().

1222:    Level: advanced

1224: .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

1226: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1227:           PCGASMCreateSubdomains2D(),
1228: @*/
1229: PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1230: {

1235:   PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1236:   return(0);
1237: }
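
/*
   Illustrative usage sketch (an editorial addition, not part of the original gasm.c): customizing
   the subdomain solvers after setup.  "ksp" is the outer solver; KSPSetUp() must already have been
   called so that the subdomain KSPs exist.  The example assumes the default layout in which every
   subdomain lives on a single process, so a sequential factorization is legal.

     KSP      *subksp;
     PC        pc,subpc;
     PetscInt  i,nlocal;

     KSPSetUp(ksp);
     KSPGetPC(ksp,&pc);
     PCGASMGetSubKSP(pc,&nlocal,NULL,&subksp);
     for (i=0; i<nlocal; i++) {
       KSPSetType(subksp[i],KSPGMRES);
       KSPGetPC(subksp[i],&subpc);
       PCSetType(subpc,PCILU);
     }
*/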

1239: /* -------------------------------------------------------------------------------------*/
1240: /*MC
1241:    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1242:            its own KSP object.

1244:    Options Database Keys:
1245: +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1246: .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1247: .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1248: .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1249: -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

1251:      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1252:       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1253:       -pc_gasm_type basic to use the standard GASM.

1255:    Notes:
1256:     Blocks can be shared by multiple processes.

1258:      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1259:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

1261:      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1262:          and set the options directly on the resulting KSP object (you can access its PC
1263:          with KSPGetPC())


1266:    Level: beginner

1268:    Concepts: additive Schwarz method

1270:     References:
1271: +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1272:      Courant Institute, New York University Technical report
1273: -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1274:     Cambridge University Press.

1276: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1277:            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1278:            PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

1280: M*/
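
/*
   Illustrative usage sketch (an editorial addition, not part of the original gasm.c): minimal use of
   PCGASM inside a KSP solve, taking all settings from the options database.  The matrix "A" and the
   vectors "b" and "x" are assumed to be created and filled elsewhere.

     KSP ksp;
     PC  pc;

     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     KSPSetFromOptions(ksp);    <-- picks up the -pc_gasm_* and -sub_* options listed above
     KSPSolve(ksp,b,x);
     KSPDestroy(&ksp);

   A typical run might then add, e.g., -pc_gasm_total_subdomains 8 -pc_gasm_overlap 1
   -sub_pc_type ilu -pc_gasm_view_subdomains on the command line.
*/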

1282: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1283: {
1285:   PC_GASM        *osm;

1288:   PetscNewLog(pc,&osm);

1290:   osm->N                        = PETSC_DETERMINE;
1291:   osm->n                        = PETSC_DECIDE;
1292:   osm->nmax                     = PETSC_DETERMINE;
1293:   osm->overlap                  = 0;
1294:   osm->ksp                      = 0;
1295:   osm->gorestriction            = 0;
1296:   osm->girestriction            = 0;
1297:   osm->pctoouter                = 0;
1298:   osm->gx                       = 0;
1299:   osm->gy                       = 0;
1300:   osm->x                        = 0;
1301:   osm->y                        = 0;
1302:   osm->pcx                      = 0;
1303:   osm->pcy                      = 0;
1304:   osm->permutationIS            = 0;
1305:   osm->permutationP             = 0;
1306:   osm->pcmat                    = 0;
1307:   osm->ois                      = 0;
1308:   osm->iis                      = 0;
1309:   osm->pmat                     = 0;
1310:   osm->type                     = PC_GASM_RESTRICT;
1311:   osm->same_subdomain_solvers   = PETSC_TRUE;
1312:   osm->sort_indices             = PETSC_TRUE;
1313:   osm->dm_subdomains            = PETSC_FALSE;
1314:   osm->hierarchicalpartitioning = PETSC_FALSE;

1316:   pc->data                 = (void*)osm;
1317:   pc->ops->apply           = PCApply_GASM;
1318:   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1319:   pc->ops->setup           = PCSetUp_GASM;
1320:   pc->ops->reset           = PCReset_GASM;
1321:   pc->ops->destroy         = PCDestroy_GASM;
1322:   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1323:   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1324:   pc->ops->view            = PCView_GASM;
1325:   pc->ops->applyrichardson = 0;

1327:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1328:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1329:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1330:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1331:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1332:   return(0);
1333: }


1336: PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1337: {
1338:   MatPartitioning mpart;
1339:   const char      *prefix;
1340:   PetscInt        i,j,rstart,rend,bs;
1341:   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1342:   Mat             Ad     = NULL, adj;
1343:   IS              ispart,isnumb,*is;
1344:   PetscErrorCode  ierr;

1347:   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must be > 0, got nloc = %D",nloc);

1349:   /* Get prefix, row distribution, and block size */
1350:   MatGetOptionsPrefix(A,&prefix);
1351:   MatGetOwnershipRange(A,&rstart,&rend);
1352:   MatGetBlockSize(A,&bs);
1353:   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

1355:   /* Get diagonal block from matrix if possible */
1356:   MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);
1357:   if (hasop) {
1358:     MatGetDiagonalBlock(A,&Ad);
1359:   }
1360:   if (Ad) {
1361:     PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1362:     if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1363:   }
1364:   if (Ad && nloc > 1) {
1365:     PetscBool  match,done;
1366:     /* Try to set up a good matrix partitioning if available */
1367:     MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1368:     PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1369:     MatPartitioningSetFromOptions(mpart);
1370:     PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1371:     if (!match) {
1372:       PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1373:     }
1374:     if (!match) { /* assume a "good" partitioner is available */
1375:       PetscInt       na;
1376:       const PetscInt *ia,*ja;
1377:       MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1378:       if (done) {
1379:         /* Build adjacency matrix by hand. Unfortunately a call to
1380:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1381:            remove the block-aij structure and we cannot expect
1382:            MatPartitioning to split vertices as we need */
1383:         PetscInt       i,j,len,nnz,cnt,*iia=0,*jja=0;
1384:         const PetscInt *row;
1385:         nnz = 0;
1386:         for (i=0; i<na; i++) { /* count number of nonzeros */
1387:           len = ia[i+1] - ia[i];
1388:           row = ja + ia[i];
1389:           for (j=0; j<len; j++) {
1390:             if (row[j] == i) { /* don't count diagonal */
1391:               len--; break;
1392:             }
1393:           }
1394:           nnz += len;
1395:         }
1396:         PetscMalloc1(na+1,&iia);
1397:         PetscMalloc1(nnz,&jja);
1398:         nnz    = 0;
1399:         iia[0] = 0;
1400:         for (i=0; i<na; i++) { /* fill adjacency */
1401:           cnt = 0;
1402:           len = ia[i+1] - ia[i];
1403:           row = ja + ia[i];
1404:           for (j=0; j<len; j++) {
1405:             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1406:           }
1407:           nnz += cnt;
1408:           iia[i+1] = nnz;
1409:         }
1410:         /* Partitioning of the adjacency matrix */
1411:         MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1412:         MatPartitioningSetAdjacency(mpart,adj);
1413:         MatPartitioningSetNParts(mpart,nloc);
1414:         MatPartitioningApply(mpart,&ispart);
1415:         ISPartitioningToNumbering(ispart,&isnumb);
1416:         MatDestroy(&adj);
1417:         foundpart = PETSC_TRUE;
1418:       }
1419:       MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1420:     }
1421:     MatPartitioningDestroy(&mpart);
1422:   }
1423:   PetscMalloc1(nloc,&is);
1424:   if (!foundpart) {

1426:     /* Partitioning by contiguous chunks of rows */

1428:     PetscInt mbs   = (rend-rstart)/bs;
1429:     PetscInt start = rstart;
1430:     for (i=0; i<nloc; i++) {
1431:       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1432:       ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1433:       start += count;
1434:     }

1436:   } else {

1438:     /* Partitioning by adjacency of diagonal block  */

1440:     const PetscInt *numbering;
1441:     PetscInt       *count,nidx,*indices,*newidx,start=0;
1442:     /* Get node count in each partition */
1443:     PetscMalloc1(nloc,&count);
1444:     ISPartitioningCount(ispart,nloc,count);
1445:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1446:       for (i=0; i<nloc; i++) count[i] *= bs;
1447:     }
1448:     /* Build indices from node numbering */
1449:     ISGetLocalSize(isnumb,&nidx);
1450:     PetscMalloc1(nidx,&indices);
1451:     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1452:     ISGetIndices(isnumb,&numbering);
1453:     PetscSortIntWithPermutation(nidx,numbering,indices);
1454:     ISRestoreIndices(isnumb,&numbering);
1455:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1456:       PetscMalloc1(nidx*bs,&newidx);
1457:       for (i=0; i<nidx; i++) {
1458:         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1459:       }
1460:       PetscFree(indices);
1461:       nidx   *= bs;
1462:       indices = newidx;
1463:     }
1464:     /* Shift to get global indices */
1465:     for (i=0; i<nidx; i++) indices[i] += rstart;

1467:     /* Build the index sets for each block */
1468:     for (i=0; i<nloc; i++) {
1469:       ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1470:       ISSort(is[i]);
1471:       start += count[i];
1472:     }

1474:     PetscFree(count);
1475:     PetscFree(indices);
1476:     ISDestroy(&isnumb);
1477:     ISDestroy(&ispart);
1478:   }
1479:   *iis = is;
1480:   return(0);
1481: }
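
/*
   For illustration, assuming mbs = 10 local block rows, block size bs = 2 and nloc = 3 requested
   subdomains, the contiguous-chunk fallback above produces the counts

     i = 0:  (10/3 + (10%3 > 0)) * 2 = (3 + 1) * 2 = 8
     i = 1:  (10/3 + (10%3 > 1)) * 2 = (3 + 0) * 2 = 6
     i = 2:  (10/3 + (10%3 > 2)) * 2 = (3 + 0) * 2 = 6

   which sum to 10 * 2 = 20, so every locally owned row is covered exactly once.
*/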

1483: PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1484: {
1485:   PetscErrorCode  ierr;

1488:   MatSubdomainsCreateCoalesce(A,N,n,iis);
1489:   return(0);
1490: }



1494: /*@C
1495:    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1495:    Schwarz preconditioner for any problem based on its matrix.

1498:    Collective

1500:    Input Parameters:
1501: +  A       - The global matrix operator
1502: -  N       - the number of global subdomains requested

1504:    Output Parameters:
1505: +  n   - the number of subdomains created on this processor
1506: -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)

1508:    Level: advanced

1510:    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1511:          When N < size, the subdomains straddle processor boundaries and are no longer local.
1512:          The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL).  The overlapping
1513:          outer subdomains will be automatically generated from these according to the requested amount of
1514:          overlap; this is currently supported only with local subdomains.


1517: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

1519: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1520: @*/
1521: PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1522: {
1523:   PetscMPIInt     size;
1524:   PetscErrorCode  ierr;


1530:   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1531:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1532:   if (N >= size) {
1533:     *n = N/size + (N%size);
1534:     PCGASMCreateLocalSubdomains(A,*n,iis);
1535:   } else {
1536:     PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1537:   }
1538:   return(0);
1539: }
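
/*
   A minimal usage sketch, assuming a matrix A and a PC pc of type PCGASM already exist
   (error checking omitted):

     PetscInt n;
     IS       *iis;
     PCGASMCreateSubdomains(A,4,&n,&iis);     request 4 nonoverlapping subdomains in total
     PCGASMSetSubdomains(pc,n,iis,NULL);      outer subdomains are generated from the overlap setting
     PCGASMDestroySubdomains(n,&iis,NULL);    once the index sets have been passed to the PC
*/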

1541: /*@C
1542:    PCGASMDestroySubdomains - Destroys the index sets created with
1543:    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1544:    called after setting subdomains with PCGASMSetSubdomains().

1546:    Collective

1548:    Input Parameters:
1549: +  n   - the number of index sets
1550: .  iis - the array of inner subdomains
1551: -  ois - the array of outer subdomains, can be NULL

1553:    Level: intermediate

1555:    Notes:
1556:     This is merely a convenience subroutine that walks each list,
1557:    destroys each IS on the list, and then frees the list. At the end the
1558:    list pointers are set to NULL.

1560: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

1562: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1563: @*/
1564: PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1565: {
1566:   PetscInt       i;

1570:   if (n <= 0) return(0);
1571:   if (ois) {
1573:     if (*ois) {
1575:       for (i=0; i<n; i++) {
1576:         ISDestroy(&(*ois)[i]);
1577:       }
1578:       PetscFree((*ois));
1579:     }
1580:   }
1581:   if (iis) {
1583:     if (*iis) {
1585:       for (i=0; i<n; i++) {
1586:         ISDestroy(&(*iis)[i]);
1587:       }
1588:       PetscFree((*iis));
1589:     }
1590:   }
1591:   return(0);
1592: }


1595: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1596:   {                                                                                                       \
1597:     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1598:     /*                                                                                                    \
1599:      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1600:      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1601:      subdomain).                                                                                          \
1602:      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1603:      of the intersection.                                                                                 \
1604:     */                                                                                                    \
1605:     /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1606:     *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1607:     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1608:     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
1609:     /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1610:     *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1611:     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */      \
1612:     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
1613:     /* Now compute the size of the local subdomain n. */ \
1614:     *n = 0;                                               \
1615:     if (*ylow_loc < *yhigh_loc) {                           \
1616:       PetscInt width = xright-xleft;                     \
1617:       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1618:       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1619:       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1620:     } \
1621:   }
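
/*
   A worked example of the macro above, assuming dof = 1: take a 10x10 grid (M = N = 10), local
   ownership range [first,last) = [25,55), and a subdomain with xleft = 2, xright = 8, ylow = 1,
   yhigh = 6.  Then

     first_row  = 25/10        = 2
     last_row   = 55/10 + 1    = 6
     ylow_loc   = max(2,1)     = 2
     xleft_loc  = max(25%10,2) = 5    (the first owned element, index 25, sits in row 2, column 5)
     yhigh_loc  = min(6,6)     = 6
     xright_loc = min(8,55%10) = 5    (the last owned element, index 54, sits in row 5, column 4)
     n          = 6*(6-2-1) + min(5-2,6) - min(5-2,6) = 18 + 3 - 3 = 18

   which matches a direct count: rows 2-4 contain 3 + 6 + 6 owned elements with columns in [2,8),
   and row 5 contains another 3.
*/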



1625: /*@
1626:    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1627:    preconditioner for a two-dimensional problem on a regular grid.

1629:    Collective

1631:    Input Parameters:
1632: +  M, N               - the global number of grid points in the x and y directions
1633: .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1634: .  dof                - degrees of freedom per node
1635: -  overlap            - overlap in mesh lines

1637:    Output Parameters:
1638: +  Nsub - the number of local subdomains created
1639: .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1640: -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains


1643:    Level: advanced

1645: .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

1647: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1648: @*/
1649: PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1650: {
1652:   PetscMPIInt    size, rank;
1653:   PetscInt       i, j;
1654:   PetscInt       maxheight, maxwidth;
1655:   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1656:   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1657:   PetscInt       x[2][2], y[2][2], n[2];
1658:   PetscInt       first, last;
1659:   PetscInt       nidx, *idx;
1660:   PetscInt       ii,jj,s,q,d;
1661:   PetscInt       k,kk;
1662:   PetscMPIInt    color;
1663:   MPI_Comm       comm, subcomm;
1664:   IS             **xis = 0, **is = ois, **is_local = iis;

1667:   PetscObjectGetComm((PetscObject)pc, &comm);
1668:   MPI_Comm_size(comm, &size);
1669:   MPI_Comm_rank(comm, &rank);
1670:   MatGetOwnershipRange(pc->pmat, &first, &last);
1671:   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1672:                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

1674:   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1675:   s      = 0;
1676:   ystart = 0;
1677:   for (j=0; j<Ndomains; ++j) {
1678:     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1679:     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1680:     /* Vertical domain limits with an overlap. */
1681:     ylow   = PetscMax(ystart - overlap,0);
1682:     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1683:     xstart = 0;
1684:     for (i=0; i<Mdomains; ++i) {
1685:       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1686:       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1687:       /* Horizontal domain limits with an overlap. */
1688:       xleft  = PetscMax(xstart - overlap,0);
1689:       xright = PetscMin(xstart + maxwidth + overlap,M);
1690:       /*
1691:          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1692:       */
1693:       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1694:       if (nidx) ++s;
1695:       xstart += maxwidth;
1696:     } /* for (i = 0; i < Mdomains; ++i) */
1697:     ystart += maxheight;
1698:   } /* for (j = 0; j < Ndomains; ++j) */

1700:   /* Now we can allocate the necessary number of ISs. */
1701:   *nsub  = s;
1702:   PetscMalloc1(*nsub,is);
1703:   PetscMalloc1(*nsub,is_local);
1704:   s      = 0;
1705:   ystart = 0;
1706:   for (j=0; j<Ndomains; ++j) {
1707:     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1708:     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1709:     /* Vertical domain limits with an overlap. */
1710:     y[0][0] = PetscMax(ystart - overlap,0);
1711:     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1712:     /* Vertical domain limits without an overlap. */
1713:     y[1][0] = ystart;
1714:     y[1][1] = PetscMin(ystart + maxheight,N);
1715:     xstart  = 0;
1716:     for (i=0; i<Mdomains; ++i) {
1717:       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1718:       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1719:       /* Horizontal domain limits with an overlap. */
1720:       x[0][0] = PetscMax(xstart - overlap,0);
1721:       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1722:       /* Horizontal domain limits without an overlap. */
1723:       x[1][0] = xstart;
1724:       x[1][1] = PetscMin(xstart+maxwidth,M);
1725:       /*
1726:          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1727:          Do this twice: first for the domains with overlap, then for the domains without.
1728:          During the first pass create the subcommunicators, and use them on the second pass as well.
1729:       */
1730:       for (q = 0; q < 2; ++q) {
1731:         PetscBool split = PETSC_FALSE;
1732:         /*
1733:           The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1734:           according to whether the domain with or without an overlap is considered.
1735:         */
1736:         xleft = x[q][0]; xright = x[q][1];
1737:         ylow  = y[q][0]; yhigh  = y[q][1];
1738:         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1739:         nidx *= dof;
1740:         n[q]  = nidx;
1741:         /*
1742:          Based on the counted number of indices in the local domain *with an overlap*,
1743:          construct a subcommunicator of all the processors supporting this domain.
1744:          Observe that a domain with an overlap might have nontrivial local support,
1745:          while the domain without an overlap might not.  Hence, the decision to participate
1746:          in the subcommunicator must be based on the domain with an overlap.
1747:          */
1748:         if (q == 0) {
1749:           if (nidx) color = 1;
1750:           else color = MPI_UNDEFINED;
1751:           MPI_Comm_split(comm, color, rank, &subcomm);
1752:           split = PETSC_TRUE;
1753:         }
1754:         /*
1755:          Proceed only if the number of local indices *with an overlap* is nonzero.
1756:          */
1757:         if (n[0]) {
1758:           if (q == 0) xis = is;
1759:           if (q == 1) {
1760:             /*
1761:              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1762:              Moreover, if the overlap is zero, the two ISs are identical.
1763:              */
1764:             if (overlap == 0) {
1765:               (*is_local)[s] = (*is)[s];
1766:               PetscObjectReference((PetscObject)(*is)[s]);
1767:               continue;
1768:             } else {
1769:               xis     = is_local;
1770:               subcomm = ((PetscObject)(*is)[s])->comm;
1771:             }
1772:           } /* if (q == 1) */
1773:           idx  = NULL;
1774:           PetscMalloc1(nidx,&idx);
1775:           if (nidx) {
1776:             k = 0;
1777:             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1778:               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1779:               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1780:               kk = dof*(M*jj + x0);
1781:               for (ii=x0; ii<x1; ++ii) {
1782:                 for (d = 0; d < dof; ++d) {
1783:                   idx[k++] = kk++;
1784:                 }
1785:               }
1786:             }
1787:           }
1788:           ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1789:           if (split) {
1790:             MPI_Comm_free(&subcomm);
1791:           }
1792:         }/* if (n[0]) */
1793:       }/* for (q = 0; q < 2; ++q) */
1794:       if (n[0]) ++s;
1795:       xstart += maxwidth;
1796:     } /* for (i = 0; i < Mdomains; ++i) */
1797:     ystart += maxheight;
1798:   } /* for (j = 0; j < Ndomains; ++j) */
1799:   return(0);
1800: }
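
/*
   A minimal usage sketch, assuming the PC pc has already been set to PCGASM (error checking
   omitted): decompose a 64x64 grid with one unknown per node into a 4x4 array of subdomains
   with an overlap of 2 mesh lines.

     PetscInt nsub;
     IS       *iis,*ois;
     PCGASMCreateSubdomains2D(pc,64,64,4,4,1,2,&nsub,&iis,&ois);
     PCGASMSetSubdomains(pc,nsub,iis,ois);
     PCGASMDestroySubdomains(nsub,&iis,&ois);
*/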

1802: /*@C
1803:     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1804:     for the additive Schwarz preconditioner.

1806:     Not Collective

1808:     Input Parameter:
1809: .   pc - the preconditioner context

1811:     Output Parameters:
1812: +   n   - the number of subdomains for this processor (default value = 1)
1813: .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1814: -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)


1817:     Notes:
1818:     The user is responsible for destroying the ISs and freeing the returned arrays.
1819:     The IS numbering is in the parallel, global numbering of the vector.

1821:     Level: advanced

1823: .keywords: PC, GASM, get, subdomains, additive Schwarz

1825: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1826:           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1827: @*/
1828: PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1829: {
1830:   PC_GASM        *osm;
1832:   PetscBool      match;
1833:   PetscInt       i;

1837:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1838:   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1839:   osm = (PC_GASM*)pc->data;
1840:   if (n) *n = osm->n;
1841:   if (iis) {
1842:     PetscMalloc1(osm->n, iis);
1843:   }
1844:   if (ois) {
1845:     PetscMalloc1(osm->n, ois);
1846:   }
1847:   if (iis || ois) {
1848:     for (i = 0; i < osm->n; ++i) {
1849:       if (iis) (*iis)[i] = osm->iis[i];
1850:       if (ois) (*ois)[i] = osm->ois[i];
1851:     }
1852:   }
1853:   return(0);
1854: }
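
/*
   A minimal usage sketch, assuming pc is a PCGASM whose subdomains have already been set up
   (error checking omitted); per the Notes above, the caller frees the returned arrays.

     PetscInt n,i;
     IS       *iis,*ois;
     PCGASMGetSubdomains(pc,&n,&iis,&ois);
     for (i = 0; i < n; ++i) {
       ISView(iis[i],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)iis[i])));
     }
     PetscFree(iis);
     PetscFree(ois);
*/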

1856: /*@C
1857:     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1858:     only) for the additive Schwarz preconditioner.

1860:     Not Collective

1862:     Input Parameter:
1863: .   pc - the preconditioner context

1865:     Output Parameters:
1866: +   n   - the number of matrices for this processor (default value = 1)
1867: -   mat - the matrices

1869:     Notes:
1870:     Matrices returned by this routine have the same communicators as the index sets (IS)
1871:            used to define the subdomains in PCGASMSetSubdomains().
1872:     Level: advanced

1874: .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

1876: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1877:           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1878: @*/
1879: PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1880: {
1881:   PC_GASM        *osm;
1883:   PetscBool      match;

1889:   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1890:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1891:   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1892:   osm = (PC_GASM*)pc->data;
1893:   if (n) *n = osm->n;
1894:   if (mat) *mat = osm->pmat;
1895:   return(0);
1896: }
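
/*
   A minimal usage sketch, assuming pc is a PCGASM and PCSetUp() (or KSPSetUp()) has already been
   called, as required by the check above (error checking omitted).  The returned array aliases the
   PC's internal storage (osm->pmat), so it is not freed here.

     PetscInt n;
     Mat      *mat;
     PCGASMGetSubmatrices(pc,&n,&mat);
     if (n > 0) {
       MatView(mat[0],PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)mat[0])));
     }
*/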

1898: /*@
1899:     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1900:     Logically Collective

1902:     Input Parameter:
1903: +   pc  - the preconditioner
1904: -   flg - boolean indicating whether to use subdomains defined by the DM

1906:     Options Database Key:
1907: .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM, whenever possible (overridden by -pc_gasm_total_subdomains and -pc_gasm_overlap)

1909:     Level: intermediate

1911:     Notes:
1912:     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1913:     so calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1914:     automatically turns the use of DM-defined subdomains off.

1916: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

1918: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1919:           PCGASMCreateSubdomains2D()
1920: @*/
1921: PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1922: {
1923:   PC_GASM        *osm = (PC_GASM*)pc->data;
1925:   PetscBool      match;

1930:   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1931:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1932:   if (match) {
1933:     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1934:       osm->dm_subdomains = flg;
1935:     }
1936:   }
1937:   return(0);
1938: }
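
/*
   A minimal usage sketch, assuming a DM dm and a KSP ksp already exist (error checking omitted);
   note that the call must precede PCSetUp(), as enforced by the check above.

     PC pc;
     KSPSetDM(ksp,dm);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCGASM);
     PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
     KSPSetFromOptions(ksp);

   Equivalently, from the options database:  -pc_type gasm -pc_gasm_dm_subdomains
*/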

1940: /*@
1941:     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1942:     Not Collective

1944:     Input Parameter:
1945: .   pc  - the preconditioner

1947:     Output Parameter:
1948: .   flg - boolean indicating whether to use subdomains defined by the DM

1950:     Level: intermediate

1952: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

1954: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1955:           PCGASMCreateSubdomains2D()
1956: @*/
1957: PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1958: {
1959:   PC_GASM        *osm = (PC_GASM*)pc->data;
1961:   PetscBool      match;

1966:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1967:   if (match) {
1968:     if (flg) *flg = osm->dm_subdomains;
1969:   }
1970:   return(0);
1971: }