Actual source code: asm.c
petsc-3.3-p7 2013-05-11
2: /*
3: This file defines an additive Schwarz preconditioner for any Mat implementation.
5: Note that each processor may have any number of subdomains. But in order to
6: deal easily with the VecScatter(), we treat each processor as if it has the
7: same number of subdomains.
9: n - total number of true subdomains on all processors
10: n_local_true - actual number of subdomains on this processor
11: n_local = maximum over all processors of n_local_true
12: */
13: #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/
15: typedef struct {
16: PetscInt n, n_local, n_local_true;
17: PetscInt overlap; /* overlap requested by user */
18: KSP *ksp; /* linear solvers for each block */
19: VecScatter *restriction; /* mapping from global to subregion */
20: VecScatter *localization; /* mapping from overlapping to non-overlapping subregion */
21: VecScatter *prolongation; /* mapping from subregion to global */
22: Vec *x,*y,*y_local; /* work vectors */
23: IS *is; /* index set that defines each overlapping subdomain */
24: IS *is_local; /* index set that defines each non-overlapping subdomain, may be NULL */
25: Mat *mat,*pmat; /* mat is not currently used */
26: PCASMType type; /* use reduced interpolation, restriction or both */
27: PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
28: PetscBool same_local_solves; /* flag indicating whether all local solvers are same */
29: PetscBool sort_indices; /* flag to sort subdomain indices */
30: } PC_ASM;
34: static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
35: {
36: PC_ASM *osm = (PC_ASM*)pc->data;
38: PetscMPIInt rank;
39: PetscInt i,bsz;
40: PetscBool iascii,isstring;
41: PetscViewer sviewer;
45: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
46: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
47: if (iascii) {
48: char overlaps[256] = "user-defined overlap",blocks[256] = "total subdomain blocks not yet set";
49: if (osm->overlap >= 0) {PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);}
50: if (osm->n > 0) {PetscSNPrintf(blocks,sizeof blocks,"total subdomain blocks = %D",osm->n);}
51: PetscViewerASCIIPrintf(viewer," Additive Schwarz: %s, %s\n",blocks,overlaps);
52: PetscViewerASCIIPrintf(viewer," Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);
53: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
54: if (osm->same_local_solves) {
55: if (osm->ksp) {
56: PetscViewerASCIIPrintf(viewer," Local solve is same for all blocks, in the following KSP and PC objects:\n");
57: PetscViewerGetSingleton(viewer,&sviewer);
58: if (!rank) {
59: PetscViewerASCIIPushTab(viewer);
60: KSPView(osm->ksp[0],sviewer);
61: PetscViewerASCIIPopTab(viewer);
62: }
63: PetscViewerRestoreSingleton(viewer,&sviewer);
64: }
65: } else {
66: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
67: PetscViewerASCIISynchronizedPrintf(viewer," [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);
68: PetscViewerFlush(viewer);
69: PetscViewerASCIIPrintf(viewer," Local solve info for each block is in the following KSP and PC objects:\n");
70: PetscViewerASCIIPushTab(viewer);
71: PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
72: for (i=0; i<osm->n_local; i++) {
73: PetscViewerGetSingleton(viewer,&sviewer);
74: if (i < osm->n_local_true) {
75: ISGetLocalSize(osm->is[i],&bsz);
76: PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);
77: KSPView(osm->ksp[i],sviewer);
78: PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
79: }
80: PetscViewerRestoreSingleton(viewer,&sviewer);
81: }
82: PetscViewerASCIIPopTab(viewer);
83: PetscViewerFlush(viewer);
84: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
85: }
86: } else if (isstring) {
87: PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);
88: PetscViewerGetSingleton(viewer,&sviewer);
89: if (osm->ksp) {KSPView(osm->ksp[0],sviewer);}
90: PetscViewerRestoreSingleton(viewer,&sviewer);
91: } else {
92: SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
93: }
94: return(0);
95: }
99: static PetscErrorCode PCASMPrintSubdomains(PC pc)
100: {
101: PC_ASM *osm = (PC_ASM*)pc->data;
102: const char *prefix;
103: char fname[PETSC_MAX_PATH_LEN+1];
104: PetscViewer viewer, sviewer;
105: char *s;
106: PetscInt i,j,nidx;
107: const PetscInt *idx;
108: PetscMPIInt rank, size;
112: MPI_Comm_size(((PetscObject)pc)->comm, &size);
113: MPI_Comm_rank(((PetscObject)pc)->comm, &rank);
114: PCGetOptionsPrefix(pc,&prefix);
115: PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);
116: if (fname[0] == 0) { PetscStrcpy(fname,"stdout"); };
117: PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);
118: for (i=0;i<osm->n_local;i++) {
119: if(i < osm->n_local_true) {
120: ISGetLocalSize(osm->is[i],&nidx);
121: ISGetIndices(osm->is[i],&idx);
122:       /* Print to a string viewer; at most 16 characters per index plus 512 characters for the header. */
123: PetscMalloc(sizeof(char)*(16*(nidx+1)+512), &s);
124: PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
125:       PetscViewerStringSPrintf(sviewer, "[%d:%d] Subdomain %D with overlap:\n", (int)rank, (int)size, i);
126: for (j=0; j<nidx; j++) {
127: PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
128: }
129: ISRestoreIndices(osm->is[i],&idx);
130: PetscViewerStringSPrintf(sviewer,"\n");
131: PetscViewerDestroy(&sviewer);
132: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
133: PetscViewerASCIISynchronizedPrintf(viewer, s);
134: PetscViewerFlush(viewer);
135: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
136: PetscFree(s);
137: if (osm->is_local) {
138:         /* Print to a string viewer; at most 16 characters per index plus 512 characters for the header. */
139: PetscMalloc(sizeof(char)*(16*(nidx+1)+512), &s);
140: PetscViewerStringOpen(PETSC_COMM_SELF, s, 16*(nidx+1)+512, &sviewer);
141:         PetscViewerStringSPrintf(sviewer, "[%d:%d] Subdomain %D without overlap:\n", (int)rank, (int)size, i);
142: ISGetLocalSize(osm->is_local[i],&nidx);
143: ISGetIndices(osm->is_local[i],&idx);
144: for (j=0; j<nidx; j++) {
145: PetscViewerStringSPrintf(sviewer,"%D ",idx[j]);
146: }
147: ISRestoreIndices(osm->is_local[i],&idx);
148: PetscViewerStringSPrintf(sviewer,"\n");
149: PetscViewerDestroy(&sviewer);
150: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
151: PetscViewerASCIISynchronizedPrintf(viewer, s);
152: PetscViewerFlush(viewer);
153: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
154: PetscFree(s);
155: }
156: }
157: else {
158: /* Participate in collective viewer calls. */
159: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
160: PetscViewerFlush(viewer);
161: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
162: /* Assume either all ranks have is_local or none do. */
163: if(osm->is_local) {
164: PetscViewerASCIISynchronizedAllow(viewer, PETSC_TRUE);
165: PetscViewerFlush(viewer);
166: PetscViewerASCIISynchronizedAllow(viewer, PETSC_FALSE);
167: }
168: }
169: }
170: PetscViewerFlush(viewer);
171: PetscViewerDestroy(&viewer);
172: return(0);
173: }
177: static PetscErrorCode PCSetUp_ASM(PC pc)
178: {
179: PC_ASM *osm = (PC_ASM*)pc->data;
181: PetscBool symset,flg;
182: PetscInt i,m,m_local,firstRow,lastRow;
183: MatReuse scall = MAT_REUSE_MATRIX;
184: IS isl;
185: KSP ksp;
186: PC subpc;
187: const char *prefix,*pprefix;
188: Vec vec;
189: DM *domain_dm = PETSC_NULL;
192: if (!pc->setupcalled) {
194: if (!osm->type_set) {
195: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
196: if (symset && flg) { osm->type = PC_ASM_BASIC; }
197: }
199: /* Note: if subdomains have been set either via PCASMSetTotalSubdomains() or via PCASMSetLocalSubdomains(), osm->n_local_true will not be PETSC_DECIDE */
200: if (osm->n_local_true == PETSC_DECIDE) {
201: /* no subdomains given */
202: /* try pc->dm first */
203: if(pc->dm) {
204: char ddm_name[1024];
205: DM ddm;
206: PetscBool flg;
207: PetscInt num_domains, d;
208: char **domain_names;
209: IS *inner_domain_is, *outer_domain_is;
210: /* Allow the user to request a decomposition DM by name */
211: PetscStrncpy(ddm_name, "", 1024);
212:         PetscOptionsString("-pc_asm_decomposition", "Name of the DM defining the decomposition", "PCSetDM", ddm_name, ddm_name,1024,&flg);
213: if(flg) {
214: DMCreateDomainDecompositionDM(pc->dm, ddm_name, &ddm);
215: if(!ddm) {
216:             SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Unknown DM decomposition name %s", ddm_name);
217: }
218: PetscInfo(pc,"Using domain decomposition DM defined using options database\n");
219: PCSetDM(pc,ddm);
220: }
221: DMCreateDomainDecomposition(pc->dm, &num_domains, &domain_names, &inner_domain_is, &outer_domain_is, &domain_dm);
222: if(num_domains) {
223: PCASMSetLocalSubdomains(pc, num_domains, outer_domain_is, inner_domain_is);
224: }
225: for(d = 0; d < num_domains; ++d) {
226: PetscFree(domain_names[d]);
227: ISDestroy(&inner_domain_is[d]);
228: ISDestroy(&outer_domain_is[d]);
229: }
230: PetscFree(domain_names);
231: PetscFree(inner_domain_is);
232: PetscFree(outer_domain_is);
233: }
234: if (osm->n_local_true == PETSC_DECIDE) {
235: /* still no subdomains; use one subdomain per processor */
236: osm->n_local_true = 1;
237: }
238: }
239: {/* determine the global and max number of subdomains */
240: PetscInt inwork[2],outwork[2];
241: inwork[0] = inwork[1] = osm->n_local_true;
242: MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);
243: osm->n_local = outwork[0];
244: osm->n = outwork[1];
245: }
246: if (!osm->is){ /* create the index sets */
247: PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);
248: }
249: if (osm->n_local_true > 1 && !osm->is_local) {
250: PetscMalloc(osm->n_local_true*sizeof(IS),&osm->is_local);
251: for (i=0; i<osm->n_local_true; i++) {
252: if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
253: ISDuplicate(osm->is[i],&osm->is_local[i]);
254: ISCopy(osm->is[i],osm->is_local[i]);
255: } else {
256: PetscObjectReference((PetscObject)osm->is[i]);
257: osm->is_local[i] = osm->is[i];
258: }
259: }
260: }
261: PCGetOptionsPrefix(pc,&prefix);
262: flg = PETSC_FALSE;
263: PetscOptionsGetBool(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);
264: if (flg) { PCASMPrintSubdomains(pc); }
266: if (osm->overlap > 0) {
267: /* Extend the "overlapping" regions by a number of steps */
268: MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);
269: }
270: if (osm->sort_indices) {
271: for (i=0; i<osm->n_local_true; i++) {
272: ISSort(osm->is[i]);
273: if (osm->is_local) {
274: ISSort(osm->is_local[i]);
275: }
276: }
277: }
278: /* Create the local work vectors and scatter contexts */
279: MatGetVecs(pc->pmat,&vec,0);
280: PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->restriction);
281: if (osm->is_local) {PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->localization);}
282: PetscMalloc(osm->n_local*sizeof(VecScatter),&osm->prolongation);
283: PetscMalloc(osm->n_local*sizeof(Vec),&osm->x);
284: PetscMalloc(osm->n_local*sizeof(Vec),&osm->y);
285: PetscMalloc(osm->n_local*sizeof(Vec),&osm->y_local);
286: VecGetOwnershipRange(vec, &firstRow, &lastRow);
287: for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) {
288: ISGetLocalSize(osm->is[i],&m);
289: VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);
290: ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);
291: VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);
292: ISDestroy(&isl);
293: VecDuplicate(osm->x[i],&osm->y[i]);
294: if (osm->is_local) {
295: ISLocalToGlobalMapping ltog;
296: IS isll;
297: const PetscInt *idx_local;
298: PetscInt *idx,nout;
300:       ISLocalToGlobalMappingCreateIS(osm->is[i],&ltog);
301: ISGetLocalSize(osm->is_local[i],&m_local);
302: ISGetIndices(osm->is_local[i], &idx_local);
303: PetscMalloc(m_local*sizeof(PetscInt),&idx);
304: ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,m_local,idx_local,&nout,idx);
305:       ISLocalToGlobalMappingDestroy(&ltog);
306: if (nout != m_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_local not a subset of is");
307: ISRestoreIndices(osm->is_local[i], &idx_local);
308: ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,PETSC_OWN_POINTER,&isll);
309: ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);
310: VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);
311: VecScatterCreate(osm->y[i],isll,osm->y_local[i],isl,&osm->localization[i]);
312: ISDestroy(&isll);
314: VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);
315: ISDestroy(&isl);
316: } else {
317: VecGetLocalSize(vec,&m_local);
318: osm->y_local[i] = osm->y[i];
319: PetscObjectReference((PetscObject) osm->y[i]);
320: osm->prolongation[i] = osm->restriction[i];
321: PetscObjectReference((PetscObject) osm->restriction[i]);
322: }
323: }
324:   if (firstRow != lastRow) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %D != %D", firstRow, lastRow);
325: for (i=osm->n_local_true; i<osm->n_local; i++) {
326: VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);
327: VecDuplicate(osm->x[i],&osm->y[i]);
328: VecDuplicate(osm->x[i],&osm->y_local[i]);
329: ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);
330: VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);
331: if (osm->is_local) {
332: VecScatterCreate(osm->y[i],isl,osm->y_local[i],isl,&osm->localization[i]);
333: VecScatterCreate(vec,isl,osm->x[i],isl,&osm->prolongation[i]);
334: } else {
335: osm->prolongation[i] = osm->restriction[i];
336: PetscObjectReference((PetscObject) osm->restriction[i]);
337: }
338: ISDestroy(&isl);
339: }
340: VecDestroy(&vec);
342: if (!osm->ksp) {
343: /* Create the local solvers */
344: PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);
345: if(domain_dm) {
346: PetscInfo(pc,"Setting up ASM subproblems using the embedded DM\n");
347: }
348: for (i=0; i<osm->n_local_true; i++) {
349: KSPCreate(PETSC_COMM_SELF,&ksp);
350: PetscLogObjectParent(pc,ksp);
351: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
352: KSPSetType(ksp,KSPPREONLY);
353: KSPGetPC(ksp,&subpc);
354: PCGetOptionsPrefix(pc,&prefix);
355: KSPSetOptionsPrefix(ksp,prefix);
356: KSPAppendOptionsPrefix(ksp,"sub_");
357: if(domain_dm){
358: KSPSetDM(ksp, domain_dm[i]);
359: KSPSetDMActive(ksp, PETSC_FALSE);
360: DMDestroy(&domain_dm[i]);
361: }
362: osm->ksp[i] = ksp;
363: }
364: if(domain_dm) {
365: PetscFree(domain_dm);
366: }
367: }
368: scall = MAT_INITIAL_MATRIX;
369: } else {
370: /*
371: Destroy the blocks from the previous iteration
372: */
373: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
374: MatDestroyMatrices(osm->n_local_true,&osm->pmat);
375: scall = MAT_INITIAL_MATRIX;
376: }
377: }
379: /*
380: Extract out the submatrices
381: */
382: MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);
383: if (scall == MAT_INITIAL_MATRIX) {
384: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
385: for (i=0; i<osm->n_local_true; i++) {
386: PetscLogObjectParent(pc,osm->pmat[i]);
387: PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
388: }
389: }
391: /* Return control to the user so that the submatrices can be modified (e.g., to apply
392: different boundary conditions for the submatrices than for the global problem) */
393: PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);
395: /*
396: Loop over subdomains putting them into local ksp
397: */
398: for (i=0; i<osm->n_local_true; i++) {
399: KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);
400: if (!pc->setupcalled) {
401: KSPSetFromOptions(osm->ksp[i]);
402: }
403: }
405: return(0);
406: }
410: static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
411: {
412: PC_ASM *osm = (PC_ASM*)pc->data;
414: PetscInt i;
417: for (i=0; i<osm->n_local_true; i++) {
418: KSPSetUp(osm->ksp[i]);
419: }
420: return(0);
421: }
425: static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
426: {
427: PC_ASM *osm = (PC_ASM*)pc->data;
429: PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true;
430: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
433: /*
434: Support for limiting the restriction or interpolation to only local
435: subdomain values (leaving the other values 0).
436: */
437: if (!(osm->type & PC_ASM_RESTRICT)) {
438: forward = SCATTER_FORWARD_LOCAL;
439: /* have to zero the work RHS since scatter may leave some slots empty */
440: for (i=0; i<n_local_true; i++) {
441: VecZeroEntries(osm->x[i]);
442: }
443: }
444: if (!(osm->type & PC_ASM_INTERPOLATE)) {
445: reverse = SCATTER_REVERSE_LOCAL;
446: }
448: for (i=0; i<n_local; i++) {
449: VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
450: }
451: VecZeroEntries(y);
452: /* do the local solves */
453: for (i=0; i<n_local_true; i++) {
454: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
455: KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
456: if (osm->localization) {
457: VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
458: VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
459: }
460: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
461: }
462: /* handle the rest of the scatters that do not have local solves */
463: for (i=n_local_true; i<n_local; i++) {
464: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
465: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
466: }
467: for (i=0; i<n_local; i++) {
468: VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
469: }
470: return(0);
471: }
475: static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
476: {
477: PC_ASM *osm = (PC_ASM*)pc->data;
479: PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true;
480: ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
483: /*
484: Support for limiting the restriction or interpolation to only local
485: subdomain values (leaving the other values 0).
487: Note: these are reversed from the PCApply_ASM() because we are applying the
488: transpose of the three terms
489: */
490: if (!(osm->type & PC_ASM_INTERPOLATE)) {
491: forward = SCATTER_FORWARD_LOCAL;
492: /* have to zero the work RHS since scatter may leave some slots empty */
493: for (i=0; i<n_local_true; i++) {
494: VecZeroEntries(osm->x[i]);
495: }
496: }
497: if (!(osm->type & PC_ASM_RESTRICT)) {
498: reverse = SCATTER_REVERSE_LOCAL;
499: }
501: for (i=0; i<n_local; i++) {
502: VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
503: }
504: VecZeroEntries(y);
505: /* do the local solves */
506: for (i=0; i<n_local_true; i++) {
507: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
508: KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
509: if (osm->localization) {
510: VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
511: VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);
512: }
513: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
514: }
515: /* handle the rest of the scatters that do not have local solves */
516: for (i=n_local_true; i<n_local; i++) {
517: VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);
518: VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
519: }
520: for (i=0; i<n_local; i++) {
521: VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);
522: }
523: return(0);
524: }
528: static PetscErrorCode PCReset_ASM(PC pc)
529: {
530: PC_ASM *osm = (PC_ASM*)pc->data;
532: PetscInt i;
535: if (osm->ksp) {
536: for (i=0; i<osm->n_local_true; i++) {
537: KSPReset(osm->ksp[i]);
538: }
539: }
540: if (osm->pmat) {
541: if (osm->n_local_true > 0) {
542: MatDestroyMatrices(osm->n_local_true,&osm->pmat);
543: }
544: }
545: if (osm->restriction) {
546: for (i=0; i<osm->n_local; i++) {
547: VecScatterDestroy(&osm->restriction[i]);
548: if (osm->localization) {VecScatterDestroy(&osm->localization[i]);}
549: VecScatterDestroy(&osm->prolongation[i]);
550: VecDestroy(&osm->x[i]);
551: VecDestroy(&osm->y[i]);
552: VecDestroy(&osm->y_local[i]);
553: }
554: PetscFree(osm->restriction);
555: if (osm->localization) {PetscFree(osm->localization);}
556: PetscFree(osm->prolongation);
557: PetscFree(osm->x);
558: PetscFree(osm->y);
559: PetscFree(osm->y_local);
560: }
561: if (osm->is) {
562: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
563: osm->is = 0;
564: osm->is_local = 0;
565: }
566: return(0);
567: }
571: static PetscErrorCode PCDestroy_ASM(PC pc)
572: {
573: PC_ASM *osm = (PC_ASM*)pc->data;
575: PetscInt i;
578: PCReset_ASM(pc);
579: if (osm->ksp) {
580: for (i=0; i<osm->n_local_true; i++) {
581: KSPDestroy(&osm->ksp[i]);
582: }
583: PetscFree(osm->ksp);
584: }
585: PetscFree(pc->data);
586: return(0);
587: }
591: static PetscErrorCode PCSetFromOptions_ASM(PC pc)
592: {
593: PC_ASM *osm = (PC_ASM*)pc->data;
595: PetscInt blocks,ovl;
596: PetscBool symset,flg;
597: PCASMType asmtype;
600: /* set the type to symmetric if matrix is symmetric */
601: if (!osm->type_set && pc->pmat) {
602: MatIsSymmetricKnown(pc->pmat,&symset,&flg);
603: if (symset && flg) { osm->type = PC_ASM_BASIC; }
604: }
605: PetscOptionsHead("Additive Schwarz options");
606: PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);
607: if (flg) {PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL); }
608: PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);
609: if (flg) {PCASMSetOverlap(pc,ovl); }
610: flg = PETSC_FALSE;
611: PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);
612: if (flg) {PCASMSetType(pc,asmtype); }
613: PetscOptionsTail();
614: return(0);
615: }
617: /*------------------------------------------------------------------------------------*/
619: EXTERN_C_BEGIN
622: PetscErrorCode PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
623: {
624: PC_ASM *osm = (PC_ASM*)pc->data;
626: PetscInt i;
629: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
630: if (pc->setupcalled && (n != osm->n_local_true || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetUp().");
632: if (!pc->setupcalled) {
633: if (is) {
634: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is[i]);}
635: }
636: if (is_local) {
637: for (i=0; i<n; i++) {PetscObjectReference((PetscObject)is_local[i]);}
638: }
639: if (osm->is) {
640: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
641: }
642: osm->n_local_true = n;
643: osm->is = 0;
644: osm->is_local = 0;
645: if (is) {
646: PetscMalloc(n*sizeof(IS),&osm->is);
647: for (i=0; i<n; i++) { osm->is[i] = is[i]; }
648: /* Flag indicating that the user has set overlapping subdomains so PCASM should not increase their size. */
649: osm->overlap = -1;
650: }
651: if (is_local) {
652: PetscMalloc(n*sizeof(IS),&osm->is_local);
653: for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
654: }
655: }
656: return(0);
657: }
658: EXTERN_C_END
660: EXTERN_C_BEGIN
663: PetscErrorCode PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is,IS *is_local)
664: {
665: PC_ASM *osm = (PC_ASM*)pc->data;
667: PetscMPIInt rank,size;
668: PetscInt n;
671: if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
672:   if (is || is_local) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets;\n\tthey cannot be set globally yet.");
674: /*
675: Split the subdomains equally among all processors
676: */
677: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
678: MPI_Comm_size(((PetscObject)pc)->comm,&size);
679: n = N/size + ((N % size) > rank);
680: if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
681: if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetUp().");
682: if (!pc->setupcalled) {
683: if (osm->is) {
684: PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);
685: }
686: osm->n_local_true = n;
687: osm->is = 0;
688: osm->is_local = 0;
689: }
690: return(0);
691: }
692: EXTERN_C_END
694: EXTERN_C_BEGIN
697: PetscErrorCode PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
698: {
699: PC_ASM *osm = (PC_ASM*)pc->data;
702: if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
703: if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetUp().");
704: if (!pc->setupcalled) {
705: osm->overlap = ovl;
706: }
707: return(0);
708: }
709: EXTERN_C_END
711: EXTERN_C_BEGIN
714: PetscErrorCode PCASMSetType_ASM(PC pc,PCASMType type)
715: {
716: PC_ASM *osm = (PC_ASM*)pc->data;
719: osm->type = type;
720: osm->type_set = PETSC_TRUE;
721: return(0);
722: }
723: EXTERN_C_END
725: EXTERN_C_BEGIN
728: PetscErrorCode PCASMSetSortIndices_ASM(PC pc,PetscBool doSort)
729: {
730: PC_ASM *osm = (PC_ASM*)pc->data;
733: osm->sort_indices = doSort;
734: return(0);
735: }
736: EXTERN_C_END
738: EXTERN_C_BEGIN
741: PetscErrorCode PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
742: {
743: PC_ASM *osm = (PC_ASM*)pc->data;
747:   if (osm->n_local_true < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
749: if (n_local) {
750: *n_local = osm->n_local_true;
751: }
752: if (first_local) {
753: MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);
754: *first_local -= osm->n_local_true;
755: }
756: if (ksp) {
757: /* Assume that local solves are now different; not necessarily
758: true though! This flag is used only for PCView_ASM() */
759: *ksp = osm->ksp;
760: osm->same_local_solves = PETSC_FALSE;
761: }
762: return(0);
763: }
764: EXTERN_C_END
769: /*@C
770: PCASMSetLocalSubdomains - Sets the local subdomains (for this processor only) for the additive Schwarz preconditioner.
772: Collective on PC
774: Input Parameters:
775: + pc - the preconditioner context
776: . n - the number of subdomains for this processor (default value = 1)
777: . is - the index set that defines the subdomains for this processor
778: (or PETSC_NULL for PETSc to determine subdomains)
779: - is_local - the index sets that define the local part of the subdomains for this processor
780: (or PETSC_NULL to use the default of 1 subdomain per process)
782: Notes:
783: The IS numbering is in the parallel, global numbering of the vector for both is and is_local
785: By default the ASM preconditioner uses 1 block per processor.
787: Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
789: Level: advanced
791: .keywords: PC, ASM, set, local, subdomains, additive Schwarz
793: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
794: PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
795: @*/
796: PetscErrorCode PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
797: {
802: PetscTryMethod(pc,"PCASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));
803: return(0);
804: }
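/*
   Usage sketch (not part of asm.c): one way a caller might supply two local
   subdomains per process before the first PCSetUp()/KSPSetUp().  The helper
   name, the even split of the locally owned rows, and the overlap of 1 are
   illustrative assumptions only.
*/
#include <petscksp.h>

static PetscErrorCode SetTwoLocalSubdomains(KSP ksp,Mat A)
{
  PC             pc;
  IS             is[2];
  PetscInt       rstart,rend,mid;
  PetscErrorCode ierr;

  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  mid  = rstart + (rend - rstart)/2;
  /* two non-overlapping blocks of locally owned rows, in the global numbering */
  ierr = ISCreateStride(PETSC_COMM_SELF,mid-rstart,rstart,1,&is[0]);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-mid,mid,1,&is[1]);CHKERRQ(ierr);
  ierr = PCASMSetLocalSubdomains(pc,2,is,PETSC_NULL);CHKERRQ(ierr);
  /* ask PCASM to grow each block by one layer of matrix-graph neighbours */
  ierr = PCASMSetOverlap(pc,1);CHKERRQ(ierr);
  /* the PC keeps its own references, so the local ones can be dropped */
  ierr = ISDestroy(&is[0]);CHKERRQ(ierr);
  ierr = ISDestroy(&is[1]);CHKERRQ(ierr);
  return 0;
}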
808: /*@C
809: PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
810: additive Schwarz preconditioner. Either all or no processors in the
811: PC communicator must call this routine, with the same index sets.
813: Collective on PC
815: Input Parameters:
816: + pc - the preconditioner context
817: . N - the number of subdomains for all processors
818: . is - the index sets that define the subdomains for all processors
819:          (or PETSC_NULL to ask PETSc to come up with subdomains)
820: - is_local - the index sets that define the local part of the subdomains for this processor
821: (or PETSC_NULL to use the default of 1 subdomain per process)
823: Options Database Key:
824: To set the total number of subdomain blocks rather than specify the
825: index sets, use the option
826: . -pc_asm_blocks <blks> - Sets total blocks
828: Notes:
829: Currently you cannot use this to set the actual subdomains with the argument is.
831: By default the ASM preconditioner uses 1 block per processor.
833: These index sets cannot be destroyed until after completion of the
834: linear solves for which the ASM preconditioner is being used.
836: Use PCASMSetLocalSubdomains() to set local subdomains.
838: The IS numbering is in the parallel, global numbering of the vector for both is and is_local
840: Level: advanced
842: .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
844: .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
845: PCASMCreateSubdomains2D()
846: @*/
847: PetscErrorCode PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
848: {
853: PetscTryMethod(pc,"PCASMSetTotalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,N,is,is_local));
854: return(0);
855: }
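/*
   Usage sketch (not part of asm.c): request a total of 8 subdomains across
   the whole communicator and let PETSc split them among the processes, the
   programmatic analogue of -pc_asm_blocks 8.  The count of 8 and the helper
   name are illustrative assumptions.
*/
#include <petscksp.h>

static PetscErrorCode UseEightTotalBlocks(KSP ksp)
{
  PC             pc;
  PetscErrorCode ierr;

  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
  /* the index-set arguments must be PETSC_NULL; only the count may be set globally */
  ierr = PCASMSetTotalSubdomains(pc,8,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  return 0;
}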
859: /*@
860: PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
861: additive Schwarz preconditioner. Either all or no processors in the
862: PC communicator must call this routine.
864: Logically Collective on PC
866: Input Parameters:
867: + pc - the preconditioner context
868: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
870: Options Database Key:
871: . -pc_asm_overlap <ovl> - Sets overlap
873: Notes:
874: By default the ASM preconditioner uses 1 block per processor. To use
875:    multiple blocks per processor, see PCASMSetTotalSubdomains() and
876: PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
878: The overlap defaults to 1, so if one desires that no additional
879: overlap be computed beyond what may have been set with a call to
880: PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
881:    must be set to 0. In particular, if one does not explicitly set
882:    the subdomains in an application code, then all overlap would be computed
883: internally by PETSc, and using an overlap of 0 would result in an ASM
884: variant that is equivalent to the block Jacobi preconditioner.
886: Note that one can define initial index sets with any overlap via
887: PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
888: PCASMSetOverlap() merely allows PETSc to extend that overlap further
889: if desired.
891: Level: intermediate
893: .keywords: PC, ASM, set, overlap
895: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
896: PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
897: @*/
898: PetscErrorCode PCASMSetOverlap(PC pc,PetscInt ovl)
899: {
905: PetscTryMethod(pc,"PCASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
906: return(0);
907: }
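/*
   Usage sketch (not part of asm.c): choose how much overlap is added to the
   automatically generated subdomains.  An overlap of 0 with PETSc-generated
   subdomains reduces ASM to block Jacobi; the value 2 below is an
   illustrative assumption.
*/
#include <petscksp.h>

static PetscErrorCode SetAsmOverlap(KSP ksp,PetscBool blockJacobiLike)
{
  PC             pc;
  PetscErrorCode ierr;

  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
  ierr = PCASMSetOverlap(pc,blockJacobiLike ? 0 : 2);CHKERRQ(ierr);
  return 0;
}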
911: /*@
912: PCASMSetType - Sets the type of restriction and interpolation used
913: for local problems in the additive Schwarz method.
915: Logically Collective on PC
917: Input Parameters:
918: + pc - the preconditioner context
919: - type - variant of ASM, one of
920: .vb
921: PC_ASM_BASIC - full interpolation and restriction
922: PC_ASM_RESTRICT - full restriction, local processor interpolation
923: PC_ASM_INTERPOLATE - full interpolation, local processor restriction
924: PC_ASM_NONE - local processor restriction and interpolation
925: .ve
927: Options Database Key:
928: . -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
930: Level: intermediate
932: .keywords: PC, ASM, set, type
934: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
935: PCASMCreateSubdomains2D()
936: @*/
937: PetscErrorCode PCASMSetType(PC pc,PCASMType type)
938: {
944: PetscTryMethod(pc,"PCASMSetType_C",(PC,PCASMType),(pc,type));
945: return(0);
946: }
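/*
   Usage sketch (not part of asm.c): switch from the default restricted
   variant to classical additive Schwarz, the programmatic analogue of
   -pc_asm_type basic.  The helper name is an illustrative assumption.
*/
#include <petscksp.h>

static PetscErrorCode UseBasicAsm(KSP ksp)
{
  PC             pc;
  PetscErrorCode ierr;

  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
  ierr = PCASMSetType(pc,PC_ASM_BASIC);CHKERRQ(ierr);
  return 0;
}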
950: /*@
951: PCASMSetSortIndices - Determines whether subdomain indices are sorted.
953: Logically Collective on PC
955: Input Parameters:
956: + pc - the preconditioner context
957: - doSort - sort the subdomain indices
959: Level: intermediate
961: .keywords: PC, ASM, set, type
963: .seealso: PCASMSetLocalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
964: PCASMCreateSubdomains2D()
965: @*/
966: PetscErrorCode PCASMSetSortIndices(PC pc,PetscBool doSort)
967: {
973: PetscTryMethod(pc,"PCASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
974: return(0);
975: }
979: /*@C
980: PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
981: this processor.
982:
983: Collective on PC iff first_local is requested
985: Input Parameter:
986: . pc - the preconditioner context
988: Output Parameters:
989: + n_local - the number of blocks on this processor or PETSC_NULL
990: . first_local - the global number of the first block on this processor or PETSC_NULL,
991: all processors must request or all must pass PETSC_NULL
992: - ksp - the array of KSP contexts
994: Note:
995:    After PCASMGetSubKSP() the array of KSP contexts must not be freed by the caller.
997: Currently for some matrix implementations only 1 block per processor
998: is supported.
999:
1000: You must call KSPSetUp() before calling PCASMGetSubKSP().
1002: Fortran note:
1003: The output argument 'ksp' must be an array of sufficient length or PETSC_NULL_OBJECT. The latter can be used to learn the necessary length.
1005: Level: advanced
1007: .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
1009: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMSetOverlap(),
1010: PCASMCreateSubdomains2D(),
1011: @*/
1012: PetscErrorCode PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1013: {
1018: PetscUseMethod(pc,"PCASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1019: return(0);
1020: }
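/*
   Usage sketch (not part of asm.c): after KSPSetUp() on the outer solver,
   fetch the block solvers and configure each one directly.  GMRES with
   ILU(1) on every block is an illustrative assumption, not a recommendation
   made by this file.
*/
#include <petscksp.h>

static PetscErrorCode CustomizeBlockSolvers(KSP outer)
{
  PC             pc,subpc;
  KSP            *subksp;
  PetscInt       i,nlocal,first;
  PetscErrorCode ierr;

  ierr = KSPGetPC(outer,&pc);CHKERRQ(ierr);
  ierr = PCASMGetSubKSP(pc,&nlocal,&first,&subksp);CHKERRQ(ierr);
  for (i=0; i<nlocal; i++) {
    ierr = KSPSetType(subksp[i],KSPGMRES);CHKERRQ(ierr);
    ierr = KSPGetPC(subksp[i],&subpc);CHKERRQ(ierr);
    ierr = PCSetType(subpc,PCILU);CHKERRQ(ierr);
    ierr = PCFactorSetLevels(subpc,1);CHKERRQ(ierr);
  }
  /* subksp is owned by the PC and must not be freed here */
  return 0;
}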
1022: /* -------------------------------------------------------------------------------------*/
1023: /*MC
1024: PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1025: its own KSP object.
1027: Options Database Keys:
1028: + -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
1029: . -pc_asm_blocks <blks> - Sets total blocks
1030: . -pc_asm_overlap <ovl> - Sets overlap
1031: - -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
1033: IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1034: will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
1035: -pc_asm_type basic to use the standard ASM.
1037: Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1038: than one processor. Defaults to one block per processor.
1040: To set options on the solvers for each block append -sub_ to all the KSP, and PC
1041: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1042:
1043:      To set options on the solvers separately for each block, call PCASMGetSubKSP()
1044: and set the options directly on the resulting KSP object (you can access its PC
1045: with KSPGetPC())
1048: Level: beginner
1050: Concepts: additive Schwarz method
1052: References:
1053: An additive variant of the Schwarz alternating method for the case of many subregions
1054: M Dryja, OB Widlund - Courant Institute, New York University Technical report
1056:    Domain Decomposition: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1057: Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1059: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1060: PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
1061: PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()
1063: M*/
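/*
   Usage sketch (not part of asm.c): a complete solve with GMRES preconditioned
   by the default restricted ASM, one block per process and overlap 1, with the
   block solvers left to the options database, e.g.
       -sub_ksp_type preonly -sub_pc_type ilu -sub_pc_factor_levels 1
   The assumption is that A, b and x are an already assembled parallel matrix
   and vectors.
*/
#include <petscksp.h>

static PetscErrorCode SolveWithASM(Mat A,Vec b,Vec x)
{
  KSP            ksp;
  PC             pc;
  PetscErrorCode ierr;

  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);     /* defaults: 1 block/process, overlap 1, restricted ASM */
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);  /* picks up -pc_asm_* and -sub_* options */
  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  return 0;
}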
1065: EXTERN_C_BEGIN
1068: PetscErrorCode PCCreate_ASM(PC pc)
1069: {
1071: PC_ASM *osm;
1074: PetscNewLog(pc,PC_ASM,&osm);
1075: osm->n = PETSC_DECIDE;
1076: osm->n_local = 0;
1077: osm->n_local_true = PETSC_DECIDE;
1078: osm->overlap = 1;
1079: osm->ksp = 0;
1080: osm->restriction = 0;
1081: osm->localization = 0;
1082: osm->prolongation = 0;
1083: osm->x = 0;
1084: osm->y = 0;
1085: osm->y_local = 0;
1086: osm->is = 0;
1087: osm->is_local = 0;
1088: osm->mat = 0;
1089: osm->pmat = 0;
1090: osm->type = PC_ASM_RESTRICT;
1091: osm->same_local_solves = PETSC_TRUE;
1092: osm->sort_indices = PETSC_TRUE;
1094: pc->data = (void*)osm;
1095: pc->ops->apply = PCApply_ASM;
1096: pc->ops->applytranspose = PCApplyTranspose_ASM;
1097: pc->ops->setup = PCSetUp_ASM;
1098: pc->ops->reset = PCReset_ASM;
1099: pc->ops->destroy = PCDestroy_ASM;
1100: pc->ops->setfromoptions = PCSetFromOptions_ASM;
1101: pc->ops->setuponblocks = PCSetUpOnBlocks_ASM;
1102: pc->ops->view = PCView_ASM;
1103: pc->ops->applyrichardson = 0;
1105: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
1106: PCASMSetLocalSubdomains_ASM);
1107: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
1108: PCASMSetTotalSubdomains_ASM);
1109: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
1110: PCASMSetOverlap_ASM);
1111: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
1112: PCASMSetType_ASM);
1113: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetSortIndices_C","PCASMSetSortIndices_ASM",
1114: PCASMSetSortIndices_ASM);
1115: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
1116: PCASMGetSubKSP_ASM);
1117: return(0);
1118: }
1119: EXTERN_C_END
1124: /*@C
1125: PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1126:    preconditioner for any problem on a general grid.
1128: Collective
1130: Input Parameters:
1131: + A - The global matrix operator
1132: - n - the number of local blocks
1134: Output Parameters:
1135: . outis - the array of index sets defining the subdomains
1137: Level: advanced
1139:    Note: this generates nonoverlapping subdomains; PCASM will generate the overlap
1140: from these if you use PCASMSetLocalSubdomains()
1142:    In the Fortran version you must provide the array outis[], already allocated with length n.
1144: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1146: .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
1147: @*/
1148: PetscErrorCode PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1149: {
1150: MatPartitioning mpart;
1151: const char *prefix;
1152: PetscErrorCode (*f)(Mat,Mat*);
1153: PetscMPIInt size;
1154: PetscInt i,j,rstart,rend,bs;
1155: PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1156: Mat Ad = PETSC_NULL, adj;
1157: IS ispart,isnumb,*is;
1158: PetscErrorCode ierr;
1163: if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1165: /* Get prefix, row distribution, and block size */
1166: MatGetOptionsPrefix(A,&prefix);
1167: MatGetOwnershipRange(A,&rstart,&rend);
1168: MatGetBlockSize(A,&bs);
1169: if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1171: /* Get diagonal block from matrix if possible */
1172: MPI_Comm_size(((PetscObject)A)->comm,&size);
1173: PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);
1174: if (f) {
1175: MatGetDiagonalBlock(A,&Ad);
1176: } else if (size == 1) {
1177: Ad = A;
1178: }
1179: if (Ad) {
1180: PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1181: if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1182: }
1183: if (Ad && n > 1) {
1184: PetscBool match,done;
1185: /* Try to setup a good matrix partitioning if available */
1186: MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1187: PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1188: MatPartitioningSetFromOptions(mpart);
1189: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1190: if (!match) {
1191: PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1192: }
1193: if (!match) { /* assume a "good" partitioner is available */
1194: PetscInt na,*ia,*ja;
1195: MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1196: if (done) {
1197: /* Build adjacency matrix by hand. Unfortunately a call to
1198: MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1199: remove the block-aij structure and we cannot expect
1200: MatPartitioning to split vertices as we need */
1201: PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1202: nnz = 0;
1203: for (i=0; i<na; i++) { /* count number of nonzeros */
1204: len = ia[i+1] - ia[i];
1205: row = ja + ia[i];
1206: for (j=0; j<len; j++) {
1207: if (row[j] == i) { /* don't count diagonal */
1208: len--; break;
1209: }
1210: }
1211: nnz += len;
1212: }
1213: PetscMalloc((na+1)*sizeof(PetscInt),&iia);
1214: PetscMalloc((nnz)*sizeof(PetscInt),&jja);
1215: nnz = 0;
1216: iia[0] = 0;
1217: for (i=0; i<na; i++) { /* fill adjacency */
1218: cnt = 0;
1219: len = ia[i+1] - ia[i];
1220: row = ja + ia[i];
1221: for (j=0; j<len; j++) {
1222: if (row[j] != i) { /* if not diagonal */
1223: jja[nnz+cnt++] = row[j];
1224: }
1225: }
1226: nnz += cnt;
1227: iia[i+1] = nnz;
1228: }
1229: /* Partitioning of the adjacency matrix */
1230: MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);
1231: MatPartitioningSetAdjacency(mpart,adj);
1232: MatPartitioningSetNParts(mpart,n);
1233: MatPartitioningApply(mpart,&ispart);
1234: ISPartitioningToNumbering(ispart,&isnumb);
1235: MatDestroy(&adj);
1236: foundpart = PETSC_TRUE;
1237: }
1238: MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1239: }
1240: MatPartitioningDestroy(&mpart);
1241: }
1243: PetscMalloc(n*sizeof(IS),&is);
1244: *outis = is;
1246: if (!foundpart) {
1248: /* Partitioning by contiguous chunks of rows */
1250: PetscInt mbs = (rend-rstart)/bs;
1251: PetscInt start = rstart;
1252: for (i=0; i<n; i++) {
1253: PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1254: ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1255: start += count;
1256: }
1258: } else {
1260: /* Partitioning by adjacency of diagonal block */
1262: const PetscInt *numbering;
1263: PetscInt *count,nidx,*indices,*newidx,start=0;
1264: /* Get node count in each partition */
1265: PetscMalloc(n*sizeof(PetscInt),&count);
1266: ISPartitioningCount(ispart,n,count);
1267: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1268: for (i=0; i<n; i++) count[i] *= bs;
1269: }
1270: /* Build indices from node numbering */
1271: ISGetLocalSize(isnumb,&nidx);
1272: PetscMalloc(nidx*sizeof(PetscInt),&indices);
1273: for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1274: ISGetIndices(isnumb,&numbering);
1275: PetscSortIntWithPermutation(nidx,numbering,indices);
1276: ISRestoreIndices(isnumb,&numbering);
1277: if (isbaij && bs > 1) { /* adjust for the block-aij case */
1278: PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);
1279: for (i=0; i<nidx; i++)
1280: for (j=0; j<bs; j++)
1281: newidx[i*bs+j] = indices[i]*bs + j;
1282: PetscFree(indices);
1283: nidx *= bs;
1284: indices = newidx;
1285: }
1286: /* Shift to get global indices */
1287: for (i=0; i<nidx; i++) indices[i] += rstart;
1289: /* Build the index sets for each block */
1290: for (i=0; i<n; i++) {
1291: ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1292: ISSort(is[i]);
1293: start += count[i];
1294: }
1296: PetscFree(count);
1297: PetscFree(indices);
1298: ISDestroy(&isnumb);
1299: ISDestroy(&ispart);
1301: }
1303: return(0);
1304: }
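/*
   Usage sketch (not part of asm.c): let PCASMCreateSubdomains() carve the
   locally owned rows of A into 4 non-overlapping blocks, hand them to PCASM,
   request one layer of overlap, and release the caller's references.  The
   block count of 4 is an illustrative assumption.
*/
#include <petscksp.h>

static PetscErrorCode FourBlocksPerProcess(PC pc,Mat A)
{
  IS             *is;
  PetscErrorCode ierr;

  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
  ierr = PCASMCreateSubdomains(A,4,&is);CHKERRQ(ierr);
  ierr = PCASMSetLocalSubdomains(pc,4,is,PETSC_NULL);CHKERRQ(ierr);
  ierr = PCASMSetOverlap(pc,1);CHKERRQ(ierr);
  /* the PC holds its own references to the index sets */
  ierr = PCASMDestroySubdomains(4,is,PETSC_NULL);CHKERRQ(ierr);
  return 0;
}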
1308: /*@C
1309: PCASMDestroySubdomains - Destroys the index sets created with
1310: PCASMCreateSubdomains(). Should be called after setting subdomains
1311: with PCASMSetLocalSubdomains().
1313: Collective
1315: Input Parameters:
1316: + n - the number of index sets
1317: . is - the array of index sets
1318: - is_local - the array of local index sets, can be PETSC_NULL
1320: Level: advanced
1322: .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1324: .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1325: @*/
1326: PetscErrorCode PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1327: {
1328: PetscInt i;
1331: if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1333: for (i=0; i<n; i++) { ISDestroy(&is[i]); }
1334: PetscFree(is);
1335: if (is_local) {
1337: for (i=0; i<n; i++) { ISDestroy(&is_local[i]); }
1338: PetscFree(is_local);
1339: }
1340: return(0);
1341: }
1345: /*@
1346: PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1347: preconditioner for a two-dimensional problem on a regular grid.
1349: Not Collective
1351: Input Parameters:
1352: + m, n - the number of mesh points in the x and y directions
1353: . M, N - the number of subdomains in the x and y directions
1354: . dof - degrees of freedom per node
1355: - overlap - overlap in mesh lines
1357: Output Parameters:
1358: + Nsub - the number of subdomains created
1359: . is - array of index sets defining overlapping (if overlap > 0) subdomains
1360: - is_local - array of index sets defining non-overlapping subdomains
1362: Note:
1363:    Presently PCASMCreateSubdomains2D() is valid only for sequential
1364: preconditioners. More general related routines are
1365: PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1367: Level: advanced
1369: .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1371: .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1372: PCASMSetOverlap()
1373: @*/
1374: PetscErrorCode PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is,IS **is_local)
1375: {
1376: PetscInt i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outer;
1378: PetscInt nidx,*idx,loc,ii,jj,count;
1381:   if (dof != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"dof must be 1");
1383: *Nsub = N*M;
1384: PetscMalloc((*Nsub)*sizeof(IS*),is);
1385: PetscMalloc((*Nsub)*sizeof(IS*),is_local);
1386: ystart = 0;
1387: loc_outer = 0;
1388: for (i=0; i<N; i++) {
1389: height = n/N + ((n % N) > i); /* height of subdomain */
1390: if (height < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1391: yleft = ystart - overlap; if (yleft < 0) yleft = 0;
1392: yright = ystart + height + overlap; if (yright > n) yright = n;
1393: xstart = 0;
1394: for (j=0; j<M; j++) {
1395: width = m/M + ((m % M) > j); /* width of subdomain */
1396: if (width < 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1397: xleft = xstart - overlap; if (xleft < 0) xleft = 0;
1398: xright = xstart + width + overlap; if (xright > m) xright = m;
1399: nidx = (xright - xleft)*(yright - yleft);
1400: PetscMalloc(nidx*sizeof(PetscInt),&idx);
1401: loc = 0;
1402: for (ii=yleft; ii<yright; ii++) {
1403: count = m*ii + xleft;
1404: for (jj=xleft; jj<xright; jj++) {
1405: idx[loc++] = count++;
1406: }
1407: }
1408: ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,PETSC_COPY_VALUES,(*is)+loc_outer);
1409: if (overlap == 0) {
1410: PetscObjectReference((PetscObject)(*is)[loc_outer]);
1411: (*is_local)[loc_outer] = (*is)[loc_outer];
1412: } else {
1413: for (loc=0,ii=ystart; ii<ystart+height; ii++) {
1414: for (jj=xstart; jj<xstart+width; jj++) {
1415: idx[loc++] = m*ii + jj;
1416: }
1417: }
1418: ISCreateGeneral(PETSC_COMM_SELF,loc,idx,PETSC_COPY_VALUES,*is_local+loc_outer);
1419: }
1420: PetscFree(idx);
1421: xstart += width;
1422: loc_outer++;
1423: }
1424: ystart += height;
1425: }
1426: for (i=0; i<*Nsub; i++) { ISSort((*is)[i]); }
1427: return(0);
1428: }
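/*
   Usage sketch (not part of asm.c), valid only for a sequential run: build a
   2x2 array of subdomains for a 16x16 structured grid with one unknown per
   node and one mesh line of overlap, then register them with PCASM.  The grid
   and block sizes are illustrative assumptions.
*/
#include <petscksp.h>

static PetscErrorCode TwoByTwoSubdomains(PC pc)
{
  IS             *is,*is_local;
  PetscInt       Nsub;
  PetscErrorCode ierr;

  ierr = PCSetType(pc,PCASM);CHKERRQ(ierr);
  ierr = PCASMCreateSubdomains2D(16,16,2,2,1,1,&Nsub,&is,&is_local);CHKERRQ(ierr);
  ierr = PCASMSetLocalSubdomains(pc,Nsub,is,is_local);CHKERRQ(ierr);
  ierr = PCASMDestroySubdomains(Nsub,is,is_local);CHKERRQ(ierr);
  return 0;
}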
1432: /*@C
1433: PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1434: only) for the additive Schwarz preconditioner.
1436: Not Collective
1438: Input Parameter:
1439: . pc - the preconditioner context
1441: Output Parameters:
1442: + n - the number of subdomains for this processor (default value = 1)
1443: . is - the index sets that define the subdomains for this processor
1444: - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1445:
1447: Notes:
1448: The IS numbering is in the parallel, global numbering of the vector.
1450: Level: advanced
1452: .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1454: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1455: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1456: @*/
1457: PetscErrorCode PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1458: {
1459: PC_ASM *osm;
1461: PetscBool match;
1467: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1468: if (!match) {
1469: if (n) *n = 0;
1470: if (is) *is = PETSC_NULL;
1471: } else {
1472: osm = (PC_ASM*)pc->data;
1473: if (n) *n = osm->n_local_true;
1474: if (is) *is = osm->is;
1475: if (is_local) *is_local = osm->is_local;
1476: }
1477: return(0);
1478: }
1482: /*@C
1483: PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1484: only) for the additive Schwarz preconditioner.
1486: Not Collective
1488: Input Parameter:
1489: . pc - the preconditioner context
1491: Output Parameters:
1492: + n - the number of matrices for this processor (default value = 1)
1493: - mat - the matrices
1494:
1496: Level: advanced
1498:    Notes: Call after PCSetUp() (or KSPSetUp()) but before PCApply() (or KSPSolve()) and before PCSetUpOnBlocks().
1500: Usually one would use PCSetModifySubmatrices() to change the submatrices in building the preconditioner.
1502: .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1504: .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1505: PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains(), PCSetModifySubmatrices()
1506: @*/
1507: PetscErrorCode PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1508: {
1509: PC_ASM *osm;
1511: PetscBool match;
1517:   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1518: PetscObjectTypeCompare((PetscObject)pc,PCASM,&match);
1519: if (!match) {
1520: if (n) *n = 0;
1521: if (mat) *mat = PETSC_NULL;
1522: } else {
1523: osm = (PC_ASM*)pc->data;
1524: if (n) *n = osm->n_local_true;
1525: if (mat) *mat = osm->pmat;
1526: }
1527: return(0);
1528: }
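/*
   Usage sketch (not part of asm.c), to be called after KSPSetUp()/PCSetUp():
   inspect the dimensions of the extracted overlapping submatrices; printing
   with PetscPrintf() is an illustrative choice.
*/
#include <petscksp.h>

static PetscErrorCode ReportBlockSizes(PC pc)
{
  Mat            *submat;
  PetscInt       i,n,rows,cols;
  PetscErrorCode ierr;

  ierr = PCASMGetLocalSubmatrices(pc,&n,&submat);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = MatGetSize(submat[i],&rows,&cols);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"block %D is %D by %D\n",i,rows,cols);CHKERRQ(ierr);
  }
  return 0;
}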