Actual source code: bjacobi.c
petsc-3.8.4 2018-03-24
2: /*
3: Defines a block Jacobi preconditioner.
4: */
6: #include <../src/ksp/pc/impls/bjacobi/bjacobi.h>
8: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
9: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
10: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
12: static PetscErrorCode PCSetUp_BJacobi(PC pc)
13: {
14: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
15: Mat mat = pc->mat,pmat = pc->pmat;
17: void (*f)(void);
18: PetscInt N,M,start,i,sum,end;
19: PetscInt bs,i_start=-1,i_end=-1;
20: PetscMPIInt rank,size;
21: const char *pprefix,*mprefix;
24: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
25: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
26: MatGetLocalSize(pc->pmat,&M,&N);
27: MatGetBlockSize(pc->pmat,&bs);
29: if (jac->n > 0 && jac->n < size) {
30: PCSetUp_BJacobi_Multiproc(pc);
31: return(0);
32: }
34: /* --------------------------------------------------------------------------
35: Determines the number of blocks assigned to each processor
36: -----------------------------------------------------------------------------*/
38: /* local block count given */
39: if (jac->n_local > 0 && jac->n < 0) {
40: MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
41: if (jac->l_lens) { /* check that user set these correctly */
42: sum = 0;
43: for (i=0; i<jac->n_local; i++) {
44: if (jac->l_lens[i]/bs*bs !=jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
45: sum += jac->l_lens[i];
46: }
47: if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
48: } else {
49: PetscMalloc1(jac->n_local,&jac->l_lens);
50: for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
51: }
52: } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
53: /* global blocks given: determine which ones are local */
54: if (jac->g_lens) {
55: /* check if the g_lens is has valid entries */
56: for (i=0; i<jac->n; i++) {
57: if (!jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
58: if (jac->g_lens[i]/bs*bs != jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
59: }
60: if (size == 1) {
61: jac->n_local = jac->n;
62: PetscMalloc1(jac->n_local,&jac->l_lens);
63: PetscMemcpy(jac->l_lens,jac->g_lens,jac->n_local*sizeof(PetscInt));
64: /* check that user set these correctly */
65: sum = 0;
66: for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
67: if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
68: } else {
69: MatGetOwnershipRange(pc->pmat,&start,&end);
70: /* loop over blocks determing first one owned by me */
71: sum = 0;
72: for (i=0; i<jac->n+1; i++) {
73: if (sum == start) { i_start = i; goto start_1;}
74: if (i < jac->n) sum += jac->g_lens[i];
75: }
76: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
77: start_1:
78: for (i=i_start; i<jac->n+1; i++) {
79: if (sum == end) { i_end = i; goto end_1; }
80: if (i < jac->n) sum += jac->g_lens[i];
81: }
82: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
83: end_1:
84: jac->n_local = i_end - i_start;
85: PetscMalloc1(jac->n_local,&jac->l_lens);
86: PetscMemcpy(jac->l_lens,jac->g_lens+i_start,jac->n_local*sizeof(PetscInt));
87: }
88: } else { /* no global blocks given, determine then using default layout */
89: jac->n_local = jac->n/size + ((jac->n % size) > rank);
90: PetscMalloc1(jac->n_local,&jac->l_lens);
91: for (i=0; i<jac->n_local; i++) {
92: jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
93: if (!jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
94: }
95: }
96: } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
97: jac->n = size;
98: jac->n_local = 1;
99: PetscMalloc1(1,&jac->l_lens);
100: jac->l_lens[0] = M;
101: } else { /* jac->n > 0 && jac->n_local > 0 */
102: if (!jac->l_lens) {
103: PetscMalloc1(jac->n_local,&jac->l_lens);
104: for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
105: }
106: }
107: if (jac->n_local < 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");
109: /* -------------------------
110: Determines mat and pmat
111: ---------------------------*/
112: MatShellGetOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&f);
113: if (!f && size == 1) {
114: mat = pc->mat;
115: pmat = pc->pmat;
116: } else {
117: if (pc->useAmat) {
118: /* use block from Amat matrix, not Pmat for local MatMult() */
119: MatGetDiagonalBlock(pc->mat,&mat);
120: /* make submatrix have same prefix as entire matrix */
121: PetscObjectGetOptionsPrefix((PetscObject)pc->mat,&mprefix);
122: PetscObjectSetOptionsPrefix((PetscObject)mat,mprefix);
123: }
124: if (pc->pmat != pc->mat || !pc->useAmat) {
125: MatGetDiagonalBlock(pc->pmat,&pmat);
126: /* make submatrix have same prefix as entire matrix */
127: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
128: PetscObjectSetOptionsPrefix((PetscObject)pmat,pprefix);
129: } else pmat = mat;
130: }
132: /* ------
133: Setup code depends on the number of blocks
134: */
135: if (jac->n_local == 1) {
136: PCSetUp_BJacobi_Singleblock(pc,mat,pmat);
137: } else {
138: PCSetUp_BJacobi_Multiblock(pc,mat,pmat);
139: }
140: return(0);
141: }
143: /* Default destroy, if it has never been setup */
144: static PetscErrorCode PCDestroy_BJacobi(PC pc)
145: {
146: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
150: PetscFree(jac->g_lens);
151: PetscFree(jac->l_lens);
152: PetscFree(pc->data);
153: return(0);
154: }
157: static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
158: {
159: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
161: PetscInt blocks,i;
162: PetscBool flg;
165: PetscOptionsHead(PetscOptionsObject,"Block Jacobi options");
166: PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg);
167: if (flg) {
168: PCBJacobiSetTotalBlocks(pc,blocks,NULL);
169: }
170: if (jac->ksp) {
171: /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
172: * unless we had already been called. */
173: for (i=0; i<jac->n_local; i++) {
174: KSPSetFromOptions(jac->ksp[i]);
175: }
176: }
177: PetscOptionsTail();
178: return(0);
179: }
181: #include <petscdraw.h>
182: static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
183: {
184: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
185: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
186: PetscErrorCode ierr;
187: PetscMPIInt rank;
188: PetscInt i;
189: PetscBool iascii,isstring,isdraw;
190: PetscViewer sviewer;
193: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
194: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
195: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
196: if (iascii) {
197: if (pc->useAmat) {
198: PetscViewerASCIIPrintf(viewer," using Amat local matrix, number of blocks = %D\n",jac->n);
199: }
200: PetscViewerASCIIPrintf(viewer," number of blocks = %D\n",jac->n);
201: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
202: if (jac->same_local_solves) {
203: PetscViewerASCIIPrintf(viewer," Local solve is same for all blocks, in the following KSP and PC objects:\n");
204: if (jac->ksp && !jac->psubcomm) {
205: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
206: if (!rank) {
207: PetscViewerASCIIPushTab(viewer);
208: KSPView(jac->ksp[0],sviewer);
209: PetscViewerASCIIPopTab(viewer);
210: }
211: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
212: } else if (mpjac && jac->ksp && mpjac->psubcomm) {
213: PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer);
214: if (!mpjac->psubcomm->color) {
215: PetscViewerASCIIPushTab(viewer);
216: KSPView(*(jac->ksp),sviewer);
217: PetscViewerASCIIPopTab(viewer);
218: }
219: PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer);
220: }
221: } else {
222: PetscInt n_global;
223: MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
224: PetscViewerASCIIPushSynchronized(viewer);
225: PetscViewerASCIIPrintf(viewer," Local solve info for each block is in the following KSP and PC objects:\n");
226: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D, first local block number = %D\n",
227: rank,jac->n_local,jac->first_local);
228: PetscViewerASCIIPushTab(viewer);
229: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
230: for (i=0; i<jac->n_local; i++) {
231: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %D\n",rank,i);
232: KSPView(jac->ksp[i],sviewer);
233: PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
234: }
235: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
236: PetscViewerASCIIPopTab(viewer);
237: PetscViewerFlush(viewer);
238: PetscViewerASCIIPopSynchronized(viewer);
239: }
240: } else if (isstring) {
241: PetscViewerStringSPrintf(viewer," blks=%D",jac->n);
242: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
243: if (jac->ksp) {KSPView(jac->ksp[0],sviewer);}
244: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
245: } else if (isdraw) {
246: PetscDraw draw;
247: char str[25];
248: PetscReal x,y,bottom,h;
250: PetscViewerDrawGetDraw(viewer,0,&draw);
251: PetscDrawGetCurrentPoint(draw,&x,&y);
252: PetscSNPrintf(str,25,"Number blocks %D",jac->n);
253: PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h);
254: bottom = y - h;
255: PetscDrawPushCurrentPoint(draw,x,bottom);
256: /* warning the communicator on viewer is different then on ksp in parallel */
257: if (jac->ksp) {KSPView(jac->ksp[0],viewer);}
258: PetscDrawPopCurrentPoint(draw);
259: }
260: return(0);
261: }
263: /* -------------------------------------------------------------------------------------*/
265: static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
266: {
267: PC_BJacobi *jac = (PC_BJacobi*)pc->data;;
270: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");
272: if (n_local) *n_local = jac->n_local;
273: if (first_local) *first_local = jac->first_local;
274: *ksp = jac->ksp;
275: jac->same_local_solves = PETSC_FALSE; /* Assume that local solves are now different;
276: not necessarily true though! This flag is
277: used only for PCView_BJacobi() */
278: return(0);
279: }
281: static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
282: {
283: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
287: if (pc->setupcalled > 0 && jac->n!=blocks) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
288: jac->n = blocks;
289: if (!lens) jac->g_lens = 0;
290: else {
291: PetscMalloc1(blocks,&jac->g_lens);
292: PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));
293: PetscMemcpy(jac->g_lens,lens,blocks*sizeof(PetscInt));
294: }
295: return(0);
296: }
298: static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
299: {
300: PC_BJacobi *jac = (PC_BJacobi*) pc->data;
303: *blocks = jac->n;
304: if (lens) *lens = jac->g_lens;
305: return(0);
306: }
308: static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
309: {
310: PC_BJacobi *jac;
314: jac = (PC_BJacobi*)pc->data;
316: jac->n_local = blocks;
317: if (!lens) jac->l_lens = 0;
318: else {
319: PetscMalloc1(blocks,&jac->l_lens);
320: PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));
321: PetscMemcpy(jac->l_lens,lens,blocks*sizeof(PetscInt));
322: }
323: return(0);
324: }
326: static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
327: {
328: PC_BJacobi *jac = (PC_BJacobi*) pc->data;
331: *blocks = jac->n_local;
332: if (lens) *lens = jac->l_lens;
333: return(0);
334: }
336: /* -------------------------------------------------------------------------------------*/
338: /*@C
339: PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
340: this processor.
342: Note Collective
344: Input Parameter:
345: . pc - the preconditioner context
347: Output Parameters:
348: + n_local - the number of blocks on this processor, or NULL
349: . first_local - the global number of the first block on this processor, or NULL
350: - ksp - the array of KSP contexts
352: Notes:
353: After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.
355: Currently for some matrix implementations only 1 block per processor
356: is supported.
358: You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().
360: Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
361: You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
362: KSP array must be.
364: Level: advanced
366: .keywords: block, Jacobi, get, sub, KSP, context
368: .seealso: PCBJacobiGetSubKSP()
369: @*/
370: PetscErrorCode PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
371: {
376: PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
377: return(0);
378: }
380: /*@
381: PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
382: Jacobi preconditioner.
384: Collective on PC
386: Input Parameters:
387: + pc - the preconditioner context
388: . blocks - the number of blocks
389: - lens - [optional] integer array containing the size of each block
391: Options Database Key:
392: . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
394: Notes:
395: Currently only a limited number of blocking configurations are supported.
396: All processors sharing the PC must call this routine with the same data.
398: Level: intermediate
400: .keywords: set, number, Jacobi, global, total, blocks
402: .seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks()
403: @*/
404: PetscErrorCode PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
405: {
410: if (blocks <= 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
411: PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens));
412: return(0);
413: }
415: /*@C
416: PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
417: Jacobi preconditioner.
419: Not Collective
421: Input Parameter:
422: . pc - the preconditioner context
424: Output parameters:
425: + blocks - the number of blocks
426: - lens - integer array containing the size of each block
428: Level: intermediate
430: .keywords: get, number, Jacobi, global, total, blocks
432: .seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks()
433: @*/
434: PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
435: {
441: PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
442: return(0);
443: }
445: /*@
446: PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
447: Jacobi preconditioner.
449: Not Collective
451: Input Parameters:
452: + pc - the preconditioner context
453: . blocks - the number of blocks
454: - lens - [optional] integer array containing size of each block
456: Note:
457: Currently only a limited number of blocking configurations are supported.
459: Level: intermediate
461: .keywords: PC, set, number, Jacobi, local, blocks
463: .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks()
464: @*/
465: PetscErrorCode PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
466: {
471: if (blocks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks");
472: PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens));
473: return(0);
474: }
476: /*@C
477: PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
478: Jacobi preconditioner.
480: Not Collective
482: Input Parameters:
483: + pc - the preconditioner context
484: . blocks - the number of blocks
485: - lens - [optional] integer array containing size of each block
487: Note:
488: Currently only a limited number of blocking configurations are supported.
490: Level: intermediate
492: .keywords: PC, get, number, Jacobi, local, blocks
494: .seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks()
495: @*/
496: PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
497: {
503: PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
504: return(0);
505: }
507: /* -----------------------------------------------------------------------------------*/
509: /*MC
510: PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
511: its own KSP object.
513: Options Database Keys:
514: . -pc_use_amat - use Amat to apply block of operator in inner Krylov method
516: Notes: Each processor can have one or more blocks, but a block cannot be shared by more
517: than one processor. Defaults to one block per processor.
519: To set options on the solvers for each block append -sub_ to all the KSP, KSP, and PC
520: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
522: To set the options on the solvers separate for each block call PCBJacobiGetSubKSP()
523: and set the options directly on the resulting KSP object (you can access its PC
524: KSPGetPC())
526: For GPU-based vectors (CUDA, CUSP, ViennaCL) it is recommended to use exactly one block per MPI process for best
527: performance. Different block partitioning may lead to additional data transfers
528: between host and GPU that lead to degraded performance.
530: Level: beginner
532: Concepts: block Jacobi
535: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
536: PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(),
537: PCBJacobiSetLocalBlocks(), PCSetModifySubmatrices()
538: M*/
540: PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
541: {
543: PetscMPIInt rank;
544: PC_BJacobi *jac;
547: PetscNewLog(pc,&jac);
548: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
550: pc->ops->apply = 0;
551: pc->ops->applytranspose = 0;
552: pc->ops->setup = PCSetUp_BJacobi;
553: pc->ops->destroy = PCDestroy_BJacobi;
554: pc->ops->setfromoptions = PCSetFromOptions_BJacobi;
555: pc->ops->view = PCView_BJacobi;
556: pc->ops->applyrichardson = 0;
558: pc->data = (void*)jac;
559: jac->n = -1;
560: jac->n_local = -1;
561: jac->first_local = rank;
562: jac->ksp = 0;
563: jac->same_local_solves = PETSC_TRUE;
564: jac->g_lens = 0;
565: jac->l_lens = 0;
566: jac->psubcomm = 0;
568: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi);
569: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi);
570: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi);
571: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi);
572: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi);
573: return(0);
574: }
576: /* --------------------------------------------------------------------------------------------*/
577: /*
578: These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
579: */
580: static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
581: {
582: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
583: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
584: PetscErrorCode ierr;
587: KSPReset(jac->ksp[0]);
588: VecDestroy(&bjac->x);
589: VecDestroy(&bjac->y);
590: return(0);
591: }
593: static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
594: {
595: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
596: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
597: PetscErrorCode ierr;
600: PCReset_BJacobi_Singleblock(pc);
601: KSPDestroy(&jac->ksp[0]);
602: PetscFree(jac->ksp);
603: PetscFree(jac->l_lens);
604: PetscFree(jac->g_lens);
605: PetscFree(bjac);
606: PetscFree(pc->data);
607: return(0);
608: }
610: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
611: {
612: PetscErrorCode ierr;
613: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
614: KSP subksp = jac->ksp[0];
615: KSPConvergedReason reason;
618: KSPSetUp(subksp);
619: KSPGetConvergedReason(subksp,&reason);
620: if (reason == KSP_DIVERGED_PCSETUP_FAILED) {
621: pc->failedreason = PC_SUBPC_ERROR;
622: }
623: return(0);
624: }
626: static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
627: {
628: PetscErrorCode ierr;
629: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
630: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
633: VecGetLocalVectorRead(x, bjac->x);
634: VecGetLocalVector(y, bjac->y);
635: /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
636: matrix may change even if the outter KSP/PC has not updated the preconditioner, this will trigger a rebuild
637: of the inner preconditioner automatically unless we pass down the outter preconditioners reuse flag.*/
638: KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner);
639: KSPSolve(jac->ksp[0],bjac->x,bjac->y);
640: VecRestoreLocalVectorRead(x, bjac->x);
641: VecRestoreLocalVector(y, bjac->y);
642: return(0);
643: }
645: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
646: {
647: PetscErrorCode ierr;
648: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
649: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
650: PetscScalar *y_array;
651: const PetscScalar *x_array;
652: PC subpc;
655: /*
656: The VecPlaceArray() is to avoid having to copy the
657: y vector into the bjac->x vector. The reason for
658: the bjac->x vector is that we need a sequential vector
659: for the sequential solve.
660: */
661: VecGetArrayRead(x,&x_array);
662: VecGetArray(y,&y_array);
663: VecPlaceArray(bjac->x,x_array);
664: VecPlaceArray(bjac->y,y_array);
665: /* apply the symmetric left portion of the inner PC operator */
666: /* note this by-passes the inner KSP and its options completely */
667: KSPGetPC(jac->ksp[0],&subpc);
668: PCApplySymmetricLeft(subpc,bjac->x,bjac->y);
669: VecResetArray(bjac->x);
670: VecResetArray(bjac->y);
671: VecRestoreArrayRead(x,&x_array);
672: VecRestoreArray(y,&y_array);
673: return(0);
674: }
676: static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
677: {
678: PetscErrorCode ierr;
679: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
680: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
681: PetscScalar *y_array;
682: const PetscScalar *x_array;
683: PC subpc;
686: /*
687: The VecPlaceArray() is to avoid having to copy the
688: y vector into the bjac->x vector. The reason for
689: the bjac->x vector is that we need a sequential vector
690: for the sequential solve.
691: */
692: VecGetArrayRead(x,&x_array);
693: VecGetArray(y,&y_array);
694: VecPlaceArray(bjac->x,x_array);
695: VecPlaceArray(bjac->y,y_array);
697: /* apply the symmetric right portion of the inner PC operator */
698: /* note this by-passes the inner KSP and its options completely */
700: KSPGetPC(jac->ksp[0],&subpc);
701: PCApplySymmetricRight(subpc,bjac->x,bjac->y);
703: VecRestoreArrayRead(x,&x_array);
704: VecRestoreArray(y,&y_array);
705: return(0);
706: }
708: static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
709: {
710: PetscErrorCode ierr;
711: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
712: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
713: PetscScalar *y_array;
714: const PetscScalar *x_array;
717: /*
718: The VecPlaceArray() is to avoid having to copy the
719: y vector into the bjac->x vector. The reason for
720: the bjac->x vector is that we need a sequential vector
721: for the sequential solve.
722: */
723: VecGetArrayRead(x,&x_array);
724: VecGetArray(y,&y_array);
725: VecPlaceArray(bjac->x,x_array);
726: VecPlaceArray(bjac->y,y_array);
727: KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y);
728: VecResetArray(bjac->x);
729: VecResetArray(bjac->y);
730: VecRestoreArrayRead(x,&x_array);
731: VecRestoreArray(y,&y_array);
732: return(0);
733: }
735: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
736: {
737: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
738: PetscErrorCode ierr;
739: PetscInt m;
740: KSP ksp;
741: PC_BJacobi_Singleblock *bjac;
742: PetscBool wasSetup = PETSC_TRUE;
743: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA) || defined(PETSC_HAVE_VIENNACL)
744: PetscBool is_gpumatrix = PETSC_FALSE;
745: #endif
748: if (!pc->setupcalled) {
749: const char *prefix;
751: if (!jac->ksp) {
752: wasSetup = PETSC_FALSE;
754: KSPCreate(PETSC_COMM_SELF,&ksp);
755: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
756: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
757: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
758: KSPSetType(ksp,KSPPREONLY);
759: PCGetOptionsPrefix(pc,&prefix);
760: KSPSetOptionsPrefix(ksp,prefix);
761: KSPAppendOptionsPrefix(ksp,"sub_");
763: pc->ops->reset = PCReset_BJacobi_Singleblock;
764: pc->ops->destroy = PCDestroy_BJacobi_Singleblock;
765: pc->ops->apply = PCApply_BJacobi_Singleblock;
766: pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Singleblock;
767: pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
768: pc->ops->applytranspose = PCApplyTranspose_BJacobi_Singleblock;
769: pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Singleblock;
771: PetscMalloc1(1,&jac->ksp);
772: jac->ksp[0] = ksp;
774: PetscNewLog(pc,&bjac);
775: jac->data = (void*)bjac;
776: } else {
777: ksp = jac->ksp[0];
778: bjac = (PC_BJacobi_Singleblock*)jac->data;
779: }
781: /*
782: The reason we need to generate these vectors is to serve
783: as the right-hand side and solution vector for the solve on the
784: block. We do not need to allocate space for the vectors since
785: that is provided via VecPlaceArray() just before the call to
786: KSPSolve() on the block.
787: */
788: MatGetSize(pmat,&m,&m);
789: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x);
790: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y);
791: #if defined(PETSC_HAVE_CUSP)
792: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSP,MATSEQAIJCUSP,MATMPIAIJCUSP,"");
793: if (is_gpumatrix) {
794: VecSetType(bjac->x,VECCUSP);
795: VecSetType(bjac->y,VECCUSP);
796: }
797: #elif defined(PETSC_HAVE_VECCUDA)
798: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
799: if (is_gpumatrix) {
800: VecSetType(bjac->x,VECCUDA);
801: VecSetType(bjac->y,VECCUDA);
802: }
803: #elif defined(PETSC_HAVE_VIENNACL)
804: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
805: if (is_gpumatrix) {
806: VecSetType(bjac->x,VECVIENNACL);
807: VecSetType(bjac->y,VECVIENNACL);
808: }
809: #endif
810: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x);
811: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y);
812: } else {
813: ksp = jac->ksp[0];
814: bjac = (PC_BJacobi_Singleblock*)jac->data;
815: }
816: if (pc->useAmat) {
817: KSPSetOperators(ksp,mat,pmat);
818: } else {
819: KSPSetOperators(ksp,pmat,pmat);
820: }
821: if (!wasSetup && pc->setfromoptionscalled) {
822: /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
823: KSPSetFromOptions(ksp);
824: }
825: return(0);
826: }
828: /* ---------------------------------------------------------------------------------------------*/
829: static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
830: {
831: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
832: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
833: PetscErrorCode ierr;
834: PetscInt i;
837: if (bjac && bjac->pmat) {
838: MatDestroyMatrices(jac->n_local,&bjac->pmat);
839: if (pc->useAmat) {
840: MatDestroyMatrices(jac->n_local,&bjac->mat);
841: }
842: }
844: for (i=0; i<jac->n_local; i++) {
845: KSPReset(jac->ksp[i]);
846: if (bjac && bjac->x) {
847: VecDestroy(&bjac->x[i]);
848: VecDestroy(&bjac->y[i]);
849: ISDestroy(&bjac->is[i]);
850: }
851: }
852: PetscFree(jac->l_lens);
853: PetscFree(jac->g_lens);
854: return(0);
855: }
857: static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
858: {
859: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
860: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
861: PetscErrorCode ierr;
862: PetscInt i;
865: PCReset_BJacobi_Multiblock(pc);
866: if (bjac) {
867: PetscFree2(bjac->x,bjac->y);
868: PetscFree(bjac->starts);
869: PetscFree(bjac->is);
870: }
871: PetscFree(jac->data);
872: for (i=0; i<jac->n_local; i++) {
873: KSPDestroy(&jac->ksp[i]);
874: }
875: PetscFree(jac->ksp);
876: PetscFree(pc->data);
877: return(0);
878: }
880: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
881: {
882: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
883: PetscErrorCode ierr;
884: PetscInt i,n_local = jac->n_local;
885: KSPConvergedReason reason;
888: for (i=0; i<n_local; i++) {
889: KSPSetUp(jac->ksp[i]);
890: KSPGetConvergedReason(jac->ksp[i],&reason);
891: if (reason == KSP_DIVERGED_PCSETUP_FAILED) {
892: pc->failedreason = PC_SUBPC_ERROR;
893: }
894: }
895: return(0);
896: }
898: /*
899: Preconditioner for block Jacobi
900: */
901: static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
902: {
903: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
904: PetscErrorCode ierr;
905: PetscInt i,n_local = jac->n_local;
906: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
907: PetscScalar *yin;
908: const PetscScalar *xin;
911: VecGetArrayRead(x,&xin);
912: VecGetArray(y,&yin);
913: for (i=0; i<n_local; i++) {
914: /*
915: To avoid copying the subvector from x into a workspace we instead
916: make the workspace vector array point to the subpart of the array of
917: the global vector.
918: */
919: VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);
920: VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);
922: PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
923: KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]);
924: PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
926: VecResetArray(bjac->x[i]);
927: VecResetArray(bjac->y[i]);
928: }
929: VecRestoreArrayRead(x,&xin);
930: VecRestoreArray(y,&yin);
931: return(0);
932: }
934: /*
935: Preconditioner for block Jacobi
936: */
937: static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
938: {
939: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
940: PetscErrorCode ierr;
941: PetscInt i,n_local = jac->n_local;
942: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
943: PetscScalar *yin;
944: const PetscScalar *xin;
947: VecGetArrayRead(x,&xin);
948: VecGetArray(y,&yin);
949: for (i=0; i<n_local; i++) {
950: /*
951: To avoid copying the subvector from x into a workspace we instead
952: make the workspace vector array point to the subpart of the array of
953: the global vector.
954: */
955: VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);
956: VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);
958: PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
959: KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]);
960: PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
962: VecResetArray(bjac->x[i]);
963: VecResetArray(bjac->y[i]);
964: }
965: VecRestoreArrayRead(x,&xin);
966: VecRestoreArray(y,&yin);
967: return(0);
968: }
970: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
971: {
972: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
973: PetscErrorCode ierr;
974: PetscInt m,n_local,N,M,start,i;
975: const char *prefix,*pprefix,*mprefix;
976: KSP ksp;
977: Vec x,y;
978: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
979: PC subpc;
980: IS is;
981: MatReuse scall;
982: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA) || defined(PETSC_HAVE_VIENNACL)
983: PetscBool is_gpumatrix = PETSC_FALSE;
984: #endif
987: MatGetLocalSize(pc->pmat,&M,&N);
989: n_local = jac->n_local;
991: if (pc->useAmat) {
992: PetscBool same;
993: PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same);
994: if (!same) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
995: }
997: if (!pc->setupcalled) {
998: scall = MAT_INITIAL_MATRIX;
1000: if (!jac->ksp) {
1001: pc->ops->reset = PCReset_BJacobi_Multiblock;
1002: pc->ops->destroy = PCDestroy_BJacobi_Multiblock;
1003: pc->ops->apply = PCApply_BJacobi_Multiblock;
1004: pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
1005: pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;
1007: PetscNewLog(pc,&bjac);
1008: PetscMalloc1(n_local,&jac->ksp);
1009: PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP)));
1010: PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y);
1011: PetscMalloc1(n_local,&bjac->starts);
1012: PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar)));
1014: jac->data = (void*)bjac;
1015: PetscMalloc1(n_local,&bjac->is);
1016: PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS)));
1018: for (i=0; i<n_local; i++) {
1019: KSPCreate(PETSC_COMM_SELF,&ksp);
1020: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
1021: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
1022: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
1023: KSPSetType(ksp,KSPPREONLY);
1024: KSPGetPC(ksp,&subpc);
1025: PCGetOptionsPrefix(pc,&prefix);
1026: KSPSetOptionsPrefix(ksp,prefix);
1027: KSPAppendOptionsPrefix(ksp,"sub_");
1029: jac->ksp[i] = ksp;
1030: }
1031: } else {
1032: bjac = (PC_BJacobi_Multiblock*)jac->data;
1033: }
1035: start = 0;
1036: for (i=0; i<n_local; i++) {
1037: m = jac->l_lens[i];
1038: /*
1039: The reason we need to generate these vectors is to serve
1040: as the right-hand side and solution vector for the solve on the
1041: block. We do not need to allocate space for the vectors since
1042: that is provided via VecPlaceArray() just before the call to
1043: KSPSolve() on the block.
1045: */
1046: VecCreateSeq(PETSC_COMM_SELF,m,&x);
1047: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y);
1048: #if defined(PETSC_HAVE_CUSP)
1049: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSP,MATSEQAIJCUSP,MATMPIAIJCUSP,"");
1050: if (is_gpumatrix) {
1051: VecSetType(x,VECCUSP);
1052: VecSetType(y,VECCUSP);
1053: }
1054: #elif defined(PETSC_HAVE_VECCUDA)
1055: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
1056: if (is_gpumatrix) {
1057: VecSetType(x,VECCUDA);
1058: VecSetType(y,VECCUDA);
1059: }
1060: #elif defined(PETSC_HAVE_VIENNACL)
1061: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
1062: if (is_gpumatrix) {
1063: VecSetType(x,VECVIENNACL);
1064: VecSetType(y,VECVIENNACL);
1065: }
1066: #endif
1067: PetscLogObjectParent((PetscObject)pc,(PetscObject)x);
1068: PetscLogObjectParent((PetscObject)pc,(PetscObject)y);
1070: bjac->x[i] = x;
1071: bjac->y[i] = y;
1072: bjac->starts[i] = start;
1074: ISCreateStride(PETSC_COMM_SELF,m,start,1,&is);
1075: bjac->is[i] = is;
1076: PetscLogObjectParent((PetscObject)pc,(PetscObject)is);
1078: start += m;
1079: }
1080: } else {
1081: bjac = (PC_BJacobi_Multiblock*)jac->data;
1082: /*
1083: Destroy the blocks from the previous iteration
1084: */
1085: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1086: MatDestroyMatrices(n_local,&bjac->pmat);
1087: if (pc->useAmat) {
1088: MatDestroyMatrices(n_local,&bjac->mat);
1089: }
1090: scall = MAT_INITIAL_MATRIX;
1091: } else scall = MAT_REUSE_MATRIX;
1092: }
1094: MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat);
1095: if (pc->useAmat) {
1096: PetscObjectGetOptionsPrefix((PetscObject)mat,&mprefix);
1097: MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat);
1098: }
1099: /* Return control to the user so that the submatrices can be modified (e.g., to apply
1100: different boundary conditions for the submatrices than for the global problem) */
1101: PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP);
1103: PetscObjectGetOptionsPrefix((PetscObject)pmat,&pprefix);
1104: for (i=0; i<n_local; i++) {
1105: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]);
1106: PetscObjectSetOptionsPrefix((PetscObject)bjac->pmat[i],pprefix);
1107: if (pc->useAmat) {
1108: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]);
1109: PetscObjectSetOptionsPrefix((PetscObject)bjac->mat[i],mprefix);
1110: KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]);
1111: } else {
1112: KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]);
1113: }
1114: if (pc->setfromoptionscalled) {
1115: KSPSetFromOptions(jac->ksp[i]);
1116: }
1117: }
1118: return(0);
1119: }
1121: /* ---------------------------------------------------------------------------------------------*/
1122: /*
1123: These are for a single block with multiple processes;
1124: */
1125: static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1126: {
1127: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1128: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1129: PetscErrorCode ierr;
1132: VecDestroy(&mpjac->ysub);
1133: VecDestroy(&mpjac->xsub);
1134: MatDestroy(&mpjac->submats);
1135: if (jac->ksp) {KSPReset(jac->ksp[0]);}
1136: return(0);
1137: }
1139: static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1140: {
1141: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1142: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1143: PetscErrorCode ierr;
1146: PCReset_BJacobi_Multiproc(pc);
1147: KSPDestroy(&jac->ksp[0]);
1148: PetscFree(jac->ksp);
1149: PetscSubcommDestroy(&mpjac->psubcomm);
1151: PetscFree(mpjac);
1152: PetscFree(pc->data);
1153: return(0);
1154: }
1156: static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1157: {
1158: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1159: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1160: PetscErrorCode ierr;
1161: PetscScalar *yarray;
1162: const PetscScalar *xarray;
1163: KSPConvergedReason reason;
1166: /* place x's and y's local arrays into xsub and ysub */
1167: VecGetArrayRead(x,&xarray);
1168: VecGetArray(y,&yarray);
1169: VecPlaceArray(mpjac->xsub,xarray);
1170: VecPlaceArray(mpjac->ysub,yarray);
1172: /* apply preconditioner on each matrix block */
1173: PetscLogEventBegin(PC_ApplyOnMproc,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);
1174: KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub);
1175: PetscLogEventEnd(PC_ApplyOnMproc,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);
1176: KSPGetConvergedReason(jac->ksp[0],&reason);
1177: if (reason == KSP_DIVERGED_PCSETUP_FAILED) {
1178: pc->failedreason = PC_SUBPC_ERROR;
1179: }
1181: VecResetArray(mpjac->xsub);
1182: VecResetArray(mpjac->ysub);
1183: VecRestoreArrayRead(x,&xarray);
1184: VecRestoreArray(y,&yarray);
1185: return(0);
1186: }
1188: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1189: {
1190: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1191: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1192: PetscErrorCode ierr;
1193: PetscInt m,n;
1194: MPI_Comm comm,subcomm=0;
1195: const char *prefix;
1196: PetscBool wasSetup = PETSC_TRUE;
1197: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA) || defined(PETSC_HAVE_VIENNACL)
1198: PetscBool is_gpumatrix = PETSC_FALSE;
1199: #endif
1203: PetscObjectGetComm((PetscObject)pc,&comm);
1204: if (jac->n_local > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
1205: jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1206: if (!pc->setupcalled) {
1207: wasSetup = PETSC_FALSE;
1208: PetscNewLog(pc,&mpjac);
1209: jac->data = (void*)mpjac;
1211: /* initialize datastructure mpjac */
1212: if (!jac->psubcomm) {
1213: /* Create default contiguous subcommunicatiors if user does not provide them */
1214: PetscSubcommCreate(comm,&jac->psubcomm);
1215: PetscSubcommSetNumber(jac->psubcomm,jac->n);
1216: PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS);
1217: PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm));
1218: }
1219: mpjac->psubcomm = jac->psubcomm;
1220: subcomm = PetscSubcommChild(mpjac->psubcomm);
1222: /* Get matrix blocks of pmat */
1223: MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);
1225: /* create a new PC that processors in each subcomm have copy of */
1226: PetscMalloc1(1,&jac->ksp);
1227: KSPCreate(subcomm,&jac->ksp[0]);
1228: KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure);
1229: PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1);
1230: PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]);
1231: KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);
1232: KSPGetPC(jac->ksp[0],&mpjac->pc);
1234: PCGetOptionsPrefix(pc,&prefix);
1235: KSPSetOptionsPrefix(jac->ksp[0],prefix);
1236: KSPAppendOptionsPrefix(jac->ksp[0],"sub_");
1237: /*
1238: PetscMPIInt rank,subsize,subrank;
1239: MPI_Comm_rank(comm,&rank);
1240: MPI_Comm_size(subcomm,&subsize);
1241: MPI_Comm_rank(subcomm,&subrank);
1243: MatGetLocalSize(mpjac->submats,&m,NULL);
1244: MatGetSize(mpjac->submats,&n,NULL);
1245: PetscSynchronizedPrintf(comm,"[%d], sub-size %d,sub-rank %d\n",rank,subsize,subrank);
1246: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1247: */
1249: /* create dummy vectors xsub and ysub */
1250: MatGetLocalSize(mpjac->submats,&m,&n);
1251: VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub);
1252: VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub);
1253: #if defined(PETSC_HAVE_CUSP)
1254: PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJCUSP,MATMPIAIJCUSP,"");
1255: if (is_gpumatrix) {
1256: VecSetType(mpjac->xsub,VECMPICUSP);
1257: VecSetType(mpjac->ysub,VECMPICUSP);
1258: }
1259: #elif defined(PETSC_HAVE_VECCUDA)
1260: PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
1261: if (is_gpumatrix) {
1262: VecSetType(mpjac->xsub,VECMPICUDA);
1263: VecSetType(mpjac->ysub,VECMPICUDA);
1264: }
1265: #elif defined(PETSC_HAVE_VIENNACL)
1266: PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJVIENNACL,MATMPIAIJVIENNACL,"");
1267: if (is_gpumatrix) {
1268: VecSetType(mpjac->xsub,VECMPIVIENNACL);
1269: VecSetType(mpjac->ysub,VECMPIVIENNACL);
1270: }
1271: #endif
1272: PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub);
1273: PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub);
1275: pc->ops->reset = PCReset_BJacobi_Multiproc;
1276: pc->ops->destroy = PCDestroy_BJacobi_Multiproc;
1277: pc->ops->apply = PCApply_BJacobi_Multiproc;
1278: } else { /* pc->setupcalled */
1279: subcomm = PetscSubcommChild(mpjac->psubcomm);
1280: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1281: /* destroy old matrix blocks, then get new matrix blocks */
1282: if (mpjac->submats) {MatDestroy(&mpjac->submats);}
1283: MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);
1284: } else {
1285: MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats);
1286: }
1287: KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);
1288: }
1290: if (!wasSetup && pc->setfromoptionscalled) {
1291: KSPSetFromOptions(jac->ksp[0]);
1292: }
1293: return(0);
1294: }