Actual source code: bjacobi.c

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*
  3:    Defines a block Jacobi preconditioner.
  4: */

  6:  #include <../src/ksp/pc/impls/bjacobi/bjacobi.h>

  8: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
  9: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
 10: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);

 12: static PetscErrorCode PCSetUp_BJacobi(PC pc)
 13: {
 14:   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
 15:   Mat            mat  = pc->mat,pmat = pc->pmat;
 17:   void           (*f)(void);
 18:   PetscInt       N,M,start,i,sum,end;
 19:   PetscInt       bs,i_start=-1,i_end=-1;
 20:   PetscMPIInt    rank,size;
 21:   const char     *pprefix,*mprefix;

 24:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
 25:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
 26:   MatGetLocalSize(pc->pmat,&M,&N);
 27:   MatGetBlockSize(pc->pmat,&bs);

 29:   if (jac->n > 0 && jac->n < size) {
 30:     PCSetUp_BJacobi_Multiproc(pc);
 31:     return(0);
 32:   }

 34:   /* --------------------------------------------------------------------------
 35:       Determines the number of blocks assigned to each processor
 36:   -----------------------------------------------------------------------------*/

 38:   /*   local block count  given */
 39:   if (jac->n_local > 0 && jac->n < 0) {
 40:     MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
 41:     if (jac->l_lens) { /* check that user set these correctly */
 42:       sum = 0;
 43:       for (i=0; i<jac->n_local; i++) {
 44:         if (jac->l_lens[i]/bs*bs !=jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
 45:         sum += jac->l_lens[i];
 46:       }
 47:       if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
 48:     } else {
 49:       PetscMalloc1(jac->n_local,&jac->l_lens);
 50:       for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
 51:     }
 52:   } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
 53:     /* global blocks given: determine which ones are local */
 54:     if (jac->g_lens) {
 55:       /* check if the g_lens is has valid entries */
 56:       for (i=0; i<jac->n; i++) {
 57:         if (!jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
 58:         if (jac->g_lens[i]/bs*bs != jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
 59:       }
 60:       if (size == 1) {
 61:         jac->n_local = jac->n;
 62:         PetscMalloc1(jac->n_local,&jac->l_lens);
 63:         PetscMemcpy(jac->l_lens,jac->g_lens,jac->n_local*sizeof(PetscInt));
 64:         /* check that user set these correctly */
 65:         sum = 0;
 66:         for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
 67:         if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
 68:       } else {
 69:         MatGetOwnershipRange(pc->pmat,&start,&end);
 70:         /* loop over blocks determing first one owned by me */
 71:         sum = 0;
 72:         for (i=0; i<jac->n+1; i++) {
 73:           if (sum == start) { i_start = i; goto start_1;}
 74:           if (i < jac->n) sum += jac->g_lens[i];
 75:         }
 76:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
 77: start_1:
 78:         for (i=i_start; i<jac->n+1; i++) {
 79:           if (sum == end) { i_end = i; goto end_1; }
 80:           if (i < jac->n) sum += jac->g_lens[i];
 81:         }
 82:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
 83: end_1:
 84:         jac->n_local = i_end - i_start;
 85:         PetscMalloc1(jac->n_local,&jac->l_lens);
 86:         PetscMemcpy(jac->l_lens,jac->g_lens+i_start,jac->n_local*sizeof(PetscInt));
 87:       }
 88:     } else { /* no global blocks given, determine then using default layout */
 89:       jac->n_local = jac->n/size + ((jac->n % size) > rank);
 90:       PetscMalloc1(jac->n_local,&jac->l_lens);
 91:       for (i=0; i<jac->n_local; i++) {
 92:         jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
 93:         if (!jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
 94:       }
 95:     }
 96:   } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
 97:     jac->n         = size;
 98:     jac->n_local   = 1;
 99:     PetscMalloc1(1,&jac->l_lens);
100:     jac->l_lens[0] = M;
101:   } else { /* jac->n > 0 && jac->n_local > 0 */
102:     if (!jac->l_lens) {
103:       PetscMalloc1(jac->n_local,&jac->l_lens);
104:       for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
105:     }
106:   }
107:   if (jac->n_local < 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");

109:   /* -------------------------
110:       Determines mat and pmat
111:   ---------------------------*/
112:   MatShellGetOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&f);
113:   if (!f && size == 1) {
114:     mat  = pc->mat;
115:     pmat = pc->pmat;
116:   } else {
117:     if (pc->useAmat) {
118:       /* use block from Amat matrix, not Pmat for local MatMult() */
119:       MatGetDiagonalBlock(pc->mat,&mat);
120:       /* make submatrix have same prefix as entire matrix */
121:       PetscObjectGetOptionsPrefix((PetscObject)pc->mat,&mprefix);
122:       PetscObjectSetOptionsPrefix((PetscObject)mat,mprefix);
123:     }
124:     if (pc->pmat != pc->mat || !pc->useAmat) {
125:       MatGetDiagonalBlock(pc->pmat,&pmat);
126:       /* make submatrix have same prefix as entire matrix */
127:       PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
128:       PetscObjectSetOptionsPrefix((PetscObject)pmat,pprefix);
129:     } else pmat = mat;
130:   }

132:   /* ------
133:      Setup code depends on the number of blocks
134:   */
135:   if (jac->n_local == 1) {
136:     PCSetUp_BJacobi_Singleblock(pc,mat,pmat);
137:   } else {
138:     PCSetUp_BJacobi_Multiblock(pc,mat,pmat);
139:   }
140:   return(0);
141: }

143: /* Default destroy, if it has never been setup */
144: static PetscErrorCode PCDestroy_BJacobi(PC pc)
145: {
146:   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;

150:   PetscFree(jac->g_lens);
151:   PetscFree(jac->l_lens);
152:   PetscFree(pc->data);
153:   return(0);
154: }


157: static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
158: {
159:   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
161:   PetscInt       blocks,i;
162:   PetscBool      flg;

165:   PetscOptionsHead(PetscOptionsObject,"Block Jacobi options");
166:   PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg);
167:   if (flg) {
168:     PCBJacobiSetTotalBlocks(pc,blocks,NULL);
169:   }
170:   if (jac->ksp) {
171:     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
172:      * unless we had already been called. */
173:     for (i=0; i<jac->n_local; i++) {
174:       KSPSetFromOptions(jac->ksp[i]);
175:     }
176:   }
177:   PetscOptionsTail();
178:   return(0);
179: }

181:  #include <petscdraw.h>
182: static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
183: {
184:   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
185:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
186:   PetscErrorCode       ierr;
187:   PetscMPIInt          rank;
188:   PetscInt             i;
189:   PetscBool            iascii,isstring,isdraw;
190:   PetscViewer          sviewer;

193:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
194:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
195:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
196:   if (iascii) {
197:     if (pc->useAmat) {
198:       PetscViewerASCIIPrintf(viewer,"  using Amat local matrix, number of blocks = %D\n",jac->n);
199:     }
200:     PetscViewerASCIIPrintf(viewer,"  number of blocks = %D\n",jac->n);
201:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
202:     if (jac->same_local_solves) {
203:       PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");
204:       if (jac->ksp && !jac->psubcomm) {
205:         PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
206:         if (!rank) {
207:           PetscViewerASCIIPushTab(viewer);
208:           KSPView(jac->ksp[0],sviewer);
209:           PetscViewerASCIIPopTab(viewer);
210:         }
211:         PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
212:       } else if (mpjac && jac->ksp && mpjac->psubcomm) {
213:         PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer);
214:         if (!mpjac->psubcomm->color) {
215:           PetscViewerASCIIPushTab(viewer);
216:           KSPView(*(jac->ksp),sviewer);
217:           PetscViewerASCIIPopTab(viewer);
218:         }
219:         PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer);
220:       }
221:     } else {
222:       PetscInt n_global;
223:       MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
224:       PetscViewerASCIIPushSynchronized(viewer);
225:       PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");
226:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D, first local block number = %D\n",
227:                                                 rank,jac->n_local,jac->first_local);
228:       PetscViewerASCIIPushTab(viewer);
229:       PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
230:       for (i=0; i<jac->n_local; i++) {
231:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %D\n",rank,i);
232:         KSPView(jac->ksp[i],sviewer);
233:         PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
234:       }
235:       PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
236:       PetscViewerASCIIPopTab(viewer);
237:       PetscViewerFlush(viewer);
238:       PetscViewerASCIIPopSynchronized(viewer);
239:     }
240:   } else if (isstring) {
241:     PetscViewerStringSPrintf(viewer," blks=%D",jac->n);
242:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
243:     if (jac->ksp) {KSPView(jac->ksp[0],sviewer);}
244:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
245:   } else if (isdraw) {
246:     PetscDraw draw;
247:     char      str[25];
248:     PetscReal x,y,bottom,h;

250:     PetscViewerDrawGetDraw(viewer,0,&draw);
251:     PetscDrawGetCurrentPoint(draw,&x,&y);
252:     PetscSNPrintf(str,25,"Number blocks %D",jac->n);
253:     PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h);
254:     bottom = y - h;
255:     PetscDrawPushCurrentPoint(draw,x,bottom);
256:     /* warning the communicator on viewer is different then on ksp in parallel */
257:     if (jac->ksp) {KSPView(jac->ksp[0],viewer);}
258:     PetscDrawPopCurrentPoint(draw);
259:   }
260:   return(0);
261: }

263: /* -------------------------------------------------------------------------------------*/

265: static PetscErrorCode  PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
266: {
267:   PC_BJacobi *jac = (PC_BJacobi*)pc->data;;

270:   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");

272:   if (n_local) *n_local = jac->n_local;
273:   if (first_local) *first_local = jac->first_local;
274:   *ksp                   = jac->ksp;
275:   jac->same_local_solves = PETSC_FALSE;        /* Assume that local solves are now different;
276:                                                   not necessarily true though!  This flag is
277:                                                   used only for PCView_BJacobi() */
278:   return(0);
279: }

281: static PetscErrorCode  PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
282: {
283:   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;

287:   if (pc->setupcalled > 0 && jac->n!=blocks) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
288:   jac->n = blocks;
289:   if (!lens) jac->g_lens = 0;
290:   else {
291:     PetscMalloc1(blocks,&jac->g_lens);
292:     PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));
293:     PetscMemcpy(jac->g_lens,lens,blocks*sizeof(PetscInt));
294:   }
295:   return(0);
296: }

298: static PetscErrorCode  PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
299: {
300:   PC_BJacobi *jac = (PC_BJacobi*) pc->data;

303:   *blocks = jac->n;
304:   if (lens) *lens = jac->g_lens;
305:   return(0);
306: }

308: static PetscErrorCode  PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
309: {
310:   PC_BJacobi     *jac;

314:   jac = (PC_BJacobi*)pc->data;

316:   jac->n_local = blocks;
317:   if (!lens) jac->l_lens = 0;
318:   else {
319:     PetscMalloc1(blocks,&jac->l_lens);
320:     PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));
321:     PetscMemcpy(jac->l_lens,lens,blocks*sizeof(PetscInt));
322:   }
323:   return(0);
324: }

326: static PetscErrorCode  PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
327: {
328:   PC_BJacobi *jac = (PC_BJacobi*) pc->data;

331:   *blocks = jac->n_local;
332:   if (lens) *lens = jac->l_lens;
333:   return(0);
334: }

336: /* -------------------------------------------------------------------------------------*/

338: /*@C
339:    PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
340:    this processor.

342:    Note Collective

344:    Input Parameter:
345: .  pc - the preconditioner context

347:    Output Parameters:
348: +  n_local - the number of blocks on this processor, or NULL
349: .  first_local - the global number of the first block on this processor, or NULL
350: -  ksp - the array of KSP contexts

352:    Notes:
353:    After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.

355:    Currently for some matrix implementations only 1 block per processor
356:    is supported.

358:    You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().

360:    Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
361:       You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
362:       KSP array must be.

364:    Level: advanced

366: .keywords:  block, Jacobi, get, sub, KSP, context

368: .seealso: PCBJacobiGetSubKSP()
369: @*/
370: PetscErrorCode  PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
371: {

376:   PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
377:   return(0);
378: }

380: /*@
381:    PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
382:    Jacobi preconditioner.

384:    Collective on PC

386:    Input Parameters:
387: +  pc - the preconditioner context
388: .  blocks - the number of blocks
389: -  lens - [optional] integer array containing the size of each block

391:    Options Database Key:
392: .  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks

394:    Notes:
395:    Currently only a limited number of blocking configurations are supported.
396:    All processors sharing the PC must call this routine with the same data.

398:    Level: intermediate

400: .keywords:  set, number, Jacobi, global, total, blocks

402: .seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks()
403: @*/
404: PetscErrorCode  PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
405: {

410:   if (blocks <= 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
411:   PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens));
412:   return(0);
413: }

415: /*@C
416:    PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
417:    Jacobi preconditioner.

419:    Not Collective

421:    Input Parameter:
422: .  pc - the preconditioner context

424:    Output parameters:
425: +  blocks - the number of blocks
426: -  lens - integer array containing the size of each block

428:    Level: intermediate

430: .keywords:  get, number, Jacobi, global, total, blocks

432: .seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks()
433: @*/
434: PetscErrorCode  PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
435: {

441:   PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
442:   return(0);
443: }

445: /*@
446:    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
447:    Jacobi preconditioner.

449:    Not Collective

451:    Input Parameters:
452: +  pc - the preconditioner context
453: .  blocks - the number of blocks
454: -  lens - [optional] integer array containing size of each block

456:    Note:
457:    Currently only a limited number of blocking configurations are supported.

459:    Level: intermediate

461: .keywords: PC, set, number, Jacobi, local, blocks

463: .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks()
464: @*/
465: PetscErrorCode  PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
466: {

471:   if (blocks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks");
472:   PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens));
473:   return(0);
474: }

476: /*@C
477:    PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
478:    Jacobi preconditioner.

480:    Not Collective

482:    Input Parameters:
483: +  pc - the preconditioner context
484: .  blocks - the number of blocks
485: -  lens - [optional] integer array containing size of each block

487:    Note:
488:    Currently only a limited number of blocking configurations are supported.

490:    Level: intermediate

492: .keywords: PC, get, number, Jacobi, local, blocks

494: .seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks()
495: @*/
496: PetscErrorCode  PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
497: {

503:   PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
504:   return(0);
505: }

507: /* -----------------------------------------------------------------------------------*/

509: /*MC
510:    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
511:            its own KSP object.

513:    Options Database Keys:
514: .  -pc_use_amat - use Amat to apply block of operator in inner Krylov method

516:    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
517:      than one processor. Defaults to one block per processor.

519:      To set options on the solvers for each block append -sub_ to all the KSP, KSP, and PC
520:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

522:      To set the options on the solvers separate for each block call PCBJacobiGetSubKSP()
523:          and set the options directly on the resulting KSP object (you can access its PC
524:          KSPGetPC())

526:      For GPU-based vectors (CUDA, CUSP, ViennaCL) it is recommended to use exactly one block per MPI process for best
527:          performance.  Different block partitioning may lead to additional data transfers
528:          between host and GPU that lead to degraded performance.

530:    Level: beginner

532:    Concepts: block Jacobi


535: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
536:            PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(),
537:            PCBJacobiSetLocalBlocks(), PCSetModifySubmatrices()
538: M*/

540: PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
541: {
543:   PetscMPIInt    rank;
544:   PC_BJacobi     *jac;

547:   PetscNewLog(pc,&jac);
548:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);

550:   pc->ops->apply           = 0;
551:   pc->ops->applytranspose  = 0;
552:   pc->ops->setup           = PCSetUp_BJacobi;
553:   pc->ops->destroy         = PCDestroy_BJacobi;
554:   pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
555:   pc->ops->view            = PCView_BJacobi;
556:   pc->ops->applyrichardson = 0;

558:   pc->data               = (void*)jac;
559:   jac->n                 = -1;
560:   jac->n_local           = -1;
561:   jac->first_local       = rank;
562:   jac->ksp               = 0;
563:   jac->same_local_solves = PETSC_TRUE;
564:   jac->g_lens            = 0;
565:   jac->l_lens            = 0;
566:   jac->psubcomm          = 0;

568:   PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi);
569:   PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi);
570:   PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi);
571:   PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi);
572:   PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi);
573:   return(0);
574: }

576: /* --------------------------------------------------------------------------------------------*/
577: /*
578:         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
579: */
580: static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
581: {
582:   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
583:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
584:   PetscErrorCode         ierr;

587:   KSPReset(jac->ksp[0]);
588:   VecDestroy(&bjac->x);
589:   VecDestroy(&bjac->y);
590:   return(0);
591: }

593: static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
594: {
595:   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
596:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
597:   PetscErrorCode         ierr;

600:   PCReset_BJacobi_Singleblock(pc);
601:   KSPDestroy(&jac->ksp[0]);
602:   PetscFree(jac->ksp);
603:   PetscFree(jac->l_lens);
604:   PetscFree(jac->g_lens);
605:   PetscFree(bjac);
606:   PetscFree(pc->data);
607:   return(0);
608: }

610: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
611: {
612:   PetscErrorCode     ierr;
613:   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
614:   KSP                subksp = jac->ksp[0];
615:   KSPConvergedReason reason;

618:   KSPSetUp(subksp);
619:   KSPGetConvergedReason(subksp,&reason);
620:   if (reason == KSP_DIVERGED_PCSETUP_FAILED) {
621:     pc->failedreason = PC_SUBPC_ERROR;
622:   }
623:   return(0);
624: }

626: static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
627: {
628:   PetscErrorCode         ierr;
629:   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
630:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;

633:   VecGetLocalVectorRead(x, bjac->x);
634:   VecGetLocalVector(y, bjac->y);
635:  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
636:      matrix may change even if the outter KSP/PC has not updated the preconditioner, this will trigger a rebuild
637:      of the inner preconditioner automatically unless we pass down the outter preconditioners reuse flag.*/
638:   KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner);
639:   KSPSolve(jac->ksp[0],bjac->x,bjac->y);
640:   VecRestoreLocalVectorRead(x, bjac->x);
641:   VecRestoreLocalVector(y, bjac->y);
642:   return(0);
643: }

645: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
646: {
647:   PetscErrorCode         ierr;
648:   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
649:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
650:   PetscScalar            *y_array;
651:   const PetscScalar      *x_array;
652:   PC                     subpc;

655:   /*
656:       The VecPlaceArray() is to avoid having to copy the
657:     y vector into the bjac->x vector. The reason for
658:     the bjac->x vector is that we need a sequential vector
659:     for the sequential solve.
660:   */
661:   VecGetArrayRead(x,&x_array);
662:   VecGetArray(y,&y_array);
663:   VecPlaceArray(bjac->x,x_array);
664:   VecPlaceArray(bjac->y,y_array);
665:   /* apply the symmetric left portion of the inner PC operator */
666:   /* note this by-passes the inner KSP and its options completely */
667:   KSPGetPC(jac->ksp[0],&subpc);
668:   PCApplySymmetricLeft(subpc,bjac->x,bjac->y);
669:   VecResetArray(bjac->x);
670:   VecResetArray(bjac->y);
671:   VecRestoreArrayRead(x,&x_array);
672:   VecRestoreArray(y,&y_array);
673:   return(0);
674: }

676: static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
677: {
678:   PetscErrorCode         ierr;
679:   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
680:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
681:   PetscScalar            *y_array;
682:   const PetscScalar      *x_array;
683:   PC                     subpc;

686:   /*
687:       The VecPlaceArray() is to avoid having to copy the
688:     y vector into the bjac->x vector. The reason for
689:     the bjac->x vector is that we need a sequential vector
690:     for the sequential solve.
691:   */
692:   VecGetArrayRead(x,&x_array);
693:   VecGetArray(y,&y_array);
694:   VecPlaceArray(bjac->x,x_array);
695:   VecPlaceArray(bjac->y,y_array);

697:   /* apply the symmetric right portion of the inner PC operator */
698:   /* note this by-passes the inner KSP and its options completely */

700:   KSPGetPC(jac->ksp[0],&subpc);
701:   PCApplySymmetricRight(subpc,bjac->x,bjac->y);

703:   VecRestoreArrayRead(x,&x_array);
704:   VecRestoreArray(y,&y_array);
705:   return(0);
706: }

708: static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
709: {
710:   PetscErrorCode         ierr;
711:   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
712:   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
713:   PetscScalar            *y_array;
714:   const PetscScalar      *x_array;

717:   /*
718:       The VecPlaceArray() is to avoid having to copy the
719:     y vector into the bjac->x vector. The reason for
720:     the bjac->x vector is that we need a sequential vector
721:     for the sequential solve.
722:   */
723:   VecGetArrayRead(x,&x_array);
724:   VecGetArray(y,&y_array);
725:   VecPlaceArray(bjac->x,x_array);
726:   VecPlaceArray(bjac->y,y_array);
727:   KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y);
728:   VecResetArray(bjac->x);
729:   VecResetArray(bjac->y);
730:   VecRestoreArrayRead(x,&x_array);
731:   VecRestoreArray(y,&y_array);
732:   return(0);
733: }

735: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
736: {
737:   PC_BJacobi             *jac = (PC_BJacobi*)pc->data;
738:   PetscErrorCode         ierr;
739:   PetscInt               m;
740:   KSP                    ksp;
741:   PC_BJacobi_Singleblock *bjac;
742:   PetscBool              wasSetup = PETSC_TRUE;
743: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA) || defined(PETSC_HAVE_VIENNACL)
744:   PetscBool              is_gpumatrix = PETSC_FALSE;
745: #endif

748:   if (!pc->setupcalled) {
749:     const char *prefix;

751:     if (!jac->ksp) {
752:       wasSetup = PETSC_FALSE;

754:       KSPCreate(PETSC_COMM_SELF,&ksp);
755:       KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
756:       PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
757:       PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
758:       KSPSetType(ksp,KSPPREONLY);
759:       PCGetOptionsPrefix(pc,&prefix);
760:       KSPSetOptionsPrefix(ksp,prefix);
761:       KSPAppendOptionsPrefix(ksp,"sub_");

763:       pc->ops->reset               = PCReset_BJacobi_Singleblock;
764:       pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
765:       pc->ops->apply               = PCApply_BJacobi_Singleblock;
766:       pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
767:       pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
768:       pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
769:       pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

771:       PetscMalloc1(1,&jac->ksp);
772:       jac->ksp[0] = ksp;

774:       PetscNewLog(pc,&bjac);
775:       jac->data = (void*)bjac;
776:     } else {
777:       ksp  = jac->ksp[0];
778:       bjac = (PC_BJacobi_Singleblock*)jac->data;
779:     }

781:     /*
782:       The reason we need to generate these vectors is to serve
783:       as the right-hand side and solution vector for the solve on the
784:       block. We do not need to allocate space for the vectors since
785:       that is provided via VecPlaceArray() just before the call to
786:       KSPSolve() on the block.
787:     */
788:     MatGetSize(pmat,&m,&m);
789:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x);
790:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y);
791: #if defined(PETSC_HAVE_CUSP)
792:     PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSP,MATSEQAIJCUSP,MATMPIAIJCUSP,"");
793:     if (is_gpumatrix) {
794:       VecSetType(bjac->x,VECCUSP);
795:       VecSetType(bjac->y,VECCUSP);
796:     }
797: #elif defined(PETSC_HAVE_VECCUDA)
798:     PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
799:     if (is_gpumatrix) {
800:       VecSetType(bjac->x,VECCUDA);
801:       VecSetType(bjac->y,VECCUDA);
802:     }
803: #elif defined(PETSC_HAVE_VIENNACL)
804:     PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
805:     if (is_gpumatrix) {
806:       VecSetType(bjac->x,VECVIENNACL);
807:       VecSetType(bjac->y,VECVIENNACL);
808:     }
809: #endif
810:     PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x);
811:     PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y);
812:   } else {
813:     ksp  = jac->ksp[0];
814:     bjac = (PC_BJacobi_Singleblock*)jac->data;
815:   }
816:   if (pc->useAmat) {
817:     KSPSetOperators(ksp,mat,pmat);
818:   } else {
819:     KSPSetOperators(ksp,pmat,pmat);
820:   }
821:   if (!wasSetup && pc->setfromoptionscalled) {
822:     /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
823:     KSPSetFromOptions(ksp);
824:   }
825:   return(0);
826: }

828: /* ---------------------------------------------------------------------------------------------*/
829: static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
830: {
831:   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
832:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
833:   PetscErrorCode        ierr;
834:   PetscInt              i;

837:   if (bjac && bjac->pmat) {
838:     MatDestroyMatrices(jac->n_local,&bjac->pmat);
839:     if (pc->useAmat) {
840:       MatDestroyMatrices(jac->n_local,&bjac->mat);
841:     }
842:   }

844:   for (i=0; i<jac->n_local; i++) {
845:     KSPReset(jac->ksp[i]);
846:     if (bjac && bjac->x) {
847:       VecDestroy(&bjac->x[i]);
848:       VecDestroy(&bjac->y[i]);
849:       ISDestroy(&bjac->is[i]);
850:     }
851:   }
852:   PetscFree(jac->l_lens);
853:   PetscFree(jac->g_lens);
854:   return(0);
855: }

857: static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
858: {
859:   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
860:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
861:   PetscErrorCode        ierr;
862:   PetscInt              i;

865:   PCReset_BJacobi_Multiblock(pc);
866:   if (bjac) {
867:     PetscFree2(bjac->x,bjac->y);
868:     PetscFree(bjac->starts);
869:     PetscFree(bjac->is);
870:   }
871:   PetscFree(jac->data);
872:   for (i=0; i<jac->n_local; i++) {
873:     KSPDestroy(&jac->ksp[i]);
874:   }
875:   PetscFree(jac->ksp);
876:   PetscFree(pc->data);
877:   return(0);
878: }

880: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
881: {
882:   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
883:   PetscErrorCode     ierr;
884:   PetscInt           i,n_local = jac->n_local;
885:   KSPConvergedReason reason;

888:   for (i=0; i<n_local; i++) {
889:     KSPSetUp(jac->ksp[i]);
890:     KSPGetConvergedReason(jac->ksp[i],&reason);
891:     if (reason == KSP_DIVERGED_PCSETUP_FAILED) {
892:       pc->failedreason = PC_SUBPC_ERROR;
893:     }
894:   }
895:   return(0);
896: }

898: /*
899:       Preconditioner for block Jacobi
900: */
901: static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
902: {
903:   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
904:   PetscErrorCode        ierr;
905:   PetscInt              i,n_local = jac->n_local;
906:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
907:   PetscScalar           *yin;
908:   const PetscScalar     *xin;

911:   VecGetArrayRead(x,&xin);
912:   VecGetArray(y,&yin);
913:   for (i=0; i<n_local; i++) {
914:     /*
915:        To avoid copying the subvector from x into a workspace we instead
916:        make the workspace vector array point to the subpart of the array of
917:        the global vector.
918:     */
919:     VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);
920:     VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);

922:     PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
923:     KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]);
924:     PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);

926:     VecResetArray(bjac->x[i]);
927:     VecResetArray(bjac->y[i]);
928:   }
929:   VecRestoreArrayRead(x,&xin);
930:   VecRestoreArray(y,&yin);
931:   return(0);
932: }

934: /*
935:       Preconditioner for block Jacobi
936: */
937: static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
938: {
939:   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
940:   PetscErrorCode        ierr;
941:   PetscInt              i,n_local = jac->n_local;
942:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
943:   PetscScalar           *yin;
944:   const PetscScalar     *xin;

947:   VecGetArrayRead(x,&xin);
948:   VecGetArray(y,&yin);
949:   for (i=0; i<n_local; i++) {
950:     /*
951:        To avoid copying the subvector from x into a workspace we instead
952:        make the workspace vector array point to the subpart of the array of
953:        the global vector.
954:     */
955:     VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);
956:     VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);

958:     PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
959:     KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]);
960:     PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);

962:     VecResetArray(bjac->x[i]);
963:     VecResetArray(bjac->y[i]);
964:   }
965:   VecRestoreArrayRead(x,&xin);
966:   VecRestoreArray(y,&yin);
967:   return(0);
968: }

970: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
971: {
972:   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
973:   PetscErrorCode        ierr;
974:   PetscInt              m,n_local,N,M,start,i;
975:   const char            *prefix,*pprefix,*mprefix;
976:   KSP                   ksp;
977:   Vec                   x,y;
978:   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
979:   PC                    subpc;
980:   IS                    is;
981:   MatReuse              scall;
982: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA) || defined(PETSC_HAVE_VIENNACL)
983:   PetscBool              is_gpumatrix = PETSC_FALSE;
984: #endif

987:   MatGetLocalSize(pc->pmat,&M,&N);

989:   n_local = jac->n_local;

991:   if (pc->useAmat) {
992:     PetscBool same;
993:     PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same);
994:     if (!same) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
995:   }

997:   if (!pc->setupcalled) {
998:     scall = MAT_INITIAL_MATRIX;

1000:     if (!jac->ksp) {
1001:       pc->ops->reset         = PCReset_BJacobi_Multiblock;
1002:       pc->ops->destroy       = PCDestroy_BJacobi_Multiblock;
1003:       pc->ops->apply         = PCApply_BJacobi_Multiblock;
1004:       pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
1005:       pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;

1007:       PetscNewLog(pc,&bjac);
1008:       PetscMalloc1(n_local,&jac->ksp);
1009:       PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP)));
1010:       PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y);
1011:       PetscMalloc1(n_local,&bjac->starts);
1012:       PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar)));

1014:       jac->data = (void*)bjac;
1015:       PetscMalloc1(n_local,&bjac->is);
1016:       PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS)));

1018:       for (i=0; i<n_local; i++) {
1019:         KSPCreate(PETSC_COMM_SELF,&ksp);
1020:         KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
1021:         PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
1022:         PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
1023:         KSPSetType(ksp,KSPPREONLY);
1024:         KSPGetPC(ksp,&subpc);
1025:         PCGetOptionsPrefix(pc,&prefix);
1026:         KSPSetOptionsPrefix(ksp,prefix);
1027:         KSPAppendOptionsPrefix(ksp,"sub_");

1029:         jac->ksp[i] = ksp;
1030:       }
1031:     } else {
1032:       bjac = (PC_BJacobi_Multiblock*)jac->data;
1033:     }

1035:     start = 0;
1036:     for (i=0; i<n_local; i++) {
1037:       m = jac->l_lens[i];
1038:       /*
1039:       The reason we need to generate these vectors is to serve
1040:       as the right-hand side and solution vector for the solve on the
1041:       block. We do not need to allocate space for the vectors since
1042:       that is provided via VecPlaceArray() just before the call to
1043:       KSPSolve() on the block.

1045:       */
1046:       VecCreateSeq(PETSC_COMM_SELF,m,&x);
1047:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y);
1048: #if defined(PETSC_HAVE_CUSP)
1049:       PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSP,MATSEQAIJCUSP,MATMPIAIJCUSP,"");
1050:       if (is_gpumatrix) {
1051:         VecSetType(x,VECCUSP);
1052:         VecSetType(y,VECCUSP);
1053:       }
1054: #elif defined(PETSC_HAVE_VECCUDA)
1055:       PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
1056:       if (is_gpumatrix) {
1057:         VecSetType(x,VECCUDA);
1058:         VecSetType(y,VECCUDA);
1059:       }
1060: #elif defined(PETSC_HAVE_VIENNACL)
1061:       PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
1062:       if (is_gpumatrix) {
1063:         VecSetType(x,VECVIENNACL);
1064:         VecSetType(y,VECVIENNACL);
1065:       }
1066: #endif
1067:       PetscLogObjectParent((PetscObject)pc,(PetscObject)x);
1068:       PetscLogObjectParent((PetscObject)pc,(PetscObject)y);

1070:       bjac->x[i]      = x;
1071:       bjac->y[i]      = y;
1072:       bjac->starts[i] = start;

1074:       ISCreateStride(PETSC_COMM_SELF,m,start,1,&is);
1075:       bjac->is[i] = is;
1076:       PetscLogObjectParent((PetscObject)pc,(PetscObject)is);

1078:       start += m;
1079:     }
1080:   } else {
1081:     bjac = (PC_BJacobi_Multiblock*)jac->data;
1082:     /*
1083:        Destroy the blocks from the previous iteration
1084:     */
1085:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1086:       MatDestroyMatrices(n_local,&bjac->pmat);
1087:       if (pc->useAmat) {
1088:         MatDestroyMatrices(n_local,&bjac->mat);
1089:       }
1090:       scall = MAT_INITIAL_MATRIX;
1091:     } else scall = MAT_REUSE_MATRIX;
1092:   }

1094:   MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat);
1095:   if (pc->useAmat) {
1096:     PetscObjectGetOptionsPrefix((PetscObject)mat,&mprefix);
1097:     MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat);
1098:   }
1099:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1100:      different boundary conditions for the submatrices than for the global problem) */
1101:   PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP);

1103:   PetscObjectGetOptionsPrefix((PetscObject)pmat,&pprefix);
1104:   for (i=0; i<n_local; i++) {
1105:     PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]);
1106:     PetscObjectSetOptionsPrefix((PetscObject)bjac->pmat[i],pprefix);
1107:     if (pc->useAmat) {
1108:       PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]);
1109:       PetscObjectSetOptionsPrefix((PetscObject)bjac->mat[i],mprefix);
1110:       KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]);
1111:     } else {
1112:       KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]);
1113:     }
1114:     if (pc->setfromoptionscalled) {
1115:       KSPSetFromOptions(jac->ksp[i]);
1116:     }
1117:   }
1118:   return(0);
1119: }

1121: /* ---------------------------------------------------------------------------------------------*/
1122: /*
1123:       These are for a single block with multiple processes;
1124: */
1125: static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1126: {
1127:   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1128:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1129:   PetscErrorCode       ierr;

1132:   VecDestroy(&mpjac->ysub);
1133:   VecDestroy(&mpjac->xsub);
1134:   MatDestroy(&mpjac->submats);
1135:   if (jac->ksp) {KSPReset(jac->ksp[0]);}
1136:   return(0);
1137: }

1139: static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1140: {
1141:   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1142:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1143:   PetscErrorCode       ierr;

1146:   PCReset_BJacobi_Multiproc(pc);
1147:   KSPDestroy(&jac->ksp[0]);
1148:   PetscFree(jac->ksp);
1149:   PetscSubcommDestroy(&mpjac->psubcomm);

1151:   PetscFree(mpjac);
1152:   PetscFree(pc->data);
1153:   return(0);
1154: }

1156: static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1157: {
1158:   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1159:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1160:   PetscErrorCode       ierr;
1161:   PetscScalar          *yarray;
1162:   const PetscScalar    *xarray;
1163:   KSPConvergedReason   reason;

1166:   /* place x's and y's local arrays into xsub and ysub */
1167:   VecGetArrayRead(x,&xarray);
1168:   VecGetArray(y,&yarray);
1169:   VecPlaceArray(mpjac->xsub,xarray);
1170:   VecPlaceArray(mpjac->ysub,yarray);

1172:   /* apply preconditioner on each matrix block */
1173:   PetscLogEventBegin(PC_ApplyOnMproc,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);
1174:   KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub);
1175:   PetscLogEventEnd(PC_ApplyOnMproc,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);
1176:   KSPGetConvergedReason(jac->ksp[0],&reason);
1177:   if (reason == KSP_DIVERGED_PCSETUP_FAILED) {
1178:     pc->failedreason = PC_SUBPC_ERROR;
1179:   }

1181:   VecResetArray(mpjac->xsub);
1182:   VecResetArray(mpjac->ysub);
1183:   VecRestoreArrayRead(x,&xarray);
1184:   VecRestoreArray(y,&yarray);
1185:   return(0);
1186: }

1188: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1189: {
1190:   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1191:   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1192:   PetscErrorCode       ierr;
1193:   PetscInt             m,n;
1194:   MPI_Comm             comm,subcomm=0;
1195:   const char           *prefix;
1196:   PetscBool            wasSetup = PETSC_TRUE;
1197: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA) || defined(PETSC_HAVE_VIENNACL)
1198:   PetscBool              is_gpumatrix = PETSC_FALSE;
1199: #endif


1203:   PetscObjectGetComm((PetscObject)pc,&comm);
1204:   if (jac->n_local > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
1205:   jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1206:   if (!pc->setupcalled) {
1207:     wasSetup  = PETSC_FALSE;
1208:     PetscNewLog(pc,&mpjac);
1209:     jac->data = (void*)mpjac;

1211:     /* initialize datastructure mpjac */
1212:     if (!jac->psubcomm) {
1213:       /* Create default contiguous subcommunicatiors if user does not provide them */
1214:       PetscSubcommCreate(comm,&jac->psubcomm);
1215:       PetscSubcommSetNumber(jac->psubcomm,jac->n);
1216:       PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS);
1217:       PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm));
1218:     }
1219:     mpjac->psubcomm = jac->psubcomm;
1220:     subcomm         = PetscSubcommChild(mpjac->psubcomm);

1222:     /* Get matrix blocks of pmat */
1223:     MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);

1225:     /* create a new PC that processors in each subcomm have copy of */
1226:     PetscMalloc1(1,&jac->ksp);
1227:     KSPCreate(subcomm,&jac->ksp[0]);
1228:     KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure);
1229:     PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1);
1230:     PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]);
1231:     KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);
1232:     KSPGetPC(jac->ksp[0],&mpjac->pc);

1234:     PCGetOptionsPrefix(pc,&prefix);
1235:     KSPSetOptionsPrefix(jac->ksp[0],prefix);
1236:     KSPAppendOptionsPrefix(jac->ksp[0],"sub_");
1237:     /*
1238:       PetscMPIInt rank,subsize,subrank;
1239:       MPI_Comm_rank(comm,&rank);
1240:       MPI_Comm_size(subcomm,&subsize);
1241:       MPI_Comm_rank(subcomm,&subrank);

1243:       MatGetLocalSize(mpjac->submats,&m,NULL);
1244:       MatGetSize(mpjac->submats,&n,NULL);
1245:       PetscSynchronizedPrintf(comm,"[%d], sub-size %d,sub-rank %d\n",rank,subsize,subrank);
1246:       PetscSynchronizedFlush(comm,PETSC_STDOUT);
1247:     */

1249:     /* create dummy vectors xsub and ysub */
1250:     MatGetLocalSize(mpjac->submats,&m,&n);
1251:     VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub);
1252:     VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub);
1253: #if defined(PETSC_HAVE_CUSP)
1254:     PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJCUSP,MATMPIAIJCUSP,"");
1255:     if (is_gpumatrix) {
1256:       VecSetType(mpjac->xsub,VECMPICUSP);
1257:       VecSetType(mpjac->ysub,VECMPICUSP);
1258:     }
1259: #elif defined(PETSC_HAVE_VECCUDA)
1260:     PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
1261:     if (is_gpumatrix) {
1262:       VecSetType(mpjac->xsub,VECMPICUDA);
1263:       VecSetType(mpjac->ysub,VECMPICUDA);
1264:     }
1265: #elif defined(PETSC_HAVE_VIENNACL)
1266:     PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJVIENNACL,MATMPIAIJVIENNACL,"");
1267:     if (is_gpumatrix) {
1268:       VecSetType(mpjac->xsub,VECMPIVIENNACL);
1269:       VecSetType(mpjac->ysub,VECMPIVIENNACL);
1270:     }
1271: #endif
1272:     PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub);
1273:     PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub);

1275:     pc->ops->reset   = PCReset_BJacobi_Multiproc;
1276:     pc->ops->destroy = PCDestroy_BJacobi_Multiproc;
1277:     pc->ops->apply   = PCApply_BJacobi_Multiproc;
1278:   } else { /* pc->setupcalled */
1279:     subcomm = PetscSubcommChild(mpjac->psubcomm);
1280:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1281:       /* destroy old matrix blocks, then get new matrix blocks */
1282:       if (mpjac->submats) {MatDestroy(&mpjac->submats);}
1283:       MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);
1284:     } else {
1285:       MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats);
1286:     }
1287:     KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);
1288:   }

1290:   if (!wasSetup && pc->setfromoptionscalled) {
1291:     KSPSetFromOptions(jac->ksp[0]);
1292:   }
1293:   return(0);
1294: }