Actual source code: pbjacobi.c

petsc-3.14.6 2021-03-30
Report Typos and Errors

  2: /*
  3:    Include files needed for the PBJacobi preconditioner:
  4:      pcimpl.h - private include file intended for use by all preconditioners
  5: */

  7: #include <petsc/private/pcimpl.h>

  9: /*
 10:    Private context (data structure) for the PBJacobi preconditioner.
 11: */
 12: typedef struct {
 13:   const MatScalar *diag;
 14:   PetscInt        bs,mbs;
 15: } PC_PBJacobi;


 18: static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y)
 19: {
 20:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
 21:   PetscErrorCode    ierr;
 22:   PetscInt          i,m = jac->mbs;
 23:   const MatScalar   *diag = jac->diag;
 24:   const PetscScalar *xx;
 25:   PetscScalar       *yy;

 28:   VecGetArrayRead(x,&xx);
 29:   VecGetArray(y,&yy);
 30:   for (i=0; i<m; i++) yy[i] = diag[i]*xx[i];
 31:   VecRestoreArrayRead(x,&xx);
 32:   VecRestoreArray(y,&yy);
 33:   PetscLogFlops(m);
 34:   return(0);
 35: }

 37: static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
 38: {
 39:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 40:   PetscErrorCode  ierr;
 41:   PetscInt        i,m = jac->mbs;
 42:   const MatScalar *diag = jac->diag;
 43:   PetscScalar     x0,x1,*yy;
 44:   const PetscScalar *xx;

 47:   VecGetArrayRead(x,&xx);
 48:   VecGetArray(y,&yy);
 49:   for (i=0; i<m; i++) {
 50:     x0        = xx[2*i]; x1 = xx[2*i+1];
 51:     yy[2*i]   = diag[0]*x0 + diag[2]*x1;
 52:     yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
 53:     diag     += 4;
 54:   }
 55:   VecRestoreArrayRead(x,&xx);
 56:   VecRestoreArray(y,&yy);
 57:   PetscLogFlops(6.0*m);
 58:   return(0);
 59: }
 60: static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
 61: {
 62:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 63:   PetscErrorCode  ierr;
 64:   PetscInt        i,m = jac->mbs;
 65:   const MatScalar *diag = jac->diag;
 66:   PetscScalar     x0,x1,x2,*yy;
 67:   const PetscScalar *xx;

 70:   VecGetArrayRead(x,&xx);
 71:   VecGetArray(y,&yy);
 72:   for (i=0; i<m; i++) {
 73:     x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];

 75:     yy[3*i]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 76:     yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 77:     yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 78:     diag     += 9;
 79:   }
 80:   VecRestoreArrayRead(x,&xx);
 81:   VecRestoreArray(y,&yy);
 82:   PetscLogFlops(15.0*m);
 83:   return(0);
 84: }
 85: static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
 86: {
 87:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 88:   PetscErrorCode  ierr;
 89:   PetscInt        i,m = jac->mbs;
 90:   const MatScalar *diag = jac->diag;
 91:   PetscScalar     x0,x1,x2,x3,*yy;
 92:   const PetscScalar *xx;

 95:   VecGetArrayRead(x,&xx);
 96:   VecGetArray(y,&yy);
 97:   for (i=0; i<m; i++) {
 98:     x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];

100:     yy[4*i]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
101:     yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
102:     yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
103:     yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
104:     diag     += 16;
105:   }
106:   VecRestoreArrayRead(x,&xx);
107:   VecRestoreArray(y,&yy);
108:   PetscLogFlops(28.0*m);
109:   return(0);
110: }
111: static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
112: {
113:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
114:   PetscErrorCode  ierr;
115:   PetscInt        i,m = jac->mbs;
116:   const MatScalar *diag = jac->diag;
117:   PetscScalar     x0,x1,x2,x3,x4,*yy;
118:   const PetscScalar *xx;

121:   VecGetArrayRead(x,&xx);
122:   VecGetArray(y,&yy);
123:   for (i=0; i<m; i++) {
124:     x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];

126:     yy[5*i]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
127:     yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
128:     yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
129:     yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
130:     yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
131:     diag     += 25;
132:   }
133:   VecRestoreArrayRead(x,&xx);
134:   VecRestoreArray(y,&yy);
135:   PetscLogFlops(45.0*m);
136:   return(0);
137: }
138: static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y)
139: {
140:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
141:   PetscErrorCode  ierr;
142:   PetscInt        i,m = jac->mbs;
143:   const MatScalar *diag = jac->diag;
144:   PetscScalar     x0,x1,x2,x3,x4,x5,*yy;
145:   const PetscScalar *xx;

148:   VecGetArrayRead(x,&xx);
149:   VecGetArray(y,&yy);
150:   for (i=0; i<m; i++) {
151:     x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5];

153:     yy[6*i]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
154:     yy[6*i+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
155:     yy[6*i+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
156:     yy[6*i+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
157:     yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
158:     yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
159:     diag     += 36;
160:   }
161:   VecRestoreArrayRead(x,&xx);
162:   VecRestoreArray(y,&yy);
163:   PetscLogFlops(66.0*m);
164:   return(0);
165: }
166: static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y)
167: {
168:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
169:   PetscErrorCode  ierr;
170:   PetscInt        i,m = jac->mbs;
171:   const MatScalar *diag = jac->diag;
172:   PetscScalar     x0,x1,x2,x3,x4,x5,x6,*yy;
173:   const PetscScalar *xx;

176:   VecGetArrayRead(x,&xx);
177:   VecGetArray(y,&yy);
178:   for (i=0; i<m; i++) {
179:     x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6];

181:     yy[7*i]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
182:     yy[7*i+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
183:     yy[7*i+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
184:     yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
185:     yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
186:     yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
187:     yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
188:     diag     += 49;
189:   }
190:   VecRestoreArrayRead(x,&xx);
191:   VecRestoreArray(y,&yy);
192:   PetscLogFlops(91.0*m); /* 2*bs2 - bs */
193:   return(0);
194: }
195: static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y)
196: {
197:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
198:   PetscErrorCode    ierr;
199:   PetscInt          i,ib,jb;
200:   const PetscInt    m = jac->mbs;
201:   const PetscInt    bs = jac->bs;
202:   const MatScalar   *diag = jac->diag;
203:   PetscScalar       *yy;
204:   const PetscScalar *xx;

207:   VecGetArrayRead(x,&xx);
208:   VecGetArray(y,&yy);
209:   for (i=0; i<m; i++) {
210:     for (ib=0; ib<bs; ib++){
211:       PetscScalar rowsum = 0;
212:       for (jb=0; jb<bs; jb++){
213:         rowsum += diag[ib+jb*bs] * xx[bs*i+jb];
214:       }
215:       yy[bs*i+ib] = rowsum;
216:     }
217:     diag += bs*bs;
218:   }
219:   VecRestoreArrayRead(x,&xx);
220:   VecRestoreArray(y,&yy);
221:   PetscLogFlops((2.0*bs*bs-bs)*m); /* 2*bs2 - bs */
222:   return(0);
223: }

225: static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y)
226: {
227:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
228:   PetscErrorCode    ierr;
229:   PetscInt          i,j,k,m = jac->mbs,bs=jac->bs;
230:   const MatScalar   *diag = jac->diag;
231:   const PetscScalar *xx;
232:   PetscScalar       *yy;

235:   VecGetArrayRead(x,&xx);
236:   VecGetArray(y,&yy);
237:   for (i=0; i<m; i++) {
238:     for (j=0; j<bs; j++) yy[i*bs+j] = 0.;
239:     for (j=0; j<bs; j++) {
240:       for (k=0; k<bs; k++) {
241:         yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j];
242:       }
243:     }
244:     diag += bs*bs;
245:   }
246:   VecRestoreArrayRead(x,&xx);
247:   VecRestoreArray(y,&yy);
248:   PetscLogFlops(m*bs*(2*bs-1));
249:   return(0);
250: }

252: /* -------------------------------------------------------------------------- */
253: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
254: {
255:   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
257:   Mat            A = pc->pmat;
258:   MatFactorError err;
259:   PetscInt       nlocal;

262:   MatInvertBlockDiagonal(A,&jac->diag);
263:   MatFactorGetError(A,&err);
264:   if (err) pc->failedreason = (PCFailedReason)err;

266:   MatGetBlockSize(A,&jac->bs);
267:   MatGetLocalSize(A,&nlocal,NULL);
268:   jac->mbs = nlocal/jac->bs;
269:   switch (jac->bs) {
270:   case 1:
271:     pc->ops->apply = PCApply_PBJacobi_1;
272:     break;
273:   case 2:
274:     pc->ops->apply = PCApply_PBJacobi_2;
275:     break;
276:   case 3:
277:     pc->ops->apply = PCApply_PBJacobi_3;
278:     break;
279:   case 4:
280:     pc->ops->apply = PCApply_PBJacobi_4;
281:     break;
282:   case 5:
283:     pc->ops->apply = PCApply_PBJacobi_5;
284:     break;
285:   case 6:
286:     pc->ops->apply = PCApply_PBJacobi_6;
287:     break;
288:   case 7:
289:     pc->ops->apply = PCApply_PBJacobi_7;
290:     break;
291:   default:
292:     pc->ops->apply = PCApply_PBJacobi_N;
293:     break;
294:   }
295:   pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
296:   return(0);
297: }
298: /* -------------------------------------------------------------------------- */
299: static PetscErrorCode PCDestroy_PBJacobi(PC pc)
300: {

304:   /*
305:       Free the private data structure that was hanging off the PC
306:   */
307:   PetscFree(pc->data);
308:   return(0);
309: }

311: static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer)
312: {
314:   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
315:   PetscBool      iascii;

318:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
319:   if (iascii) {
320:     PetscViewerASCIIPrintf(viewer,"  point-block size %D\n",jac->bs);
321:   }
322:   return(0);
323: }

325: /* -------------------------------------------------------------------------- */
326: /*MC
327:      PCPBJACOBI - Point block Jacobi preconditioner


330:    Notes:
331:     See PCJACOBI for point Jacobi preconditioning

333:    This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix

335:    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
336:    is detected a PETSc error is generated.

338:    Developer Notes:
339:     This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
340:    the factorization to continue even after a zero pivot is found resulting in a Nan and hence
341:    terminating KSP with a KSP_DIVERGED_NANORIF allowing
342:    a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

344:    Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner
345:    even if a block is singular as the PCJACOBI does.

347:    Level: beginner


350: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI

352: M*/

354: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
355: {
356:   PC_PBJacobi    *jac;

360:   /*
361:      Creates the private data structure for this preconditioner and
362:      attach it to the PC object.
363:   */
364:   PetscNewLog(pc,&jac);
365:   pc->data = (void*)jac;

367:   /*
368:      Initialize the pointers to vectors to ZERO; these will be used to store
369:      diagonal entries of the matrix for fast preconditioner application.
370:   */
371:   jac->diag = NULL;

373:   /*
374:       Set the pointers for the functions that are provided above.
375:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
376:       are called, they will automatically call these functions.  Note we
377:       choose not to provide a couple of these functions since they are
378:       not needed.
379:   */
380:   pc->ops->apply               = NULL; /*set depending on the block size */
381:   pc->ops->applytranspose      = NULL;
382:   pc->ops->setup               = PCSetUp_PBJacobi;
383:   pc->ops->destroy             = PCDestroy_PBJacobi;
384:   pc->ops->setfromoptions      = NULL;
385:   pc->ops->view                = PCView_PBJacobi;
386:   pc->ops->applyrichardson     = NULL;
387:   pc->ops->applysymmetricleft  = NULL;
388:   pc->ops->applysymmetricright = NULL;
389:   return(0);
390: }