Actual source code: vpbjacobi.c


  2: /*
  3:    Include files needed for the variable size block PBJacobi preconditioner:
  4:      pcimpl.h - private include file intended for use by all preconditioners
  5: */

  7: #include <petsc/private/pcimpl.h>

  9: /*
 10:    Private context (data structure) for the VPBJacobi preconditioner.
 11: */
 12: typedef struct {
 13:   MatScalar *diag;
 14: } PC_VPBJacobi;

 16: static PetscErrorCode PCApply_VPBJacobi(PC pc,Vec x,Vec y)
 17: {
 18:   PC_VPBJacobi      *jac = (PC_VPBJacobi*)pc->data;
 19:   PetscErrorCode    ierr;
 20:   PetscInt          i,ncnt = 0;
 21:   const MatScalar   *diag = jac->diag;
 22:   PetscInt          ib,jb,bs;
 23:   const PetscScalar *xx;
 24:   PetscScalar       *yy,x0,x1,x2,x3,x4,x5,x6;
 25:   PetscInt          nblocks;
 26:   const PetscInt    *bsizes;

 29:   MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes);
 30:   VecGetArrayRead(x,&xx);
 31:   VecGetArray(y,&yy);
 32:   for (i=0; i<nblocks; i++) {
 33:     bs = bsizes[i];
 34:     switch (bs) {
 35:     case 1:
 36:       yy[ncnt] = *diag*xx[ncnt];
 37:       break;
 38:     case 2:
 39:       x0         = xx[ncnt]; x1 = xx[ncnt+1];
 40:       yy[ncnt]   = diag[0]*x0 + diag[2]*x1;
 41:       yy[ncnt+1] = diag[1]*x0 + diag[3]*x1;
 42:       break;
 43:     case 3:
 44:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2];
 45:       yy[ncnt]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 46:       yy[ncnt+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 47:       yy[ncnt+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 48:       break;
 49:     case 4:
 50:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3];
 51:       yy[ncnt]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
 52:       yy[ncnt+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
 53:       yy[ncnt+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
 54:       yy[ncnt+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
 55:       break;
 56:     case 5:
 57:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4];
 58:       yy[ncnt]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
 59:       yy[ncnt+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
 60:       yy[ncnt+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
 61:       yy[ncnt+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
 62:       yy[ncnt+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
 63:       break;
 64:     case 6:
 65:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5];
 66:       yy[ncnt]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
 67:       yy[ncnt+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
 68:       yy[ncnt+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
 69:       yy[ncnt+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
 70:       yy[ncnt+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
 71:       yy[ncnt+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
 72:       break;
 73:     case 7:
 74:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; x6 = xx[ncnt+6];
 75:       yy[ncnt]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
 76:       yy[ncnt+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
 77:       yy[ncnt+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
 78:       yy[ncnt+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
 79:       yy[ncnt+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
 80:       yy[ncnt+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
 81:       yy[ncnt+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
 82:       break;
 83:     default:
 84:       for (ib=0; ib<bs; ib++) {
 85:         PetscScalar rowsum = 0;
 86:         for (jb=0; jb<bs; jb++) {
 87:           rowsum += diag[ib+jb*bs] * xx[ncnt+jb];
 88:         }
 89:         yy[ncnt+ib] = rowsum;
 90:       }
 91:     }
 92:     ncnt += bsizes[i];
 93:     diag += bsizes[i]*bsizes[i];
 94:   }
 95:   VecRestoreArrayRead(x,&xx);
 96:   VecRestoreArray(y,&yy);
 97:   return(0);
 98: }

100: /* -------------------------------------------------------------------------- */
101: static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
102: {
103:   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;
105:   Mat            A = pc->pmat;
106:   MatFactorError err;
107:   PetscInt       i,nsize = 0,nlocal;
108:   PetscInt       nblocks;
109:   const PetscInt *bsizes;

112:   MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes);
113:   MatGetLocalSize(pc->pmat,&nlocal,NULL);
114:   if (nlocal && !nblocks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
115:   if (!jac->diag) {
116:     for (i=0; i<nblocks; i++) nsize += bsizes[i]*bsizes[i];
117:     PetscMalloc1(nsize,&jac->diag);
118:   }
119:   MatInvertVariableBlockDiagonal(A,nblocks,bsizes,jac->diag);
120:   MatFactorGetError(A,&err);
121:   if (err) pc->failedreason = (PCFailedReason)err;
122:   pc->ops->apply = PCApply_VPBJacobi;
123:   return(0);
124: }
125: /* -------------------------------------------------------------------------- */
126: static PetscErrorCode PCDestroy_VPBJacobi(PC pc)
127: {
128:   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;

132:   /*
133:       Free the private data structure that was hanging off the PC
134:   */
135:   PetscFree(jac->diag);
136:   PetscFree(pc->data);
137:   return(0);
138: }

140: /* -------------------------------------------------------------------------- */
141: /*MC
142:      PCVPBJACOBI - Variable size point block Jacobi preconditioner

144:    Notes:
145:      See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks

147:      This works for AIJ matrices

149:      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
150:      is detected a PETSc error is generated.

152:      One must call MatSetVariableBlockSizes() to use this preconditioner

154:    Developer Notes:
155:      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
156:      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
157:      terminating KSP with a KSP_DIVERGED_NANORIF allowing
158:      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

160:      Perhaps should provide an option that allows generation of a valid preconditioner
161:      even if a block is singular as the PCJACOBI does.

163:    Level: beginner

165: .seealso:  MatSetVariableBlockSizes(), PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCPBJACOBI, PCBJACOBI

167: M*/

169: PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
170: {
171:   PC_VPBJacobi   *jac;

175:   /*
176:      Creates the private data structure for this preconditioner and
177:      attach it to the PC object.
178:   */
179:   PetscNewLog(pc,&jac);
180:   pc->data = (void*)jac;

182:   /*
183:      Initialize the pointers to vectors to ZERO; these will be used to store
184:      diagonal entries of the matrix for fast preconditioner application.
185:   */
186:   jac->diag = NULL;

188:   /*
189:       Set the pointers for the functions that are provided above.
190:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
191:       are called, they will automatically call these functions.  Note we
192:       choose not to provide a couple of these functions since they are
193:       not needed.
194:   */
195:   pc->ops->apply               = PCApply_VPBJacobi;
196:   pc->ops->applytranspose      = NULL;
197:   pc->ops->setup               = PCSetUp_VPBJacobi;
198:   pc->ops->destroy             = PCDestroy_VPBJacobi;
199:   pc->ops->setfromoptions      = NULL;
200:   pc->ops->applyrichardson     = NULL;
201:   pc->ops->applysymmetricleft  = NULL;
202:   pc->ops->applysymmetricright = NULL;
203:   return(0);
204: }