petsc-3.12.5 2020-03-29
  1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h> /*I "petscksp.h" I*/

  3: /*
  4:   Limited-memory Symmetric-Rank-1 method for approximating both 
  5:   the forward product and inverse application of a Jacobian.
  6: */

  8: typedef struct {
  9:   Vec *P, *Q;
 10:   Vec work;
 11:   PetscBool allocated, needP, needQ;
 12:   PetscReal *stp, *ytq;
 13: } Mat_LSR1;

 15: /*------------------------------------------------------------*/

 17: /*
 18:   The solution method is adapted from Algorithm 8 of Erway and Marcia 
 19:   "On Solving Large-Scale Limited-Memory Quasi-Newton Equations" 
 20:   (https://arxiv.org/abs/1510.06378).
 21:   
 22:   Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever 
 23:   the matrix is updated with a new (S[i], Y[i]) pair. This allows 
 24:   repeated calls of MatMult inside KSP solvers without unnecessarily 
 25:   recomputing Q[i] terms in expensive nested-loops.

 27:   dX <- J0^{-1} * F
 28:   for i = 0,1,2,...,k
 29:     # Q[i] = S[i] - (B_i)^{-1}*Y[i]
 30:     zeta = (Q[i]^T F) / (Q[i]^T Y[i])
 31:     dX <- dX + (zeta * Q[i])
 32:   end
 33: */
 34: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
 35: {
 36:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
 37:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
 38:   PetscErrorCode    ierr;
 39:   PetscInt          i, j;
 40:   PetscScalar       qjtyi, qtf, ytq;
 41: 
 43:   VecCheckSameSize(F, 2, dX, 3);
 44:   VecCheckMatCompatible(B, dX, 3, F, 2);
 45: 
 46:   if (lsr1->needQ) {
 47:     /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
 48:     for (i = 0; i <= lmvm->k; ++i) {
 49:       MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]);
 50:       VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]);
 51:       for (j = 0; j <= i-1; ++j) {
 52:         VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi);
 53:         VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi)/lsr1->ytq[j], lsr1->Q[j]);
 54:       }
 55:       VecDot(lmvm->Y[i], lsr1->Q[i], &ytq);
 56:       lsr1->ytq[i] = PetscRealPart(ytq);
 57:     }
 58:     lsr1->needQ = PETSC_FALSE;
 59:   }
 60: 
 61:   /* Invert the initial Jacobian onto F (or apply scaling) */
 62:   MatLMVMApplyJ0Inv(B, F, dX);
 63:   /* Start outer loop */
 64:   for (i = 0; i <= lmvm->k; ++i) {
 65:     VecDot(lsr1->Q[i], F, &qtf);
 66:     VecAXPY(dX, PetscRealPart(qtf)/lsr1->ytq[i], lsr1->Q[i]);
 67:   }
 68:   return(0);
 69: }

 71: /*------------------------------------------------------------*/

 73: /*
 74:   The forward product is the matrix-free implementation of 
 75:   Equation (6.24) in Nocedal and Wright "Numerical Optimization" 
 76:   2nd edition, pg 144.
 77:   
 78:   Note that the structure of the forward product is identical to 
 79:   the solution, with S and Y exchanging roles.
 80:   
 81:   P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever 
 82:   the matrix is updated with a new (S[i], Y[i]) pair. This allows 
 83:   repeated calls of MatMult inside KSP solvers without unnecessarily 
 84:   recomputing P[i] terms in expensive nested-loops.

 86:   Z <- J0 * X
 87:   for i = 0,1,2,...,k
 88:     # P[i] = Y[i] - (B_i)*S[i]
 89:     zeta = (P[i]^T X) / (P[i]^T S[i])
 90:     Z <- Z + (zeta * P[i])
 91:   end
 92: */
 93: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
 94: {
 95:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
 96:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
 97:   PetscErrorCode    ierr;
 98:   PetscInt          i, j;
 99:   PetscScalar       pjtsi, ptx, stp;
100: 
102:   VecCheckSameSize(X, 2, Z, 3);
103:   VecCheckMatCompatible(B, X, 2, Z, 3);
104: 
105:   if (lsr1->needP) {
106:     /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
107:     for (i = 0; i <= lmvm->k; ++i) {
108:       MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]);
109:       VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]);
110:       for (j = 0; j <= i-1; ++j) {
111:         VecDot(lsr1->P[j], lmvm->S[i], &pjtsi);
112:         VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi)/lsr1->stp[j], lsr1->P[j]);
113:       }
114:       VecDot(lmvm->S[i], lsr1->P[i], &stp);
115:       lsr1->stp[i] = PetscRealPart(stp);
116:     }
117:     lsr1->needP = PETSC_FALSE;
118:   }
119: 
120:   MatLMVMApplyJ0Fwd(B, X, Z);
121:   for (i = 0; i <= lmvm->k; ++i) {
122:     VecDot(lsr1->P[i], X, &ptx);
123:     VecAXPY(Z, PetscRealPart(ptx)/lsr1->stp[i], lsr1->P[i]);
124:   }
125:   return(0);
126: }

128: /*------------------------------------------------------------*/

130: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
131: {
132:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
133:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
134:   PetscErrorCode    ierr;
135:   PetscReal         snorm, pnorm;
136:   PetscScalar       sktw;

139:   if (!lmvm->m) return(0);
140:   if (lmvm->prev_set) {
141:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
142:     VecAYPX(lmvm->Xprev, -1.0, X);
143:     VecAYPX(lmvm->Fprev, -1.0, F);
144:     /* See if the updates can be accepted 
145:        NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
146:     MatMult(B, lmvm->Xprev, lsr1->work);
147:     VecAYPX(lsr1->work, -1.0, lmvm->Fprev);
148:     VecDot(lmvm->Xprev, lsr1->work, &sktw);
149:     VecNorm(lmvm->Xprev, NORM_2, &snorm);
150:     VecNorm(lsr1->work, NORM_2, &pnorm);
151:     if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
152:       /* Update is good, accept it */
153:       lsr1->needP = lsr1->needQ = PETSC_TRUE;
154:       MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);
155:     } else {
156:       /* Update is bad, skip it */
157:       ++lmvm->nrejects;
158:     }
159:   }
160:   /* Save the solution and function to be used in the next update */
161:   VecCopy(X, lmvm->Xprev);
162:   VecCopy(F, lmvm->Fprev);
163:   lmvm->prev_set = PETSC_TRUE;
164:   return(0);
165: }

167: /*------------------------------------------------------------*/

169: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
170: {
171:   Mat_LMVM          *bdata = (Mat_LMVM*)B->data;
172:   Mat_LSR1          *bctx = (Mat_LSR1*)bdata->ctx;
173:   Mat_LMVM          *mdata = (Mat_LMVM*)M->data;
174:   Mat_LSR1          *mctx = (Mat_LSR1*)mdata->ctx;
175:   PetscErrorCode    ierr;
176:   PetscInt          i;

179:   mctx->needP = bctx->needP;
180:   mctx->needQ = bctx->needQ;
181:   for (i=0; i<=bdata->k; ++i) {
182:     mctx->stp[i] = bctx->stp[i];
183:     mctx->ytq[i] = bctx->ytq[i];
184:     VecCopy(bctx->P[i], mctx->P[i]);
185:     VecCopy(bctx->Q[i], mctx->Q[i]);
186:   }
187:   return(0);
188: }

190: /*------------------------------------------------------------*/

192: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
193: {
194:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
195:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
196:   PetscErrorCode    ierr;
197: 
199:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
200:   if (destructive && lsr1->allocated) {
201:     VecDestroy(&lsr1->work);
202:     PetscFree2(lsr1->stp, lsr1->ytq);
203:     VecDestroyVecs(lmvm->m, &lsr1->P);
204:     VecDestroyVecs(lmvm->m, &lsr1->Q);
205:     lsr1->allocated = PETSC_FALSE;
206:   }
207:   MatReset_LMVM(B, destructive);
208:   return(0);
209: }

211: /*------------------------------------------------------------*/

213: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
214: {
215:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
216:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
217:   PetscErrorCode    ierr;
218: 
220:   MatAllocate_LMVM(B, X, F);
221:   if (!lsr1->allocated) {
222:     VecDuplicate(X, &lsr1->work);
223:     PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
224:     if (lmvm->m > 0) {
225:       VecDuplicateVecs(X, lmvm->m, &lsr1->P);
226:       VecDuplicateVecs(X, lmvm->m, &lsr1->Q);
227:     }
228:     lsr1->allocated = PETSC_TRUE;
229:   }
230:   return(0);
231: }

233: /*------------------------------------------------------------*/

235: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
236: {
237:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
238:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
239:   PetscErrorCode    ierr;

242:   if (lsr1->allocated) {
243:     VecDestroy(&lsr1->work);
244:     PetscFree2(lsr1->stp, lsr1->ytq);
245:     VecDestroyVecs(lmvm->m, &lsr1->P);
246:     VecDestroyVecs(lmvm->m, &lsr1->Q);
247:     lsr1->allocated = PETSC_FALSE;
248:   }
249:   PetscFree(lmvm->ctx);
250:   MatDestroy_LMVM(B);
251:   return(0);
252: }

254: /*------------------------------------------------------------*/

256: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
257: {
258:   Mat_LMVM          *lmvm = (Mat_LMVM*)B->data;
259:   Mat_LSR1          *lsr1 = (Mat_LSR1*)lmvm->ctx;
260:   PetscErrorCode    ierr;
261: 
263:   MatSetUp_LMVM(B);
264:   if (!lsr1->allocated && lmvm->m > 0) {
265:     VecDuplicate(lmvm->Xprev, &lsr1->work);
266:     PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
267:     if (lmvm->m > 0) {
268:       VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P);
269:       VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q);
270:     }
271:     lsr1->allocated = PETSC_TRUE;
272:   }
273:   return(0);
274: }

276: /*------------------------------------------------------------*/

278: PetscErrorCode MatCreate_LMVMSR1(Mat B)
279: {
280:   Mat_LMVM          *lmvm;
281:   Mat_LSR1          *lsr1;
282:   PetscErrorCode    ierr;

285:   MatCreate_LMVM(B);
286:   PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1);
287:   MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE);
288:   B->ops->setup = MatSetUp_LMVMSR1;
289:   B->ops->destroy = MatDestroy_LMVMSR1;
290:   B->ops->solve = MatSolve_LMVMSR1;
291: 
292:   lmvm = (Mat_LMVM*)B->data;
293:   lmvm->square = PETSC_TRUE;
294:   lmvm->ops->allocate = MatAllocate_LMVMSR1;
295:   lmvm->ops->reset = MatReset_LMVMSR1;
296:   lmvm->ops->update = MatUpdate_LMVMSR1;
297:   lmvm->ops->mult = MatMult_LMVMSR1;
298:   lmvm->ops->copy = MatCopy_LMVMSR1;
299: 
300:   PetscNewLog(B, &lsr1);
301:   lmvm->ctx = (void*)lsr1;
302:   lsr1->allocated = PETSC_FALSE;
303:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
304:   return(0);
305: }

307: /*------------------------------------------------------------*/

309: /*@
310:    MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
311:    matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not 
312:    guaranteed to be positive-definite.
313:    
314:    The provided local and global sizes must match the solution and function vectors 
315:    used with MatLMVMUpdate() and MatSolve(). The resulting L-SR1 matrix will have 
316:    storage vectors allocated with VecCreateSeq() in serial and VecCreateMPI() in 
317:    parallel. To use the L-SR1 matrix with other vector types, the matrix must be 
318:    created using MatCreate() and MatSetType(), followed by MatLMVMAllocate(). 
319:    This ensures that the internal storage and work vectors are duplicated from the 
320:    correct type of vector.

322:    Collective

324:    Input Parameters:
325: +  comm - MPI communicator, set to PETSC_COMM_SELF
326: .  n - number of local rows for storage vectors
327: -  N - global size of the storage vectors

329:    Output Parameter:
330: .  B - the matrix

332:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
333:    paradigm instead of this routine directly.

335:    Options Database Keys:
336: .   -mat_lmvm_num_vecs - maximum number of correction vectors (i.e.: updates) stored

338:    Level: intermediate

340: .seealso: MatCreate(), MATLMVM, MATLMVMSR1, MatCreateLMVMBFGS(), MatCreateLMVMDFP(), 
341:           MatCreateLMVMBrdn(), MatCreateLMVMBadBrdn(), MatCreateLMVMSymBrdn()
342: @*/
343: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
344: {
345:   PetscErrorCode    ierr;
346: 
348:   MatCreate(comm, B);
349:   MatSetSizes(*B, n, n, N, N);
350:   MatSetType(*B, MATLMVMSR1);
351:   MatSetUp(*B);
352:   return(0);
353: }