Actual source code: sr1.c
petsc-3.11.4 2019-09-28
1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h> /*I "petscksp.h" I*/
3: /*
4: Limited-memory Symmetric-Rank-1 method for approximating both
5: the forward product and inverse application of a Jacobian.
6: */
8: typedef struct {
9: Vec *P, *Q;
10: Vec work;
11: PetscBool allocated, needP, needQ;
12: PetscReal *stp, *ytq;
13: } Mat_LSR1;
15: /*------------------------------------------------------------*/
17: /*
18: The solution method is adapted from Algorithm 8 of Erway and Marcia
19: "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
20: (https://arxiv.org/abs/1510.06378).
21:
22: Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
23: the matrix is updated with a new (S[i], Y[i]) pair. This allows
24: repeated calls of MatMult inside KSP solvers without unnecessarily
25: recomputing Q[i] terms in expensive nested-loops.
27: dX <- J0^{-1} * F
28: for i = 0,1,2,...,k
29: # Q[i] = S[i] - (B_i)^{-1}*Y[i]
30: zeta = (Q[i]^T F) / (Q[i]^T Y[i])
31: dX <- dX + (zeta * Q[i])
32: end
33: */
34: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
37: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
38: PetscErrorCode ierr;
39: PetscInt i, j;
40: PetscScalar qjtyi, qtf, ytq;
41:
43: VecCheckSameSize(F, 2, dX, 3);
44: VecCheckMatCompatible(B, dX, 3, F, 2);
45:
46: if (lsr1->needQ) {
47: /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
48: for (i = 0; i <= lmvm->k; ++i) {
49: MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]);
50: VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]);
51: for (j = 0; j <= i-1; ++j) {
52: VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi);
53: VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi)/lsr1->ytq[j], lsr1->Q[j]);
54: }
55: VecDot(lmvm->Y[i], lsr1->Q[i], &ytq);
56: lsr1->ytq[i] = PetscRealPart(ytq);
57: }
58: lsr1->needQ = PETSC_FALSE;
59: }
60:
61: /* Invert the initial Jacobian onto F (or apply scaling) */
62: MatLMVMApplyJ0Inv(B, F, dX);
63: /* Start outer loop */
64: for (i = 0; i <= lmvm->k; ++i) {
65: VecDot(lsr1->Q[i], F, &qtf);
66: VecAXPY(dX, PetscRealPart(qtf)/lsr1->ytq[i], lsr1->Q[i]);
67: }
68: return(0);
69: }
71: /*------------------------------------------------------------*/
73: /*
74: The forward product is the matrix-free implementation of
75: Equation (6.24) in Nocedal and Wright "Numerical Optimization"
76: 2nd edition, pg 144.
77:
78: Note that the structure of the forward product is identical to
79: the solution, with S and Y exchanging roles.
80:
81: P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
82: the matrix is updated with a new (S[i], Y[i]) pair. This allows
83: repeated calls of MatMult inside KSP solvers without unnecessarily
84: recomputing P[i] terms in expensive nested-loops.
86: Z <- J0 * X
87: for i = 0,1,2,...,k
88: # P[i] = Y[i] - (B_i)*S[i]
89: zeta = (P[i]^T X) / (P[i]^T S[i])
90: Z <- Z + (zeta * P[i])
91: end
92: */
93: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
94: {
95: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
96: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
97: PetscErrorCode ierr;
98: PetscInt i, j;
99: PetscScalar pjtsi, ptx, stp;
100:
102: VecCheckSameSize(X, 2, Z, 3);
103: VecCheckMatCompatible(B, X, 2, Z, 3);
104:
105: if (lsr1->needP) {
106: /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
107: for (i = 0; i <= lmvm->k; ++i) {
108: MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]);
109: VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]);
110: for (j = 0; j <= i-1; ++j) {
111: VecDot(lsr1->P[j], lmvm->S[i], &pjtsi);
112: VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi)/lsr1->stp[j], lsr1->P[j]);
113: }
114: VecDot(lmvm->S[i], lsr1->P[i], &stp);
115: lsr1->stp[i] = PetscRealPart(stp);
116: }
117: lsr1->needP = PETSC_FALSE;
118: }
119:
120: MatLMVMApplyJ0Fwd(B, X, Z);
121: for (i = 0; i <= lmvm->k; ++i) {
122: VecDot(lsr1->P[i], X, &ptx);
123: VecAXPY(Z, PetscRealPart(ptx)/lsr1->stp[i], lsr1->P[i]);
124: }
125: return(0);
126: }
128: /*------------------------------------------------------------*/
130: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
131: {
132: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
133: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
134: PetscErrorCode ierr;
135: PetscReal snorm, pnorm;
136: PetscScalar sktw;
139: if (!lmvm->m) return(0);
140: if (lmvm->prev_set) {
141: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
142: VecAYPX(lmvm->Xprev, -1.0, X);
143: VecAYPX(lmvm->Fprev, -1.0, F);
144: /* See if the updates can be accepted
145: NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
146: MatMult(B, lmvm->Xprev, lsr1->work);
147: VecAYPX(lsr1->work, -1.0, lmvm->Fprev);
148: VecDot(lmvm->Xprev, lsr1->work, &sktw);
149: VecNorm(lmvm->Xprev, NORM_2, &snorm);
150: VecNorm(lsr1->work, NORM_2, &pnorm);
151: if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
152: /* Update is good, accept it */
153: lsr1->needP = lsr1->needQ = PETSC_TRUE;
154: MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);
155: } else {
156: /* Update is bad, skip it */
157: ++lmvm->nrejects;
158: }
159: }
160: /* Save the solution and function to be used in the next update */
161: VecCopy(X, lmvm->Xprev);
162: VecCopy(F, lmvm->Fprev);
163: lmvm->prev_set = PETSC_TRUE;
164: return(0);
165: }
167: /*------------------------------------------------------------*/
169: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
170: {
171: Mat_LMVM *bdata = (Mat_LMVM*)B->data;
172: Mat_LSR1 *bctx = (Mat_LSR1*)bdata->ctx;
173: Mat_LMVM *mdata = (Mat_LMVM*)M->data;
174: Mat_LSR1 *mctx = (Mat_LSR1*)mdata->ctx;
175: PetscErrorCode ierr;
176: PetscInt i;
179: mctx->needP = bctx->needP;
180: mctx->needQ = bctx->needQ;
181: for (i=0; i<=bdata->k; ++i) {
182: mctx->stp[i] = bctx->stp[i];
183: mctx->ytq[i] = bctx->ytq[i];
184: VecCopy(bctx->P[i], mctx->P[i]);
185: VecCopy(bctx->Q[i], mctx->Q[i]);
186: }
187: return(0);
188: }
190: /*------------------------------------------------------------*/
192: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
193: {
194: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
195: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
196: PetscErrorCode ierr;
197:
199: lsr1->needP = lsr1->needQ = PETSC_TRUE;
200: if (destructive && lsr1->allocated) {
201: VecDestroy(&lsr1->work);
202: PetscFree2(lsr1->stp, lsr1->ytq);
203: VecDestroyVecs(lmvm->m, &lsr1->P);
204: VecDestroyVecs(lmvm->m, &lsr1->Q);
205: lsr1->allocated = PETSC_FALSE;
206: }
207: MatReset_LMVM(B, destructive);
208: return(0);
209: }
211: /*------------------------------------------------------------*/
213: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
214: {
215: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
216: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
217: PetscErrorCode ierr;
218:
220: MatAllocate_LMVM(B, X, F);
221: if (!lsr1->allocated) {
222: VecDuplicate(X, &lsr1->work);
223: PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
224: if (lmvm->m > 0) {
225: VecDuplicateVecs(X, lmvm->m, &lsr1->P);
226: VecDuplicateVecs(X, lmvm->m, &lsr1->Q);
227: }
228: lsr1->allocated = PETSC_TRUE;
229: }
230: return(0);
231: }
233: /*------------------------------------------------------------*/
235: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
236: {
237: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
238: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
239: PetscErrorCode ierr;
242: if (lsr1->allocated) {
243: VecDestroy(&lsr1->work);
244: PetscFree2(lsr1->stp, lsr1->ytq);
245: VecDestroyVecs(lmvm->m, &lsr1->P);
246: VecDestroyVecs(lmvm->m, &lsr1->Q);
247: lsr1->allocated = PETSC_FALSE;
248: }
249: PetscFree(lmvm->ctx);
250: MatDestroy_LMVM(B);
251: return(0);
252: }
254: /*------------------------------------------------------------*/
256: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
257: {
258: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
259: Mat_LSR1 *lsr1 = (Mat_LSR1*)lmvm->ctx;
260: PetscErrorCode ierr;
261:
263: MatSetUp_LMVM(B);
264: if (!lsr1->allocated && lmvm->m > 0) {
265: VecDuplicate(lmvm->Xprev, &lsr1->work);
266: PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq);
267: if (lmvm->m > 0) {
268: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P);
269: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q);
270: }
271: lsr1->allocated = PETSC_TRUE;
272: }
273: return(0);
274: }
276: /*------------------------------------------------------------*/
278: PetscErrorCode MatCreate_LMVMSR1(Mat B)
279: {
280: Mat_LMVM *lmvm;
281: Mat_LSR1 *lsr1;
282: PetscErrorCode ierr;
285: MatCreate_LMVM(B);
286: PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1);
287: MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE);
288: B->ops->setup = MatSetUp_LMVMSR1;
289: B->ops->destroy = MatDestroy_LMVMSR1;
290: B->ops->solve = MatSolve_LMVMSR1;
291:
292: lmvm = (Mat_LMVM*)B->data;
293: lmvm->square = PETSC_TRUE;
294: lmvm->ops->allocate = MatAllocate_LMVMSR1;
295: lmvm->ops->reset = MatReset_LMVMSR1;
296: lmvm->ops->update = MatUpdate_LMVMSR1;
297: lmvm->ops->mult = MatMult_LMVMSR1;
298: lmvm->ops->copy = MatCopy_LMVMSR1;
299:
300: PetscNewLog(B, &lsr1);
301: lmvm->ctx = (void*)lsr1;
302: lsr1->allocated = PETSC_FALSE;
303: lsr1->needP = lsr1->needQ = PETSC_TRUE;
304: return(0);
305: }
307: /*------------------------------------------------------------*/
309: /*@
310: MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
311: matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
312: guaranteed to be positive-definite.
313:
314: The provided local and global sizes must match the solution and function vectors
315: used with MatLMVMUpdate() and MatSolve(). The resulting L-SR1 matrix will have
316: storage vectors allocated with VecCreateSeq() in serial and VecCreateMPI() in
317: parallel. To use the L-SR1 matrix with other vector types, the matrix must be
318: created using MatCreate() and MatSetType(), followed by MatLMVMAllocate().
319: This ensures that the internal storage and work vectors are duplicated from the
320: correct type of vector.
322: Collective on MPI_Comm
324: Input Parameters:
325: + comm - MPI communicator, set to PETSC_COMM_SELF
326: . n - number of local rows for storage vectors
327: - N - global size of the storage vectors
329: Output Parameter:
330: . B - the matrix
332: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
333: paradigm instead of this routine directly.
335: Options Database Keys:
336: . -mat_lmvm_num_vecs - maximum number of correction vectors (i.e.: updates) stored
338: Level: intermediate
340: .seealso: MatCreate(), MATLMVM, MATLMVMSR1, MatCreateLMVMBFGS(), MatCreateLMVMDFP(),
341: MatCreateLMVMBrdn(), MatCreateLMVMBadBrdn(), MatCreateLMVMSymBrdn()
342: @*/
343: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
344: {
345: PetscErrorCode ierr;
346:
348: MatCreate(comm, B);
349: MatSetSizes(*B, n, n, N, N);
350: MatSetType(*B, MATLMVMSR1);
351: MatSetUp(*B);
352: return(0);
353: }