Actual source code: brdn.c
petsc-3.13.6 2020-09-29
1: #include <../src/ksp/ksp/utils/lmvm/brdn/brdn.h>
3: /*------------------------------------------------------------*/
5: /*
6: The solution method is the matrix-free implementation of the inverse Hessian
7: representation in page 312 of Griewank "Broyden Updating, The Good and The Bad!"
8: (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).
9:
10: Q[i] = (B_i)^{-1}*S[i] terms are computed ahead of time whenever
11: the matrix is updated with a new (S[i], Y[i]) pair. This allows
12: repeated calls of MatSolve without incurring redundant computation.
13:
14: dX <- J0^{-1} * F
15:
16: for i=0,1,2,...,k
17: # Q[i] = (B_i)^{-1} * Y[i]
18: tau = (S[i]^T dX) / (S[i]^T Q[i])
19: dX <- dX + (tau * (S[i] - Q[i]))
20: end
21: */
23: static PetscErrorCode MatSolve_LMVMBrdn(Mat B, Vec F, Vec dX)
24: {
25: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
26: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
27: PetscErrorCode ierr;
28: PetscInt i, j;
29: PetscScalar sjtqi, stx, stq;
30:
32: VecCheckSameSize(F, 2, dX, 3);
33: VecCheckMatCompatible(B, dX, 3, F, 2);
34:
35: if (lbrdn->needQ) {
36: /* Pre-compute (Q[i] = (B_i)^{-1} * Y[i]) */
37: for (i = 0; i <= lmvm->k; ++i) {
38: MatLMVMApplyJ0Inv(B, lmvm->Y[i], lbrdn->Q[i]);
39: for (j = 0; j <= i-1; ++j) {
40: VecDot(lmvm->S[j], lbrdn->Q[i], &sjtqi);
41: VecAXPBYPCZ(lbrdn->Q[i], PetscRealPart(sjtqi)/lbrdn->stq[j], -PetscRealPart(sjtqi)/lbrdn->stq[j], 1.0, lmvm->S[j], lbrdn->Q[j]);
42: }
43: VecDot(lmvm->S[i], lbrdn->Q[i], &stq);
44: lbrdn->stq[i] = PetscRealPart(stq);
45: }
46: lbrdn->needQ = PETSC_FALSE;
47: }
48:
49: MatLMVMApplyJ0Inv(B, F, dX);
50: for (i = 0; i <= lmvm->k; ++i) {
51: VecDot(lmvm->S[i], dX, &stx);
52: VecAXPBYPCZ(dX, PetscRealPart(stx)/lbrdn->stq[i], -PetscRealPart(stx)/lbrdn->stq[i], 1.0, lmvm->S[i], lbrdn->Q[i]);
53: }
54: return(0);
55: }
57: /*------------------------------------------------------------*/
59: /*
60: The forward product is the matrix-free implementation of Equation 2 in
61: page 302 of Griewank "Broyden Updating, The Good and The Bad!"
62: (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).
63:
64: P[i] = (B_i)*S[i] terms are computed ahead of time whenever
65: the matrix is updated with a new (S[i], Y[i]) pair. This allows
66: repeated calls of MatMult inside KSP solvers without unnecessarily
67: recomputing P[i] terms in expensive nested-loops.
68:
69: Z <- J0 * X
70:
71: for i=0,1,2,...,k
72: # P[i] = B_i * S[i]
73: tau = (S[i]^T X) / (S[i]^T S[i])
74: dX <- dX + (tau * (Y[i] - P[i]))
75: end
76: */
78: static PetscErrorCode MatMult_LMVMBrdn(Mat B, Vec X, Vec Z)
79: {
80: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
81: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
82: PetscErrorCode ierr;
83: PetscInt i, j;
84: PetscScalar sjtsi, stx;
85:
87: VecCheckSameSize(X, 2, Z, 3);
88: VecCheckMatCompatible(B, X, 2, Z, 3);
89:
90: if (lbrdn->needP) {
91: /* Pre-compute (P[i] = (B_i) * S[i]) */
92: for (i = 0; i <= lmvm->k; ++i) {
93: MatLMVMApplyJ0Fwd(B, lmvm->S[i], lbrdn->P[i]);
94: for (j = 0; j <= i-1; ++j) {
95: VecDot(lmvm->S[j], lmvm->S[i], &sjtsi);
96: VecAXPBYPCZ(lbrdn->P[i], PetscRealPart(sjtsi)/lbrdn->sts[j], -PetscRealPart(sjtsi)/lbrdn->sts[j], 1.0, lmvm->Y[j], lbrdn->P[j]);
97: }
98: }
99: lbrdn->needP = PETSC_FALSE;
100: }
101:
102: MatLMVMApplyJ0Fwd(B, X, Z);
103: for (i = 0; i <= lmvm->k; ++i) {
104: VecDot(lmvm->S[i], X, &stx);
105: VecAXPBYPCZ(Z, PetscRealPart(stx)/lbrdn->sts[i], -PetscRealPart(stx)/lbrdn->sts[i], 1.0, lmvm->Y[i], lbrdn->P[i]);
106: }
107: return(0);
108: }
110: /*------------------------------------------------------------*/
112: static PetscErrorCode MatUpdate_LMVMBrdn(Mat B, Vec X, Vec F)
113: {
114: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
115: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
116: PetscErrorCode ierr;
117: PetscInt old_k, i;
118: PetscScalar sts;
121: if (!lmvm->m) return(0);
122: if (lmvm->prev_set) {
123: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
124: VecAYPX(lmvm->Xprev, -1.0, X);
125: VecAYPX(lmvm->Fprev, -1.0, F);
126: /* Accept the update */
127: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
128: old_k = lmvm->k;
129: MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);
130: /* If we hit the memory limit, shift the sts array */
131: if (old_k == lmvm->k) {
132: for (i = 0; i <= lmvm->k-1; ++i) {
133: lbrdn->sts[i] = lbrdn->sts[i+1];
134: }
135: }
136: VecDot(lmvm->S[lmvm->k], lmvm->S[lmvm->k], &sts);
137: lbrdn->sts[lmvm->k] = PetscRealPart(sts);
138: }
139: /* Save the solution and function to be used in the next update */
140: VecCopy(X, lmvm->Xprev);
141: VecCopy(F, lmvm->Fprev);
142: lmvm->prev_set = PETSC_TRUE;
143: return(0);
144: }
146: /*------------------------------------------------------------*/
148: static PetscErrorCode MatCopy_LMVMBrdn(Mat B, Mat M, MatStructure str)
149: {
150: Mat_LMVM *bdata = (Mat_LMVM*)B->data;
151: Mat_Brdn *bctx = (Mat_Brdn*)bdata->ctx;
152: Mat_LMVM *mdata = (Mat_LMVM*)M->data;
153: Mat_Brdn *mctx = (Mat_Brdn*)mdata->ctx;
154: PetscErrorCode ierr;
155: PetscInt i;
158: mctx->needP = bctx->needP;
159: mctx->needQ = bctx->needQ;
160: for (i=0; i<=bdata->k; ++i) {
161: mctx->sts[i] = bctx->sts[i];
162: mctx->stq[i] = bctx->stq[i];
163: VecCopy(bctx->P[i], mctx->P[i]);
164: VecCopy(bctx->Q[i], mctx->Q[i]);
165: }
166: return(0);
167: }
169: /*------------------------------------------------------------*/
171: static PetscErrorCode MatReset_LMVMBrdn(Mat B, PetscBool destructive)
172: {
173: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
174: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
175: PetscErrorCode ierr;
176:
178: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
179: if (destructive && lbrdn->allocated) {
180: PetscFree2(lbrdn->sts, lbrdn->stq);
181: VecDestroyVecs(lmvm->m, &lbrdn->P);
182: VecDestroyVecs(lmvm->m, &lbrdn->Q);
183: lbrdn->allocated = PETSC_FALSE;
184: }
185: MatReset_LMVM(B, destructive);
186: return(0);
187: }
189: /*------------------------------------------------------------*/
191: static PetscErrorCode MatAllocate_LMVMBrdn(Mat B, Vec X, Vec F)
192: {
193: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
194: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
195: PetscErrorCode ierr;
196:
198: MatAllocate_LMVM(B, X, F);
199: if (!lbrdn->allocated) {
200: PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq);
201: if (lmvm->m > 0) {
202: VecDuplicateVecs(X, lmvm->m, &lbrdn->P);
203: VecDuplicateVecs(X, lmvm->m, &lbrdn->Q);
204: }
205: lbrdn->allocated = PETSC_TRUE;
206: }
207: return(0);
208: }
210: /*------------------------------------------------------------*/
212: static PetscErrorCode MatDestroy_LMVMBrdn(Mat B)
213: {
214: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
215: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
216: PetscErrorCode ierr;
219: if (lbrdn->allocated) {
220: PetscFree2(lbrdn->sts, lbrdn->stq);
221: VecDestroyVecs(lmvm->m, &lbrdn->P);
222: VecDestroyVecs(lmvm->m, &lbrdn->Q);
223: lbrdn->allocated = PETSC_FALSE;
224: }
225: PetscFree(lmvm->ctx);
226: MatDestroy_LMVM(B);
227: return(0);
228: }
230: /*------------------------------------------------------------*/
232: static PetscErrorCode MatSetUp_LMVMBrdn(Mat B)
233: {
234: Mat_LMVM *lmvm = (Mat_LMVM*)B->data;
235: Mat_Brdn *lbrdn = (Mat_Brdn*)lmvm->ctx;
236: PetscErrorCode ierr;
237:
239: MatSetUp_LMVM(B);
240: if (!lbrdn->allocated) {
241: PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq);
242: if (lmvm->m > 0) {
243: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->P);
244: VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->Q);
245: }
246: lbrdn->allocated = PETSC_TRUE;
247: }
248: return(0);
249: }
251: /*------------------------------------------------------------*/
253: PetscErrorCode MatCreate_LMVMBrdn(Mat B)
254: {
255: Mat_LMVM *lmvm;
256: Mat_Brdn *lbrdn;
257: PetscErrorCode ierr;
260: MatCreate_LMVM(B);
261: PetscObjectChangeTypeName((PetscObject)B, MATLMVMBROYDEN);
262: B->ops->setup = MatSetUp_LMVMBrdn;
263: B->ops->destroy = MatDestroy_LMVMBrdn;
264: B->ops->solve = MatSolve_LMVMBrdn;
266: lmvm = (Mat_LMVM*)B->data;
267: lmvm->square = PETSC_TRUE;
268: lmvm->ops->allocate = MatAllocate_LMVMBrdn;
269: lmvm->ops->reset = MatReset_LMVMBrdn;
270: lmvm->ops->mult = MatMult_LMVMBrdn;
271: lmvm->ops->update = MatUpdate_LMVMBrdn;
272: lmvm->ops->copy = MatCopy_LMVMBrdn;
274: PetscNewLog(B, &lbrdn);
275: lmvm->ctx = (void*)lbrdn;
276: lbrdn->allocated = PETSC_FALSE;
277: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
278: return(0);
279: }
281: /*------------------------------------------------------------*/
283: /*@
284: MatCreateLMVMBroyden - Creates a limited-memory "good" Broyden-type approximation
285: matrix used for a Jacobian. L-Brdn is not guaranteed to be symmetric or
286: positive-definite.
287:
288: The provided local and global sizes must match the solution and function vectors
289: used with MatLMVMUpdate() and MatSolve(). The resulting L-Brdn matrix will have
290: storage vectors allocated with VecCreateSeq() in serial and VecCreateMPI() in
291: parallel. To use the L-Brdn matrix with other vector types, the matrix must be
292: created using MatCreate() and MatSetType(), followed by MatLMVMAllocate().
293: This ensures that the internal storage and work vectors are duplicated from the
294: correct type of vector.
296: Collective
298: Input Parameters:
299: + comm - MPI communicator, set to PETSC_COMM_SELF
300: . n - number of local rows for storage vectors
301: - N - global size of the storage vectors
303: Output Parameter:
304: . B - the matrix
306: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
307: paradigm instead of this routine directly.
309: Options Database Keys:
310: . -mat_lmvm_num_vecs - maximum number of correction vectors (i.e.: updates) stored
312: Level: intermediate
314: .seealso: MatCreate(), MATLMVM, MATLMVMBRDN, MatCreateLMVMDFP(), MatCreateLMVMSR1(),
315: MatCreateLMVMBFGS(), MatCreateLMVMBadBrdn(), MatCreateLMVMSymBrdn()
316: @*/
317: PetscErrorCode MatCreateLMVMBroyden(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
318: {
319: PetscErrorCode ierr;
320:
322: MatCreate(comm, B);
323: MatSetSizes(*B, n, n, N, N);
324: MatSetType(*B, MATLMVMBROYDEN);
325: MatSetUp(*B);
326: return(0);
327: }