Actual source code: denseqn.c
1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
2: #include <../src/ksp/ksp/utils/lmvm/blas_cyclic/blas_cyclic.h>
3: #include <petscblaslapack.h>
4: #include <petscmat.h>
5: #include <petscsys.h>
6: #include <petscsystypes.h>
7: #include <petscis.h>
8: #include <petscoptions.h>
9: #include <petscdevice.h>
10: #include <petsc/private/deviceimpl.h>
12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);
19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
20: {
21: return idx % m;
22: }
24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
25: {
26: return (idx - num_updates) + PetscMin(m, num_updates);
27: }
29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
30: {
31: return PetscMax(0, idx - m);
32: }
34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
39: PetscBool isascii;
41: PetscFunctionBegin;
42: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
43: PetscCall(MatView_LMVM(B, pv));
44: PetscCall(SymBroydenRescaleView(lqn->rescale, pv));
45: if (isascii) PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ", Y^T x: %" PetscInt_FMT "\n", lqn->S_count, lqn->St_count, lqn->Y_count, lqn->Yt_count));
46: PetscFunctionReturn(PETSC_SUCCESS);
47: }
49: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
50: {
51: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
52: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
54: PetscFunctionBegin;
55: PetscCall(MatDestroy(&lqn->HY));
56: PetscCall(MatDestroy(&lqn->BS));
57: PetscCall(MatDestroy(&lqn->StY_triu));
58: PetscCall(MatDestroy(&lqn->YtS_triu));
59: PetscCall(VecDestroy(&lqn->StFprev));
60: PetscCall(VecDestroy(&lqn->Fprev_ref));
61: lqn->Fprev_state = 0;
62: PetscCall(MatDestroy(&lqn->YtS_triu_strict));
63: PetscCall(MatDestroy(&lqn->StY_triu_strict));
64: PetscCall(MatDestroy(&lqn->StBS));
65: PetscCall(MatDestroy(&lqn->YtHY));
66: PetscCall(MatDestroy(&lqn->J));
67: PetscCall(MatDestroy(&lqn->temp_mat));
68: PetscCall(VecDestroy(&lqn->diag_vec));
69: PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
70: PetscCall(VecDestroy(&lqn->inv_diag_vec));
71: PetscCall(VecDestroy(&lqn->column_work));
72: PetscCall(VecDestroy(&lqn->column_work2));
73: PetscCall(VecDestroy(&lqn->rwork1));
74: PetscCall(VecDestroy(&lqn->rwork2));
75: PetscCall(VecDestroy(&lqn->rwork3));
76: PetscCall(VecDestroy(&lqn->rwork2_local));
77: PetscCall(VecDestroy(&lqn->rwork3_local));
78: PetscCall(VecDestroy(&lqn->cyclic_work_vec));
79: PetscCall(VecDestroyVecs(lmvm->m, &lqn->PQ));
80: PetscCall(PetscFree(lqn->stp));
81: PetscCall(PetscFree(lqn->yts));
82: PetscCall(PetscFree(lqn->ytq));
83: lqn->allocated = PETSC_FALSE;
84: PetscFunctionReturn(PETSC_SUCCESS);
85: }
87: static PetscErrorCode MatReset_LMVMDQN_Internal(Mat B, MatLMVMResetMode mode)
88: {
89: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
90: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
92: PetscFunctionBegin;
93: lqn->watchdog = 0;
94: lqn->needPQ = PETSC_TRUE;
95: lqn->num_updates = 0;
96: lqn->num_mult_updates = 0;
97: if (MatLMVMResetClearsBases(mode)) PetscCall(MatLMVMDQNResetDestructive(B));
98: else {
99: if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
100: if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
101: if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
102: PetscCall(MatZeroEntries(lqn->StY_triu));
103: PetscCall(MatShift(lqn->StY_triu, 1.0));
104: }
105: if (lqn->YtS_triu) {
106: PetscCall(MatZeroEntries(lqn->YtS_triu));
107: PetscCall(MatShift(lqn->YtS_triu, 1.0));
108: }
109: if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
110: if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
111: if (lqn->StBS) {
112: PetscCall(MatZeroEntries(lqn->StBS));
113: PetscCall(MatShift(lqn->StBS, 1.0));
114: }
115: if (lqn->YtHY) {
116: PetscCall(MatZeroEntries(lqn->YtHY));
117: PetscCall(MatShift(lqn->YtHY, 1.0));
118: }
119: if (lqn->Fprev_ref) PetscCall(VecDestroy(&lqn->Fprev_ref));
120: lqn->Fprev_state = 0;
121: if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
122: }
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
126: static PetscErrorCode MatReset_LMVMDQN(Mat B, MatLMVMResetMode mode)
127: {
128: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
129: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
131: PetscFunctionBegin;
132: PetscCall(SymBroydenRescaleReset(B, lqn->rescale, mode));
133: PetscCall(MatReset_LMVMDQN_Internal(B, mode));
134: PetscFunctionReturn(PETSC_SUCCESS);
135: }
137: static PetscErrorCode MatAllocate_LMVMDQN_Internal(Mat B)
138: {
139: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
140: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
142: PetscFunctionBegin;
143: if (!lqn->allocated) {
144: if (lmvm->m > 0) {
145: PetscMPIInt rank;
146: PetscInt n, N, m, M;
147: PetscBool is_dbfgs, is_ddfp, is_dqn;
148: VecType vec_type;
149: MPI_Comm comm = PetscObjectComm((PetscObject)B);
150: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
152: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
153: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
154: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
156: PetscCallMPI(MPI_Comm_rank(comm, &rank));
157: PetscCall(MatGetSize(B, &N, NULL));
158: PetscCall(MatGetLocalSize(B, &n, NULL));
159: M = lmvm->m;
160: m = (rank == 0) ? M : 0;
162: /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
163: * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
164: * For DQN : Create all data eagerly */
165: PetscCall(VecGetType(lmvm->Xprev, &vec_type));
166: if (is_dqn) {
167: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
168: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
169: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
170: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
171: } else if (is_ddfp) {
172: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
173: PetscCall(MatDuplicate(Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
174: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
175: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
176: } else if (is_dbfgs) {
177: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
178: PetscCall(MatDuplicate(Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
179: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
180: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
181: } else {
182: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
183: }
184: /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
185: if (lqn->StY_triu) {
186: PetscCall(MatZeroEntries(lqn->StY_triu));
187: PetscCall(MatShift(lqn->StY_triu, 1.0));
188: }
189: if (lqn->YtS_triu) {
190: PetscCall(MatZeroEntries(lqn->YtS_triu));
191: PetscCall(MatShift(lqn->YtS_triu, 1.0));
192: }
193: if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
194: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lqn->PQ));
195: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work2));
196: PetscCall(PetscMalloc1(lmvm->m, &lqn->yts));
197: if (is_dbfgs) {
198: PetscCall(PetscMalloc1(lmvm->m, &lqn->stp));
199: } else if (is_ddfp) {
200: PetscCall(PetscMalloc1(lmvm->m, &lqn->ytq));
201: }
202: }
203: PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
204: PetscCall(VecZeroEntries(lqn->rwork1));
205: PetscCall(VecZeroEntries(lqn->rwork2));
206: PetscCall(VecZeroEntries(lqn->rwork3));
207: PetscCall(VecZeroEntries(lqn->diag_vec));
208: }
209: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
210: lqn->allocated = PETSC_TRUE;
211: }
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
216: {
217: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
218: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
220: PetscFunctionBegin;
221: PetscCall(MatSetUp_LMVM(B));
222: PetscCall(SymBroydenRescaleInitializeJ0(B, lqn->rescale));
223: PetscCall(MatAllocate_LMVMDQN_Internal(B));
224: PetscFunctionReturn(PETSC_SUCCESS);
225: }
227: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems PetscOptionsObject)
228: {
229: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
230: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
231: PetscBool is_dbfgs, is_ddfp, is_dqn;
233: PetscFunctionBegin;
234: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
235: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
236: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
237: PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
238: PetscOptionsHeadBegin(PetscOptionsObject, "Dense symmetric Broyden method for approximating SPD Jacobian actions");
239: if (is_dqn) {
240: PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
241: } else if (is_dbfgs) {
242: PetscCall(PetscOptionsBool("-mat_lbfgs_recursive", "Use recursive formulation for MatMult_LMVMDBFGS, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
243: PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
244: } else if (is_ddfp) {
245: PetscCall(PetscOptionsBool("-mat_ldfp_recursive", "Use recursive formulation for MatSolve_LMVMDDFP, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
246: PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
247: } else {
248: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
249: }
250: PetscCall(SymBroydenRescaleSetFromOptions(B, lqn->rescale, PetscOptionsObject));
251: PetscOptionsHeadEnd();
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
256: {
257: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
258: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
260: PetscFunctionBegin;
261: PetscCall(SymBroydenRescaleDestroy(&lqn->rescale));
262: PetscCall(MatReset_LMVMDQN_Internal(B, MAT_LMVM_RESET_ALL));
263: PetscCall(PetscFree(lqn->workscalar));
264: PetscCall(PetscFree(lmvm->ctx));
265: PetscCall(MatDestroy_LMVM(B));
266: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", NULL));
267: PetscFunctionReturn(PETSC_SUCCESS);
268: }
270: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
271: {
272: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
273: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
274: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
275: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
277: PetscBool is_ddfp, is_dbfgs, is_dqn;
278: PetscDeviceContext dctx;
280: PetscFunctionBegin;
281: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
282: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
283: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
284: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
285: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
286: if (lmvm->prev_set) {
287: Vec FX[2];
288: PetscScalar dotFX[2];
289: PetscScalar stFprev;
290: PetscScalar curvature, yTy;
291: PetscReal curvtol;
293: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
294: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
295: /* Test if the updates can be accepted */
296: FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
297: FX[1] = F; /* dotFX[1] = s^T F */
298: PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
299: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
300: PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
301: stFprev = PetscConj(dotFX[0]);
302: curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
303: if (PetscRealPart(yTy) < lmvm->eps) {
304: curvtol = 0.0;
305: } else {
306: curvtol = lmvm->eps * PetscRealPart(yTy);
307: }
308: if (PetscRealPart(curvature) > curvtol) {
309: PetscInt m = lmvm->m;
310: PetscInt k = lmvm->k;
311: PetscInt h_old = k - oldest_update(m, k);
312: PetscInt h_new = k + 1 - oldest_update(m, k + 1);
313: PetscInt idx = recycle_index(m, k);
315: /* Update is good, accept it */
316: PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
317: lqn->num_updates++;
318: lqn->watchdog = 0;
319: lqn->needPQ = PETSC_TRUE;
321: if (h_old == m && lqn->strategy == MAT_LMVM_DENSE_REORDER) {
322: if (is_dqn) {
323: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
324: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
325: } else if (is_dbfgs) {
326: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
327: } else if (is_ddfp) {
328: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
329: } else {
330: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
331: }
332: }
334: if (lqn->use_recursive && (is_dbfgs || is_ddfp)) lqn->yts[idx] = PetscRealPart(curvature);
336: if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the *
337: * H_k is immediately applied to F after begin updated. The S^T y computation can be split up as S^T (F - F_prev) */
338: PetscInt local_n;
339: PetscScalar *StFprev;
340: PetscMemType memtype;
341: PetscInt StYidx;
343: StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
344: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
345: PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
346: PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
347: if (local_n) {
348: if (PetscMemTypeHost(memtype)) {
349: StFprev[idx] = stFprev;
350: } else {
351: PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
352: PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
353: PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
354: }
355: }
356: PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));
358: {
359: Vec this_sy_col;
360: /* Now StFprev is updated for the new S vector. Write -StFprev into the appropriate row */
361: PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
362: PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));
364: /* Now compute the new StFprev */
365: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, lqn->StFprev, 0, h_new));
366: lqn->St_count++;
368: /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
369: PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));
371: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
372: PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
373: }
374: }
376: if (is_ddfp || is_dqn) {
377: PetscInt YtSidx;
379: YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
381: {
382: Vec this_ys_col;
384: PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
385: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
386: lqn->Yt_count++;
388: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
389: PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
390: }
391: }
393: if (is_dbfgs || is_dqn) {
394: PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
395: } else if (is_ddfp) {
396: PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
397: } else {
398: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
399: }
401: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
402: if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
403: PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
404: PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
405: } else {
406: if (!lqn->diag_vec_recycle_order) {
407: PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
408: lqn->diag_vec_recycle_order = lqn->diag_vec;
409: }
410: }
412: PetscCall(SymBroydenRescaleUpdate(B, lqn->rescale));
413: } else {
414: /* Update is bad, skip it */
415: ++lmvm->nrejects;
416: ++lqn->watchdog;
417: PetscInt m = lmvm->m;
418: PetscInt k = lmvm->k;
419: PetscInt h = k - oldest_update(m, k);
421: /* we still have to maintain StFprev */
422: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
423: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, lqn->StFprev, 0, h));
424: lqn->St_count++;
425: }
426: }
428: if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));
430: /* Save the solution and function to be used in the next update */
431: PetscCall(VecCopy(X, lmvm->Xprev));
432: PetscCall(VecCopy(F, lmvm->Fprev));
433: PetscCall(PetscObjectReference((PetscObject)F));
434: PetscCall(VecDestroy(&lqn->Fprev_ref));
435: lqn->Fprev_ref = F;
436: PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
437: lmvm->prev_set = PETSC_TRUE;
438: PetscFunctionReturn(PETSC_SUCCESS);
439: }
441: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
442: {
443: PetscFunctionBegin;
444: PetscCall(MatDestroy(dst));
445: if (src) PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst));
446: PetscFunctionReturn(PETSC_SUCCESS);
447: }
449: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
450: {
451: PetscFunctionBegin;
452: PetscCall(VecDestroy(dst));
453: if (src) {
454: PetscCall(VecDuplicate(src, dst));
455: PetscCall(VecCopy(src, *dst));
456: }
457: PetscFunctionReturn(PETSC_SUCCESS);
458: }
460: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
461: {
462: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
463: Mat_DQN *blqn = (Mat_DQN *)bdata->ctx;
464: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
465: Mat_DQN *mlqn = (Mat_DQN *)mdata->ctx;
466: PetscInt i;
467: PetscBool is_dbfgs, is_ddfp, is_dqn;
469: PetscFunctionBegin;
470: PetscCall(SymBroydenRescaleCopy(blqn->rescale, mlqn->rescale));
471: mlqn->num_updates = blqn->num_updates;
472: mlqn->num_mult_updates = blqn->num_mult_updates;
473: mlqn->dense_type = blqn->dense_type;
474: mlqn->strategy = blqn->strategy;
475: mlqn->S_count = 0;
476: mlqn->St_count = 0;
477: mlqn->Y_count = 0;
478: mlqn->Yt_count = 0;
479: mlqn->watchdog = blqn->watchdog;
480: mlqn->max_seq_rejects = blqn->max_seq_rejects;
481: mlqn->use_recursive = blqn->use_recursive;
482: mlqn->needPQ = blqn->needPQ;
483: if (blqn->allocated) {
484: PetscCall(MatAllocate_LMVMDQN_Internal(M));
485: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
486: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
487: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
488: PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
489: PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
490: PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
491: PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
492: PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
493: PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
494: PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
495: PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
496: PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
497: PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
498: PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
499: PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
500: if (blqn->use_recursive && (is_dbfgs || is_ddfp)) {
501: for (i = 0; i < bdata->m; i++) {
502: PetscCall(VecDestroyThenCopy(blqn->PQ[i], &mlqn->PQ[i]));
503: mlqn->yts[i] = blqn->yts[i];
504: if (is_dbfgs) {
505: mlqn->stp[i] = blqn->stp[i];
506: } else if (is_ddfp) {
507: mlqn->ytq[i] = blqn->ytq[i];
508: }
509: }
510: }
511: }
512: PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
513: PetscCall(VecDestroy(&mlqn->Fprev_ref));
514: mlqn->Fprev_ref = blqn->Fprev_ref;
515: mlqn->Fprev_state = blqn->Fprev_state;
516: PetscFunctionReturn(PETSC_SUCCESS);
517: }
519: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
520: {
521: PetscFunctionBegin;
522: PetscCall(MatMult_LMVMDDFP(B, X, Z));
523: PetscFunctionReturn(PETSC_SUCCESS);
524: }
526: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
527: {
528: PetscFunctionBegin;
529: PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
530: PetscFunctionReturn(PETSC_SUCCESS);
531: }
533: static PetscErrorCode MatLMVMSymBroydenSetDelta_LMVMDQN(Mat B, PetscScalar delta)
534: {
535: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
536: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
538: PetscFunctionBegin;
539: PetscCall(SymBroydenRescaleSetDelta(B, lqn->rescale, PetscAbsReal(PetscRealPart(delta))));
540: PetscFunctionReturn(PETSC_SUCCESS);
541: }
543: /*
544: This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
545: and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
546: results in avoiding costly Cholesky factorization, at the cost of duality cap.
547: Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
548: */
549: PetscErrorCode MatCreate_LMVMDQN(Mat B)
550: {
551: Mat_LMVM *lmvm;
552: Mat_DQN *lqn;
554: PetscFunctionBegin;
555: PetscCall(MatCreate_LMVM(B));
556: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
557: PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
558: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
559: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
560: B->ops->view = MatView_LMVMDQN;
561: B->ops->setup = MatSetUp_LMVMDQN;
562: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
563: B->ops->destroy = MatDestroy_LMVMDQN;
565: lmvm = (Mat_LMVM *)B->data;
566: lmvm->ops->reset = MatReset_LMVMDQN;
567: lmvm->ops->update = MatUpdate_LMVMDQN;
568: lmvm->ops->mult = MatMult_LMVMDQN;
569: lmvm->ops->solve = MatSolve_LMVMDQN;
570: lmvm->ops->copy = MatCopy_LMVMDQN;
572: lmvm->ops->multht = lmvm->ops->mult;
573: lmvm->ops->solveht = lmvm->ops->solve;
575: PetscCall(PetscNew(&lqn));
576: lmvm->ctx = (void *)lqn;
577: lqn->allocated = PETSC_FALSE;
578: lqn->use_recursive = PETSC_FALSE;
579: lqn->needPQ = PETSC_FALSE;
580: lqn->watchdog = 0;
581: lqn->max_seq_rejects = lmvm->m / 2;
582: lqn->strategy = MAT_LMVM_DENSE_INPLACE;
584: PetscCall(SymBroydenRescaleCreate(&lqn->rescale));
585: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
586: PetscFunctionReturn(PETSC_SUCCESS);
587: }
589: /*@
590: MatCreateLMVMDQN - Creates a dense representation of the limited-memory
591: Quasi-Newton approximation to a Hessian.
593: Collective
595: Input Parameters:
596: + comm - MPI communicator
597: . n - number of local rows for storage vectors
598: - N - global size of the storage vectors
600: Output Parameter:
601: . B - the matrix
603: Level: advanced
605: Note:
606: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
607: paradigm instead of this routine directly.
609: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
610: @*/
611: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
612: {
613: PetscFunctionBegin;
614: PetscCall(KSPInitializePackage());
615: PetscCall(MatCreate(comm, B));
616: PetscCall(MatSetSizes(*B, n, n, N, N));
617: PetscCall(MatSetType(*B, MATLMVMDQN));
618: PetscCall(MatSetUp(*B));
619: PetscFunctionReturn(PETSC_SUCCESS);
620: }
622: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
623: {
624: PetscFunctionBegin;
625: PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
626: PetscFunctionReturn(PETSC_SUCCESS);
627: }
629: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
630: {
631: PetscFunctionBegin;
632: PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
636: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
637: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
638: {
639: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
640: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
641: PetscInt m_local;
643: PetscFunctionBegin;
644: if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
645: PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
646: PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
647: PetscCall(MatGetLocalSize(result, &m_local, NULL));
648: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
649: PetscCall(MatConjugate(lbfgs->temp_mat));
650: if (m_local) {
651: Mat temp_local, YtS_local, result_local;
652: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
653: PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
654: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
655: PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
656: }
657: PetscCall(MatConjugate(result));
658: PetscFunctionReturn(PETSC_SUCCESS);
659: }
661: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
662: {
663: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
664: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
665: PetscInt m = lmvm->m, m_local;
666: PetscInt k = lmvm->k;
667: PetscInt h = k - oldest_update(m, k);
668: PetscInt j_0;
669: PetscInt prev_oldest;
670: Mat J_local;
671: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
672: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
674: PetscFunctionBegin;
675: if (!lbfgs->YtS_triu_strict) {
676: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
677: PetscCall(MatDestroy(&lbfgs->StBS));
678: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
679: PetscCall(MatDestroy(&lbfgs->J));
680: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
681: PetscCall(MatDestroy(&lbfgs->BS));
682: PetscCall(MatDuplicate(Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
683: PetscCall(MatShift(lbfgs->StBS, 1.0));
684: lbfgs->num_mult_updates = oldest_update(m, k);
685: }
686: if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
688: /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
689: for (PetscInt j = oldest_update(m, k); j < k; j++) {
690: Vec s_j;
691: Vec Bs_j;
692: Vec StBs_j;
693: PetscInt S_idx = recycle_index(m, j);
694: PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
696: PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
697: PetscCall(MatDenseGetColumnVecRead(Sfull, S_idx, &s_j));
698: PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
699: PetscCall(MatDenseRestoreColumnVecRead(Sfull, S_idx, &s_j));
700: PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
701: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, Bs_j, StBs_j, 0, h));
702: lbfgs->St_count++;
703: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
704: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
705: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
706: }
707: prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
708: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
709: /* move the YtS entries that have been computed and need to be kept back up */
710: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
712: PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
713: }
714: PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
715: j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
716: for (PetscInt j = j_0; j < k; j++) {
717: PetscInt S_idx = recycle_index(m, j);
718: PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
719: Vec s_j, Yts_j;
721: PetscCall(MatDenseGetColumnVecRead(Sfull, S_idx, &s_j));
722: PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
723: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, s_j, Yts_j, 0, h));
724: lbfgs->Yt_count++;
725: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
726: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
727: PetscCall(MatDenseRestoreColumnVecRead(Sfull, S_idx, &s_j));
728: /* zero the corresponding row */
729: if (m_local > 0) {
730: Mat YtS_local, YtS_row;
732: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
733: PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
734: PetscCall(MatZeroEntries(YtS_row));
735: PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
736: }
737: }
738: if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
739: PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
740: PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
741: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
742: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
743: PetscCall(MatGetLDLT(B, lbfgs->J));
744: PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
745: if (m_local) {
746: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
747: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
748: }
749: lbfgs->num_mult_updates = lbfgs->num_updates;
750: PetscFunctionReturn(PETSC_SUCCESS);
751: }
753: /* Solves for
754: * [ I | -S R^{-T} ] [ I | 0 ] [ H_0 | 0 ] [ I | Y ] [ I ]
755: * [-----+---] [-----+---] [---+---] [-------------]
756: * [ Y^T | I ] [ 0 | D ] [ 0 | I ] [ -R^{-1} S^T ] */
758: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
759: {
760: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
761: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
762: Vec rwork1 = lbfgs->rwork1;
763: PetscInt m = lmvm->m;
764: PetscInt k = lmvm->k;
765: PetscInt h = k - oldest_update(m, k);
766: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
767: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
768: PetscObjectState Fstate;
770: PetscFunctionBegin;
771: VecCheckSameSize(F, 2, dX, 3);
772: VecCheckMatCompatible(H, dX, 3, F, 2);
774: /* Block Version */
775: if (!lbfgs->num_updates) {
776: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
777: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
778: }
780: PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
781: if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
782: PetscCall(VecCopy(lbfgs->StFprev, rwork1));
783: } else {
784: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, rwork1, 0, h));
785: lbfgs->St_count++;
786: }
788: /* Reordering rwork1, as STY is in history order, while S is in recycled order */
789: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
790: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
791: PetscCall(VecScale(rwork1, -1.0));
792: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
794: PetscCall(VecCopy(F, lbfgs->column_work));
795: PetscCall(MatMultAddColumnRange(Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
796: lbfgs->Y_count++;
798: PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
799: PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));
801: PetscCall(MatMultHermitianTransposeAddColumnRange(Yfull, dX, rwork1, rwork1, 0, h));
802: lbfgs->Yt_count++;
804: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
805: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
806: PetscCall(VecScale(rwork1, -1.0));
807: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
809: PetscCall(MatMultAddColumnRange(Sfull, rwork1, dX, dX, 0, h));
810: lbfgs->S_count++;
811: PetscFunctionReturn(PETSC_SUCCESS);
812: }
814: /* Solves for
815: B_0 - [ Y | B_0 S] [ -D | L^T ]^-1 [ Y^T ]
816: [-----+-----------] [---------]
817: [ L | S^T B_0 S ] [ S^T B_0 ]
819: Above is equivalent to
821: B_0 - [ Y | B_0 S] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} L^T ]]^-1 [ Y^T ]
822: [[-----------+---][-----+---][---+-------------]] [---------]
823: [[ -L D^{-1} | I ][ 0 | J ][ 0 | I ]] [ S^T B_0 ]
825: where J = S^T B_0 S + L D^{-1} L^T
827: becomes
829: B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1} | 0 ][ I | 0 ] [ Y^T ]
830: [---+------------][----------+--------][----------+---] [---------]
831: [ 0 | I ][ 0 | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
833: =
835: B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I | 0 ][ I | 0 ] [ Y^T ]
836: [--------+---][---+-----][---+---------][----------+---] [---------]
837: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
839: (Note that YtS_triu_strict is L^T)
840: Byrd, Nocedal, Schnabel 1994
842: Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
843: (See ddfp.c's MatMult_LMVMDDFP)
845: */
846: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
847: {
848: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
849: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
850: Mat J_local;
851: PetscInt idx, i, j, m_local, local_n;
852: PetscInt m = lmvm->m;
853: PetscInt k = lmvm->k;
854: PetscInt h = k - oldest_update(m, k);
855: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
856: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
858: PetscFunctionBegin;
859: VecCheckSameSize(X, 2, Z, 3);
860: VecCheckMatCompatible(B, X, 2, Z, 3);
862: /* Cholesky Version */
863: /* Start with the B0 term */
864: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
865: if (!lbfgs->num_updates) PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
867: if (lbfgs->use_recursive) {
868: PetscDeviceContext dctx;
869: PetscMemType memtype;
870: PetscScalar stz, ytx, stp, sjtpi, yjtsi, *workscalar;
871: PetscInt oldest = oldest_update(m, k);
873: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
874: /* Recursive formulation to avoid Cholesky. Not a dense formulation */
875: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, lbfgs->rwork1, 0, h));
876: lbfgs->Yt_count++;
878: PetscCall(VecGetLocalSize(lbfgs->rwork1, &local_n));
880: if (lbfgs->needPQ) {
881: PetscInt oldest = oldest_update(m, k);
882: for (i = oldest; i < k; ++i) {
883: idx = recycle_index(m, i);
884: /* column_work = S[idx] */
885: PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work, idx));
886: PetscCall(MatDQNApplyJ0Fwd(B, lbfgs->column_work, lbfgs->PQ[idx]));
887: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, lbfgs->column_work, lbfgs->rwork3, 0, h));
888: PetscCall(VecGetArrayAndMemType(lbfgs->rwork3, &workscalar, &memtype));
889: for (j = oldest; j < i; ++j) {
890: PetscInt idx_j = recycle_index(m, j);
891: /* Copy yjtsi in device-aware manner */
892: if (local_n) {
893: if (PetscMemTypeHost(memtype)) {
894: yjtsi = workscalar[idx_j];
895: } else {
896: PetscCall(PetscDeviceRegisterMemory(&yjtsi, PETSC_MEMTYPE_HOST, sizeof(yjtsi)));
897: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
898: PetscCall(PetscDeviceArrayCopy(dctx, &yjtsi, &workscalar[idx_j], 1));
899: }
900: }
901: PetscCallMPI(MPI_Bcast(&yjtsi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
902: /* column_work2 = S[j] */
903: PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work2, idx_j));
904: PetscCall(VecDot(lbfgs->PQ[idx], lbfgs->column_work2, &sjtpi));
905: /* column_work2 = Y[j] */
906: PetscCall(MatGetColumnVector(Yfull, lbfgs->column_work2, idx_j));
907: /* Compute the pure BFGS component of the forward product */
908: PetscCall(VecAXPBYPCZ(lbfgs->PQ[idx], -sjtpi / lbfgs->stp[idx_j], yjtsi / lbfgs->yts[idx_j], 1.0, lbfgs->PQ[idx_j], lbfgs->column_work2));
909: }
910: PetscCall(VecDot(lbfgs->PQ[idx], lbfgs->column_work, &stp));
911: lbfgs->stp[idx] = PetscRealPart(stp);
912: }
913: lbfgs->needPQ = PETSC_FALSE;
914: }
916: PetscCall(VecGetArrayAndMemType(lbfgs->rwork1, &workscalar, &memtype));
917: for (i = oldest; i < k; ++i) {
918: idx = recycle_index(m, i);
919: /* Copy stz[i], ytx[i] in device-aware manner */
920: if (local_n) {
921: if (PetscMemTypeHost(memtype)) {
922: ytx = workscalar[idx];
923: } else {
924: PetscCall(PetscDeviceRegisterMemory(&ytx, PETSC_MEMTYPE_HOST, 1 * sizeof(ytx)));
925: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
926: PetscCall(PetscDeviceArrayCopy(dctx, &ytx, &workscalar[idx], 1));
927: }
928: }
929: PetscCallMPI(MPI_Bcast(&ytx, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
930: /* column_work : S[i], column_work2 : Y[i] */
931: PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work, idx));
932: PetscCall(MatGetColumnVector(Yfull, lbfgs->column_work2, idx));
933: PetscCall(VecDot(Z, lbfgs->column_work, &stz));
934: PetscCall(VecAXPBYPCZ(Z, -stz / lbfgs->stp[idx], ytx / lbfgs->yts[idx], 1.0, lbfgs->PQ[idx], lbfgs->column_work2));
935: }
936: PetscCall(VecRestoreArrayAndMemType(lbfgs->rwork1, &workscalar));
937: } else {
938: PetscCall(MatLMVMDBFGSUpdateMultData(B));
939: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, lbfgs->rwork1, 0, h));
940: lbfgs->Yt_count++;
941: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, Z, lbfgs->rwork2, 0, h));
942: lbfgs->St_count++;
943: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
944: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
945: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
946: }
948: PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));
949: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(lbfgs->YtS_triu_strict));
950: PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork1, lbfgs->rwork2, lbfgs->rwork2));
951: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(lbfgs->YtS_triu_strict));
953: if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
954: if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
955: PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
956: PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
957: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
958: PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
959: if (m_local) {
960: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
961: PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
962: }
963: PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
964: PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
965: PetscCall(VecScale(lbfgs->rwork3, -1.0));
967: PetscCall(MatMult(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2));
968: PetscCall(VecPointwiseMult(lbfgs->rwork2, lbfgs->rwork2, lbfgs->inv_diag_vec));
969: PetscCall(VecAXPY(lbfgs->rwork1, 1.0, lbfgs->rwork2));
971: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
972: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
973: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
974: }
976: PetscCall(MatMultAddColumnRange(Yfull, lbfgs->rwork1, Z, Z, 0, h));
977: lbfgs->Y_count++;
978: PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
979: lbfgs->S_count++;
980: }
981: PetscFunctionReturn(PETSC_SUCCESS);
982: }
984: /*
985: This dense representation reduces the L-BFGS update to a series of
986: matrix-vector products with dense matrices in lieu of the conventional matrix-free
987: two-loop algorithm.
988: */
989: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
990: {
991: Mat_LMVM *lmvm;
992: Mat_DQN *lbfgs;
994: PetscFunctionBegin;
995: PetscCall(MatCreate_LMVM(B));
996: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
997: PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
998: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
999: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1000: B->ops->view = MatView_LMVMDQN;
1001: B->ops->setup = MatSetUp_LMVMDQN;
1002: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1003: B->ops->destroy = MatDestroy_LMVMDQN;
1005: lmvm = (Mat_LMVM *)B->data;
1006: lmvm->ops->reset = MatReset_LMVMDQN;
1007: lmvm->ops->update = MatUpdate_LMVMDQN;
1008: lmvm->ops->mult = MatMult_LMVMDBFGS;
1009: lmvm->ops->solve = MatSolve_LMVMDBFGS;
1010: lmvm->ops->copy = MatCopy_LMVMDQN;
1012: lmvm->ops->multht = lmvm->ops->mult;
1013: lmvm->ops->solveht = lmvm->ops->solve;
1015: PetscCall(PetscNew(&lbfgs));
1016: lmvm->ctx = (void *)lbfgs;
1017: lbfgs->allocated = PETSC_FALSE;
1018: lbfgs->use_recursive = PETSC_TRUE;
1019: lbfgs->needPQ = PETSC_TRUE;
1020: lbfgs->watchdog = 0;
1021: lbfgs->max_seq_rejects = lmvm->m / 2;
1022: lbfgs->strategy = MAT_LMVM_DENSE_INPLACE;
1024: PetscCall(SymBroydenRescaleCreate(&lbfgs->rescale));
1025: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
1026: PetscFunctionReturn(PETSC_SUCCESS);
1027: }
1029: /*@
1030: MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1031: Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.
1033: Collective
1035: Input Parameters:
1036: + comm - MPI communicator
1037: . n - number of local rows for storage vectors
1038: - N - global size of the storage vectors
1040: Output Parameter:
1041: . B - the matrix
1043: Level: advanced
1045: Note:
1046: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1047: paradigm instead of this routine directly.
1049: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1050: @*/
1051: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1052: {
1053: PetscFunctionBegin;
1054: PetscCall(KSPInitializePackage());
1055: PetscCall(MatCreate(comm, B));
1056: PetscCall(MatSetSizes(*B, n, n, N, N));
1057: PetscCall(MatSetType(*B, MATLMVMDBFGS));
1058: PetscCall(MatSetUp(*B));
1059: PetscFunctionReturn(PETSC_SUCCESS);
1060: }
1062: /* here R is strictly upper triangular part of STY */
1063: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1064: {
1065: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1066: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1067: PetscInt m_local;
1069: PetscFunctionBegin;
1070: if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1071: PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1072: PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1073: PetscCall(MatGetLocalSize(result, &m_local, NULL));
1074: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1075: PetscCall(MatConjugate(ldfp->temp_mat));
1076: if (m_local) {
1077: Mat temp_local, StY_local, result_local;
1078: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1079: PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1080: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1081: PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
1082: }
1083: PetscCall(MatConjugate(result));
1084: PetscFunctionReturn(PETSC_SUCCESS);
1085: }
1087: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1088: {
1089: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1090: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1091: PetscInt m = lmvm->m, m_local;
1092: PetscInt k = lmvm->k;
1093: PetscInt h = k - oldest_update(m, k);
1094: PetscInt j_0;
1095: PetscInt prev_oldest;
1096: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
1097: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1098: Mat J_local;
1100: PetscFunctionBegin;
1101: if (!ldfp->StY_triu_strict) {
1102: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1103: PetscCall(MatDestroy(&ldfp->YtHY));
1104: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1105: PetscCall(MatDestroy(&ldfp->J));
1106: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1107: PetscCall(MatDestroy(&ldfp->HY));
1108: PetscCall(MatDuplicate(Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1109: PetscCall(MatShift(ldfp->YtHY, 1.0));
1110: ldfp->num_mult_updates = oldest_update(m, k);
1111: }
1112: if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
1114: /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1115: for (PetscInt j = oldest_update(m, k); j < k; j++) {
1116: Vec y_j;
1117: Vec Hy_j;
1118: Vec YtHy_j;
1119: PetscInt Y_idx = recycle_index(m, j);
1120: PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1122: PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1123: PetscCall(MatDenseGetColumnVecRead(Yfull, Y_idx, &y_j));
1124: PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1125: PetscCall(MatDenseRestoreColumnVecRead(Yfull, Y_idx, &y_j));
1126: PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1127: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, Hy_j, YtHy_j, 0, h));
1128: ldfp->Yt_count++;
1129: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1130: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1131: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1132: }
1133: prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1134: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1135: /* move the YtS entries that have been computed and need to be kept back up */
1136: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
1138: PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1139: }
1140: PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1141: j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1142: for (PetscInt j = j_0; j < k; j++) {
1143: PetscInt Y_idx = recycle_index(m, j);
1144: PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1145: Vec y_j, Sty_j;
1147: PetscCall(MatDenseGetColumnVecRead(Yfull, Y_idx, &y_j));
1148: PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1149: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, y_j, Sty_j, 0, h));
1150: ldfp->St_count++;
1151: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1152: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1153: PetscCall(MatDenseRestoreColumnVecRead(Yfull, Y_idx, &y_j));
1154: /* zero the corresponding row */
1155: if (m_local > 0) {
1156: Mat StY_local, StY_row;
1158: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1159: PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1160: PetscCall(MatZeroEntries(StY_row));
1161: PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1162: }
1163: }
1164: if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1165: PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1166: PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1167: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1168: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1169: PetscCall(MatGetRTDR(B, ldfp->J));
1170: PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1171: if (m_local) {
1172: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1173: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1174: }
1175: ldfp->num_mult_updates = ldfp->num_updates;
1176: PetscFunctionReturn(PETSC_SUCCESS);
1177: }
1179: /* Solves for
1181: H_0 - [ S | H_0 Y] [ -D | R.T ]^-1 [ S^T ]
1182: [-----+-----------] [---------]
1183: [ R | Y^T H_0 Y ] [ Y^T H_0 ]
1185: Above is equivalent to
1187: H_0 - [ S | H_0 Y] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [ S^T ]
1188: [[-----------+---][----+---][---+-------------]] [---------]
1189: [[ -R D^{-1} | I ][ 0 | J ][ 0 | I ]] [ Y^T H_0 ]
1191: where J = Y^T H_0 Y + R D^{-1} R.T
1193: becomes
1195: H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1} | 0 ][ I | 0 ] [ S^T ]
1196: [---+------------][----------+--------][----------+---] [---------]
1197: [ 0 | I ][ 0 | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1199: =
1201: H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I | 0 ][ I | 0 ] [ S^T ]
1202: [--------+---][---+-----][---+---------][----------+---] [---------]
1203: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1205: (Note that StY_triu_strict is R)
1206: Byrd, Nocedal, Schnabel 1994
1208: */
1209: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1210: {
1211: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1212: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1213: PetscInt m = lmvm->m;
1214: PetscInt k = lmvm->k;
1215: PetscInt h = k - oldest_update(m, k);
1216: PetscInt idx, i, j, local_n;
1217: PetscInt m_local;
1218: Mat J_local;
1219: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
1220: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1222: PetscFunctionBegin;
1223: VecCheckSameSize(F, 2, dX, 3);
1224: VecCheckMatCompatible(H, dX, 3, F, 2);
1226: /* Cholesky Version */
1227: /* Start with the B0 term */
1228: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1229: if (!ldfp->num_updates) PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1231: if (ldfp->use_recursive) {
1232: PetscDeviceContext dctx;
1233: PetscMemType memtype;
1234: PetscScalar stf, ytx, ytq, yjtqi, sjtyi, *workscalar;
1236: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1237: /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1238: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, ldfp->rwork1, 0, h));
1239: ldfp->Yt_count++;
1241: PetscCall(VecGetLocalSize(ldfp->rwork1, &local_n));
1243: PetscInt oldest = oldest_update(m, k);
1245: if (ldfp->needPQ) {
1246: PetscInt oldest = oldest_update(m, k);
1247: for (i = oldest; i < k; ++i) {
1248: idx = recycle_index(m, i);
1249: /* column_work = S[idx] */
1250: PetscCall(MatGetColumnVector(Yfull, ldfp->column_work, idx));
1251: PetscCall(MatDQNApplyJ0Inv(H, ldfp->column_work, ldfp->PQ[idx]));
1252: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, ldfp->column_work, ldfp->rwork3, 0, h));
1253: PetscCall(VecGetArrayAndMemType(ldfp->rwork3, &workscalar, &memtype));
1254: for (j = oldest; j < i; ++j) {
1255: PetscInt idx_j = recycle_index(m, j);
1256: /* Copy sjtyi in device-aware manner */
1257: if (local_n) {
1258: if (PetscMemTypeHost(memtype)) {
1259: sjtyi = workscalar[idx_j];
1260: } else {
1261: PetscCall(PetscDeviceRegisterMemory(&sjtyi, PETSC_MEMTYPE_HOST, 1 * sizeof(sjtyi)));
1262: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1263: PetscCall(PetscDeviceArrayCopy(dctx, &sjtyi, &workscalar[idx_j], 1));
1264: }
1265: }
1266: PetscCallMPI(MPI_Bcast(&sjtyi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1267: /* column_work2 = Y[j] */
1268: PetscCall(MatGetColumnVector(Yfull, ldfp->column_work2, idx_j));
1269: PetscCall(VecDot(ldfp->PQ[idx], ldfp->column_work2, &yjtqi));
1270: /* column_work2 = Y[j] */
1271: PetscCall(MatGetColumnVector(Sfull, ldfp->column_work2, idx_j));
1272: /* Compute the pure BFGS component of the forward product */
1273: PetscCall(VecAXPBYPCZ(ldfp->PQ[idx], -yjtqi / ldfp->ytq[idx_j], sjtyi / ldfp->yts[idx_j], 1.0, ldfp->PQ[idx_j], ldfp->column_work2));
1274: }
1275: PetscCall(VecDot(ldfp->PQ[idx], ldfp->column_work, &ytq));
1276: ldfp->ytq[idx] = PetscRealPart(ytq);
1277: }
1278: ldfp->needPQ = PETSC_FALSE;
1279: }
1281: PetscCall(VecGetArrayAndMemType(ldfp->rwork1, &workscalar, &memtype));
1282: for (i = oldest; i < k; ++i) {
1283: idx = recycle_index(m, i);
1284: /* Copy stz[i], ytx[i] in device-aware manner */
1285: if (local_n) {
1286: if (PetscMemTypeHost(memtype)) {
1287: stf = workscalar[idx];
1288: } else {
1289: PetscCall(PetscDeviceRegisterMemory(&stf, PETSC_MEMTYPE_HOST, sizeof(stf)));
1290: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1291: PetscCall(PetscDeviceArrayCopy(dctx, &stf, &workscalar[idx], 1));
1292: }
1293: }
1294: PetscCallMPI(MPI_Bcast(&stf, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1295: /* column_work : S[i], column_work2 : Y[i] */
1296: PetscCall(MatGetColumnVector(Sfull, ldfp->column_work, idx));
1297: PetscCall(MatGetColumnVector(Yfull, ldfp->column_work2, idx));
1298: PetscCall(VecDot(dX, ldfp->column_work2, &ytx));
1299: PetscCall(VecAXPBYPCZ(dX, -ytx / ldfp->ytq[idx], stf / ldfp->yts[idx], 1.0, ldfp->PQ[idx], ldfp->column_work));
1300: }
1301: PetscCall(VecRestoreArrayAndMemType(ldfp->rwork1, &workscalar));
1302: } else {
1303: PetscCall(MatLMVMDDFPUpdateSolveData(H));
1304: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, ldfp->rwork1, 0, h));
1305: ldfp->St_count++;
1306: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, dX, ldfp->rwork2, 0, h));
1307: ldfp->Yt_count++;
1308: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1309: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1310: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1311: }
1313: PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1314: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(ldfp->StY_triu_strict));
1315: PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));
1316: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(ldfp->StY_triu_strict));
1318: if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1319: if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1320: PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1321: PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1322: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1323: PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1324: if (m_local) {
1325: Mat J_local;
1327: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1328: PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1329: }
1330: PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1331: PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1332: PetscCall(VecScale(ldfp->rwork3, -1.0));
1334: PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1335: PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));
1337: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1338: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1339: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1340: }
1342: PetscCall(MatMultAddColumnRange(Sfull, ldfp->rwork1, dX, dX, 0, h));
1343: ldfp->S_count++;
1344: PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1345: ldfp->Y_count++;
1346: }
1347: PetscFunctionReturn(PETSC_SUCCESS);
1348: }
1350: /* Solves for
1351: (Theorem 1, Erway, Jain, and Marcia, 2013)
1353: B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [ Y^T ]
1354: ---------------------------------+--------] [---------]
1355: [ R^{-1} | 0 ] [ S^T B_0 ]
1357: (Note: R above is right triangular part of YTS)
1358: which becomes,
1360: [ I | -Y L^{-T} ] [ I | 0 ] [ B_0 | 0 ] [ I | S ] [ I ]
1361: [-----+---] [-----+---] [---+---] [-------------]
1362: [ S^T | I ] [ 0 | D ] [ 0 | I ] [ -L^{-1} Y^T ]
1364: (Note: L above is right triangular part of STY)
1366: */
1367: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1368: {
1369: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1370: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1371: Vec rwork1 = ldfp->rwork1;
1372: PetscInt m = lmvm->m;
1373: PetscInt k = lmvm->k;
1374: PetscInt h = k - oldest_update(m, k);
1375: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
1376: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1377: PetscObjectState Xstate;
1379: PetscFunctionBegin;
1380: VecCheckSameSize(X, 2, Z, 3);
1381: VecCheckMatCompatible(B, X, 2, Z, 3);
1383: /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1384: /* Block Version */
1385: if (!ldfp->num_updates) {
1386: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1387: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1388: }
1390: PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1391: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, rwork1, 0, h));
1393: /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1394: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1395: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1396: PetscCall(VecScale(rwork1, -1.0));
1397: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1399: PetscCall(VecCopy(X, ldfp->column_work));
1400: PetscCall(MatMultAddColumnRange(Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1401: ldfp->S_count++;
1403: PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1404: PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));
1406: PetscCall(MatMultHermitianTransposeAddColumnRange(Sfull, Z, rwork1, rwork1, 0, h));
1407: ldfp->St_count++;
1409: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1410: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1411: PetscCall(VecScale(rwork1, -1.0));
1412: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1414: PetscCall(MatMultAddColumnRange(Yfull, rwork1, Z, Z, 0, h));
1415: ldfp->Y_count++;
1416: PetscFunctionReturn(PETSC_SUCCESS);
1417: }
1419: /*
1420: This dense representation reduces the L-DFP update to a series of
1421: matrix-vector products with dense matrices in lieu of the conventional
1422: matrix-free two-loop algorithm.
1423: */
1424: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1425: {
1426: Mat_LMVM *lmvm;
1427: Mat_DQN *ldfp;
1429: PetscFunctionBegin;
1430: PetscCall(MatCreate_LMVM(B));
1431: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1432: PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
1433: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1434: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1435: B->ops->view = MatView_LMVMDQN;
1436: B->ops->setup = MatSetUp_LMVMDQN;
1437: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1438: B->ops->destroy = MatDestroy_LMVMDQN;
1440: lmvm = (Mat_LMVM *)B->data;
1441: lmvm->ops->reset = MatReset_LMVMDQN;
1442: lmvm->ops->update = MatUpdate_LMVMDQN;
1443: lmvm->ops->mult = MatMult_LMVMDDFP;
1444: lmvm->ops->solve = MatSolve_LMVMDDFP;
1445: lmvm->ops->copy = MatCopy_LMVMDQN;
1447: lmvm->ops->multht = lmvm->ops->mult;
1448: lmvm->ops->solveht = lmvm->ops->solve;
1450: PetscCall(PetscNew(&ldfp));
1451: lmvm->ctx = (void *)ldfp;
1452: ldfp->allocated = PETSC_FALSE;
1453: ldfp->watchdog = 0;
1454: ldfp->max_seq_rejects = lmvm->m / 2;
1455: ldfp->strategy = MAT_LMVM_DENSE_INPLACE;
1456: ldfp->use_recursive = PETSC_TRUE;
1457: ldfp->needPQ = PETSC_TRUE;
1459: PetscCall(SymBroydenRescaleCreate(&ldfp->rescale));
1460: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
1461: PetscFunctionReturn(PETSC_SUCCESS);
1462: }
1464: /*@
1465: MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1466: Davidon-Fletcher-Powell (DFP) approximation to a Hessian.
1468: Collective
1470: Input Parameters:
1471: + comm - MPI communicator
1472: . n - number of local rows for storage vectors
1473: - N - global size of the storage vectors
1475: Output Parameter:
1476: . B - the matrix
1478: Level: advanced
1480: Note:
1481: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1482: paradigm instead of this routine directly.
1484: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1485: @*/
1486: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1487: {
1488: PetscFunctionBegin;
1489: PetscCall(KSPInitializePackage());
1490: PetscCall(MatCreate(comm, B));
1491: PetscCall(MatSetSizes(*B, n, n, N, N));
1492: PetscCall(MatSetType(*B, MATLMVMDDFP));
1493: PetscCall(MatSetUp(*B));
1494: PetscFunctionReturn(PETSC_SUCCESS);
1495: }
1497: /*@
1498: MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`
1500: Input Parameters:
1501: + B - the `MATLMVM` matrix
1502: - type - scale type, see `MatLMVMDenseSetType`
1504: Options Database Keys:
1505: + -mat_lqn_type <reorder,inplace> - set the strategy
1506: . -mat_lbfgs_type <reorder,inplace> - set the strategy
1507: - -mat_ldfp_type <reorder,inplace> - set the strategy
1509: Level: intermediate
1511: MatLMVMDenseTypes\:
1512: + `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1513: - `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement
1515: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1516: @*/
1517: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1518: {
1519: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1520: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
1522: PetscFunctionBegin;
1524: lqn->strategy = type;
1525: PetscFunctionReturn(PETSC_SUCCESS);
1526: }