Actual source code: denseqn.c
  1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
  2: #include <../src/ksp/ksp/utils/lmvm/diagbrdn/diagbrdn.h>
  3: #include <petscblaslapack.h>
  4: #include <petscmat.h>
  5: #include <petscsys.h>
  6: #include <petscsystypes.h>
  7: #include <petscis.h>
  8: #include <petscoptions.h>
  9: #include <petscdevice.h>
 10: #include <petsc/private/deviceimpl.h>
 12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
 13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
 14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
 15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
 16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
 17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);
 19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
 20: {
 21:   return idx % m;
 22: }
 24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
 25: {
 26:   return (idx - num_updates) + PetscMin(m, num_updates);
 27: }
 29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
 30: {
 31:   return PetscMax(0, idx - m);
 32: }
 34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
 35: {
 36:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 37:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
 39:   PetscBool isascii;
 41:   PetscFunctionBegin;
 42:   PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
 43:   PetscCall(MatView_LMVM(B, pv));
 44:   if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatView(ldfp->diag_qn, pv)); }
 45:   if (isascii) { PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ",  Y^T x: %" PetscInt_FMT "\n", ldfp->S_count, ldfp->St_count, ldfp->Y_count, ldfp->Yt_count)); }
 46:   PetscFunctionReturn(PETSC_SUCCESS);
 47: }
 49: static PetscErrorCode MatReset_LMVMDQN(Mat, PetscBool);
 50: static PetscErrorCode MatAllocate_LMVMDQN(Mat B, Vec X, Vec F)
 51: {
 52:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 53:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
 54:   PetscBool is_dbfgs, is_ddfp, is_dqn, same, allocate = PETSC_FALSE;
 55:   VecType   vec_type;
 56:   PetscInt  m, n, M, N;
 57:   MPI_Comm  comm = PetscObjectComm((PetscObject)B);
 59:   PetscFunctionBegin;
 60:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
 61:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
 62:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
 64:   if (lmvm->allocated) {
 65:     PetscCall(VecGetType(X, &vec_type));
 66:     PetscCall(PetscObjectTypeCompare((PetscObject)lmvm->Xprev, vec_type, &same));
 67:     if (!same) {
 68:       /* Given X vector has a different type than allocated X-type data structures.
 69:          We need to destroy all of this and duplicate again out of the given vector. */
 70:       allocate = PETSC_TRUE;
 71:       PetscCall(MatReset_LMVMDQN(B, PETSC_TRUE));
 72:     } else {
 73:       VecCheckMatCompatible(B, X, 2, F, 3);
 74:     }
 75:   } else {
 76:     allocate = PETSC_TRUE;
 77:   }
 78:   if (allocate) {
 79:     PetscCall(VecGetLocalSize(X, &n));
 80:     PetscCall(VecGetSize(X, &N));
 81:     PetscCall(VecGetLocalSize(F, &m));
 82:     PetscCall(VecGetSize(F, &M));
 83:     PetscCheck(N == M, comm, PETSC_ERR_ARG_SIZ, "Incorrect problem sizes! dim(X) not equal to dim(F)");
 84:     PetscCall(MatSetSizes(B, m, n, M, N));
 85:     PetscCall(PetscLayoutSetUp(B->rmap));
 86:     PetscCall(PetscLayoutSetUp(B->cmap));
 87:     PetscCall(VecDuplicate(X, &lmvm->Xprev));
 88:     PetscCall(VecDuplicate(F, &lmvm->Fprev));
 89:     if (lmvm->m > 0) {
 90:       PetscMPIInt rank;
 91:       PetscInt    m, M;
 93:       PetscCallMPI(MPI_Comm_rank(comm, &rank));
 94:       M = lmvm->m;
 95:       m = (rank == 0) ? M : 0;
 97:       /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
 98:        * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
 99:        * For DQN  : Create all data eagerly */
100:       PetscCall(VecGetType(X, &vec_type));
101:       PetscCall(MatCreateDenseFromVecType(comm, vec_type, n, m, N, M, -1, NULL, &lqn->Sfull));
102:       PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->Yfull));
103:       if (is_dqn) {
104:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
105:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
106:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
107:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
108:       } else if (is_ddfp) {
109:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
110:         PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
111:         PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
112:         PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
113:       } else if (is_dbfgs) {
114:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
115:         PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
116:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
117:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
118:       } else {
119:         SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
120:       }
121:       /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
122:       if (lqn->StY_triu) {
123:         PetscCall(MatZeroEntries(lqn->StY_triu));
124:         PetscCall(MatShift(lqn->StY_triu, 1.0));
125:       }
126:       if (lqn->YtS_triu) {
127:         PetscCall(MatZeroEntries(lqn->YtS_triu));
128:         PetscCall(MatShift(lqn->YtS_triu, 1.0));
129:       }
130:       if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
131:         PetscCall(VecDuplicateVecs(X, lmvm->m, &lqn->PQ));
132:         PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work2));
133:         PetscCall(PetscMalloc1(lmvm->m, &lqn->yts));
134:         if (is_dbfgs) {
135:           PetscCall(PetscMalloc1(lmvm->m, &lqn->stp));
136:         } else if (is_ddfp) {
137:           PetscCall(PetscMalloc1(lmvm->m, &lqn->ytq));
138:         }
139:       }
140:       PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
141:       PetscCall(VecZeroEntries(lqn->rwork1));
142:       PetscCall(VecZeroEntries(lqn->rwork2));
143:       PetscCall(VecZeroEntries(lqn->rwork3));
144:       PetscCall(VecZeroEntries(lqn->diag_vec));
145:     }
146:     PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
147:     if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatLMVMAllocate(lqn->diag_qn, X, F)); }
148:     lmvm->allocated = PETSC_TRUE;
149:     B->preallocated = PETSC_TRUE;
150:     B->assembled    = PETSC_TRUE;
151:   }
152:   PetscFunctionReturn(PETSC_SUCCESS);
153: }
155: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
156: {
157:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
158:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
160:   PetscInt M, N;
161:   MPI_Comm comm = PetscObjectComm((PetscObject)B);
162:   Vec      Xtmp, Ftmp;
164:   PetscFunctionBegin;
165:   PetscCall(MatGetSize(B, &M, &N));
166:   PetscCheck(M != 0 && N != 0, comm, PETSC_ERR_ORDER, "MatSetSizes() must be called before MatSetUp()");
167:   if (!lmvm->allocated) {
168:     PetscCall(PetscLayoutSetUp(B->rmap));
169:     PetscCall(PetscLayoutSetUp(B->cmap));
170:     PetscCall(MatCreateVecs(B, &Xtmp, &Ftmp));
171:     if (lmvm->m > 0) PetscCall(PetscMalloc1(lmvm->m, &lqn->workscalar));
172:     PetscCall(MatAllocate_LMVMDQN(B, Xtmp, Ftmp));
173:     PetscCall(VecDestroy(&Xtmp));
174:     PetscCall(VecDestroy(&Ftmp));
175:   }
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }
179: static PetscErrorCode MatSetFromOptions_LMVMDQN_Private(Mat B, PetscOptionItems PetscOptionsObject)
180: {
181:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
182:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
183:   PetscBool is_dbfgs, is_ddfp, is_dqn;
185:   PetscFunctionBegin;
186:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
187:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
188:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
189:   if (is_dqn) {
190:     PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
191:     PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
192:   } else if (is_dbfgs) {
193:     PetscCall(PetscOptionsBool("-mat_lbfgs_recursive", "Use recursive formulation for MatMult_LMVMDBFGS, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
194:     PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
195:     PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
196:   } else if (is_ddfp) {
197:     PetscCall(PetscOptionsBool("-mat_ldfp_recursive", "Use recursive formulation for MatSolve_LMVMDDFP, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
198:     PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
199:     PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
200:   } else {
201:     SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
202:   }
203:   PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems PetscOptionsObject)
207: {
208:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
209:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
211:   PetscFunctionBegin;
212:   PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
213:   PetscOptionsBegin(PetscObjectComm((PetscObject)B), ((PetscObject)B)->prefix, "Dense QN method (MATLMVMDQN,MATLMVMDBFGS,MATLMVMDDFP)", NULL);
214:   PetscCall(MatSetFromOptions_LMVMDQN_Private(B, PetscOptionsObject));
215:   PetscOptionsEnd();
216:   lqn->allocated = PETSC_FALSE;
217:   if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
218:     const char *prefix;
220:     PetscCall(MatGetOptionsPrefix(B, &prefix));
221:     PetscCall(MatSetOptionsPrefix(lqn->diag_qn, prefix));
222:     PetscCall(MatAppendOptionsPrefix(lqn->diag_qn, "J0_"));
223:     PetscCall(MatSetFromOptions(lqn->diag_qn));
224:   }
225:   PetscFunctionReturn(PETSC_SUCCESS);
226: }
228: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
229: {
230:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
231:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
233:   PetscFunctionBegin;
234:   PetscCall(MatDestroy(&lqn->Sfull));
235:   PetscCall(MatDestroy(&lqn->Yfull));
236:   PetscCall(MatDestroy(&lqn->HY));
237:   PetscCall(MatDestroy(&lqn->BS));
238:   PetscCall(MatDestroy(&lqn->StY_triu));
239:   PetscCall(MatDestroy(&lqn->YtS_triu));
240:   PetscCall(VecDestroy(&lqn->StFprev));
241:   PetscCall(VecDestroy(&lqn->Fprev_ref));
242:   lqn->Fprev_state = 0;
243:   PetscCall(MatDestroy(&lqn->YtS_triu_strict));
244:   PetscCall(MatDestroy(&lqn->StY_triu_strict));
245:   PetscCall(MatDestroy(&lqn->StBS));
246:   PetscCall(MatDestroy(&lqn->YtHY));
247:   PetscCall(MatDestroy(&lqn->J));
248:   PetscCall(MatDestroy(&lqn->temp_mat));
249:   PetscCall(VecDestroy(&lqn->diag_vec));
250:   PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
251:   PetscCall(VecDestroy(&lqn->inv_diag_vec));
252:   PetscCall(VecDestroy(&lqn->column_work));
253:   PetscCall(VecDestroy(&lqn->column_work2));
254:   PetscCall(VecDestroy(&lqn->rwork1));
255:   PetscCall(VecDestroy(&lqn->rwork2));
256:   PetscCall(VecDestroy(&lqn->rwork3));
257:   PetscCall(VecDestroy(&lqn->rwork2_local));
258:   PetscCall(VecDestroy(&lqn->rwork3_local));
259:   PetscCall(VecDestroy(&lqn->cyclic_work_vec));
260:   PetscCall(VecDestroyVecs(lmvm->m, &lqn->PQ));
261:   PetscCall(PetscFree(lqn->stp));
262:   PetscCall(PetscFree(lqn->yts));
263:   PetscCall(PetscFree(lqn->ytq));
264:   lqn->allocated = PETSC_FALSE;
265:   PetscFunctionReturn(PETSC_SUCCESS);
266: }
268: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
269: {
270:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
271:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
273:   PetscFunctionBegin;
274:   PetscCall(MatLMVMDQNResetDestructive(B));
275:   PetscCall(PetscFree(lqn->workscalar));
276:   PetscCall(MatDestroy(&lqn->diag_qn));
277:   PetscCall(PetscFree(lmvm->ctx));
278:   PetscCall(MatDestroy_LMVM(B));
279:   PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: static PetscErrorCode MatReset_LMVMDQN(Mat B, PetscBool destructive)
283: {
284:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
285:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
287:   PetscFunctionBegin;
288:   lqn->watchdog = 0;
289:   lqn->needPQ   = PETSC_TRUE;
290:   if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
291:     Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
292:     Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
293:     if (!diagctx->allocated) PetscCall(MatLMVMAllocate(lqn->diag_qn, lmvm->Xprev, lmvm->Fprev));
294:     PetscCall(MatLMVMReset(lqn->diag_qn, destructive));
295:   }
296:   if (lqn->Sfull) PetscCall(MatZeroEntries(lqn->Sfull));
297:   if (lqn->Yfull) PetscCall(MatZeroEntries(lqn->Yfull));
298:   if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
299:   if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
300:   if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
301:     PetscCall(MatZeroEntries(lqn->StY_triu));
302:     PetscCall(MatShift(lqn->StY_triu, 1.0));
303:   }
304:   if (lqn->YtS_triu) {
305:     PetscCall(MatZeroEntries(lqn->YtS_triu));
306:     PetscCall(MatShift(lqn->YtS_triu, 1.0));
307:   }
308:   if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
309:   if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
310:   if (lqn->StBS) {
311:     PetscCall(MatZeroEntries(lqn->StBS));
312:     PetscCall(MatShift(lqn->StBS, 1.0));
313:   }
314:   if (lqn->YtHY) {
315:     PetscCall(MatZeroEntries(lqn->YtHY));
316:     PetscCall(MatShift(lqn->YtHY, 1.0));
317:   }
318:   if (lqn->Fprev_ref) PetscCall(VecDestroy(&lqn->Fprev_ref));
319:   lqn->Fprev_state = 0;
320:   if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
321:   if (destructive) { PetscCall(MatLMVMDQNResetDestructive(B)); }
322:   lqn->num_updates      = 0;
323:   lqn->num_mult_updates = 0;
324:   PetscCall(MatReset_LMVM(B, destructive));
325:   PetscFunctionReturn(PETSC_SUCCESS);
326: }
328: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
329: {
330:   Mat_LMVM     *lmvm    = (Mat_LMVM *)B->data;
331:   Mat_DQN      *lqn     = (Mat_DQN *)lmvm->ctx;
332:   Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
333:   Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
335:   PetscBool          is_ddfp, is_dbfgs, is_dqn;
336:   PetscDeviceContext dctx;
338:   PetscFunctionBegin;
339:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
340:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
341:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
342:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
343:   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
344:   if (lmvm->prev_set) {
345:     Vec         FX[2];
346:     PetscScalar dotFX[2];
347:     PetscScalar stFprev;
348:     PetscScalar curvature, yTy;
349:     PetscReal   curvtol;
350:     Vec         workvec1;
352:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
353:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
354:     /* Test if the updates can be accepted */
355:     FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
356:     FX[1] = F;           /* dotFX[1] = s^T F     */
357:     PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
358:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
359:     PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
360:     stFprev   = PetscConj(dotFX[0]);
361:     curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
362:     if (PetscRealPart(yTy) < lmvm->eps) {
363:       curvtol = 0.0;
364:     } else {
365:       curvtol = lmvm->eps * PetscRealPart(yTy);
366:     }
367:     if (PetscRealPart(curvature) > curvtol) {
368:       PetscInt m     = lmvm->m;
369:       PetscInt k     = lqn->num_updates;
370:       PetscInt h_new = k + 1 - oldest_update(m, k + 1);
371:       PetscInt idx   = recycle_index(m, k);
372:       PetscInt i, old_k;
374:       /* Update is good, accept it */
375:       lmvm->nupdates++;
376:       lqn->num_updates++;
377:       lqn->watchdog = 0;
378:       lqn->needPQ   = PETSC_TRUE;
379:       old_k         = lmvm->k;
381:       if (lmvm->k != m - 1) {
382:         lmvm->k++;
383:       } else if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
384:         if (is_dqn) {
385:           PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
386:           PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
387:         } else if (is_dbfgs) {
388:           PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
389:         } else if (is_ddfp) {
390:           PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
391:         } else {
392:           SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
393:         }
394:       }
396:       if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
397:         if (old_k == lmvm->k) {
398:           for (i = 0; i <= lmvm->k - 1; ++i) {
399:             lqn->yts[i] = lqn->yts[i + 1];
400:             if (is_dbfgs) {
401:               lqn->stp[i] = lqn->stp[i + 1];
402:             } else if (is_ddfp) {
403:               lqn->ytq[i] = lqn->ytq[i + 1];
404:             }
405:           }
406:         }
407:         lqn->yts[lmvm->k] = PetscRealPart(curvature);
408:       }
410:       /* First update the S^T matrix */
411:       PetscCall(MatDenseGetColumnVecWrite(lqn->Sfull, idx, &workvec1));
412:       PetscCall(VecCopy(lmvm->Xprev, workvec1));
413:       PetscCall(MatDenseRestoreColumnVecWrite(lqn->Sfull, idx, &workvec1));
415:       /* Now repeat update for the Y^T matrix */
416:       PetscCall(MatDenseGetColumnVecWrite(lqn->Yfull, idx, &workvec1));
417:       PetscCall(VecCopy(lmvm->Fprev, workvec1));
418:       PetscCall(MatDenseRestoreColumnVecWrite(lqn->Yfull, idx, &workvec1));
420:       if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the       *
421:          * H_k is immediately applied to F after begin updated.   The S^T y computation can be split up as S^T (F - F_prev) */
422:         PetscInt     local_n;
423:         PetscScalar *StFprev;
424:         PetscMemType memtype;
425:         PetscInt     StYidx;
427:         StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
428:         if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
429:         PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
430:         PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
431:         if (local_n) {
432:           if (PetscMemTypeHost(memtype)) {
433:             StFprev[idx] = stFprev;
434:           } else {
435:             PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
436:             PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
437:             PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
438:           }
439:         }
440:         PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));
442:         {
443:           Vec this_sy_col;
444:           /* Now StFprev is updated for the new S vector.  Write -StFprev into the appropriate row */
445:           PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
446:           PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));
448:           /* Now compute the new StFprev */
449:           PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h_new));
450:           lqn->St_count++;
452:           /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
453:           PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));
455:           if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
456:           PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
457:         }
458:       }
460:       if (is_ddfp || is_dqn) {
461:         PetscInt YtSidx;
463:         YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
465:         {
466:           Vec this_ys_col;
468:           PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
469:           PetscCall(MatMultHermitianTransposeColumnRange(lqn->Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
470:           lqn->Yt_count++;
472:           if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
473:           PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
474:         }
475:       }
477:       if (is_dbfgs || is_dqn) {
478:         PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
479:       } else if (is_ddfp) {
480:         PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
481:       } else {
482:         SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
483:       }
485:       if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
486:         if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
487:         PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
488:         PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
489:       } else {
490:         if (!lqn->diag_vec_recycle_order) {
491:           PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
492:           lqn->diag_vec_recycle_order = lqn->diag_vec;
493:         }
494:       }
496:       if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_SCALAR) {
497:         PetscScalar sTy = curvature;
499:         diagctx->sigma = PetscRealPart(sTy) / PetscRealPart(yTy);
500:       } else if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
501:         PetscScalar sTy = curvature;
502:         PetscScalar sTDs, yTDy;
504:         if (!diagctx->invD) {
505:           PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->invD));
506:           PetscCall(VecSet(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTy)));
507:         }
508:         if (!diagctx->U) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->U));
509:         if (!diagctx->V) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->V));
510:         if (!diagctx->W) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->W));
512:         /* diagonal Broyden */
513:         PetscCall(VecReciprocal(diagctx->invD));
514:         PetscCall(VecPointwiseMult(diagctx->V, diagctx->invD, lmvm->Xprev));
515:         PetscCall(VecPointwiseMult(diagctx->U, lmvm->Fprev, lmvm->Fprev));
516:         if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->U));
517:         PetscCall(VecAXPY(diagctx->invD, 1.0 / sTy, diagctx->U));
518:         PetscCall(VecDot(diagctx->V, lmvm->Xprev, &sTDs));
519:         if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->V));
520:         PetscCall(VecPointwiseMult(diagctx->V, diagctx->V, diagctx->V));
521:         PetscCall(VecAXPY(diagctx->invD, -1.0 / PetscMax(PetscRealPart(sTDs), diagctx->tol), diagctx->V));
522:         PetscCall(VecReciprocal(diagctx->invD));
523:         PetscCall(VecAbs(diagctx->invD));
524:         PetscCall(VecDot(diagctx->U, diagctx->invD, &yTDy));
525:         PetscCall(VecScale(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTDy)));
526:       }
527:     } else {
528:       /* Update is bad, skip it */
529:       ++lmvm->nrejects;
530:       ++lqn->watchdog;
531:       PetscInt m = lmvm->m;
532:       PetscInt k = lqn->num_updates;
533:       PetscInt h = k - oldest_update(m, k);
535:       /* we still have to maintain StFprev */
536:       if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
537:       PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h));
538:       lqn->St_count++;
539:     }
540:   } else {
541:     switch (lqn->scale_type) {
542:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
543:       PetscCall(VecSet(diagctx->invD, diagctx->delta));
544:       break;
545:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
546:       diagctx->sigma = diagctx->delta;
547:       break;
548:     default:
549:       diagctx->sigma = 1.0;
550:       break;
551:     }
552:   }
554:   if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));
556:   /* Save the solution and function to be used in the next update */
557:   PetscCall(VecCopy(X, lmvm->Xprev));
558:   PetscCall(VecCopy(F, lmvm->Fprev));
559:   PetscCall(PetscObjectReference((PetscObject)F));
560:   PetscCall(VecDestroy(&lqn->Fprev_ref));
561:   lqn->Fprev_ref = F;
562:   PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
563:   lmvm->prev_set = PETSC_TRUE;
564:   PetscFunctionReturn(PETSC_SUCCESS);
565: }
567: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
568: {
569:   PetscFunctionBegin;
570:   PetscCall(MatDestroy(dst));
571:   if (src) { PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst)); }
572:   PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
576: {
577:   PetscFunctionBegin;
578:   PetscCall(VecDestroy(dst));
579:   if (src) {
580:     PetscCall(VecDuplicate(src, dst));
581:     PetscCall(VecCopy(src, *dst));
582:   }
583:   PetscFunctionReturn(PETSC_SUCCESS);
584: }
586: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
587: {
588:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
589:   Mat_DQN  *blqn  = (Mat_DQN *)bdata->ctx;
590:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
591:   Mat_DQN  *mlqn  = (Mat_DQN *)mdata->ctx;
592:   PetscInt  i;
593:   PetscBool is_dbfgs, is_ddfp, is_dqn;
595:   PetscFunctionBegin;
596:   mlqn->num_updates      = blqn->num_updates;
597:   mlqn->num_mult_updates = blqn->num_mult_updates;
598:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
599:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
600:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
601:   PetscCall(MatDestroyThenCopy(blqn->Sfull, &mlqn->Sfull));
602:   PetscCall(MatDestroyThenCopy(blqn->Yfull, &mlqn->Yfull));
603:   PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
604:   PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
605:   PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
606:   PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
607:   PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
608:   PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
609:   PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
610:   PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
611:   PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
612:   PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
613:   PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
614:   PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
615:   if (blqn->use_recursive && (is_dbfgs || is_ddfp)) {
616:     for (i = 0; i <= bdata->k; i++) {
617:       PetscCall(VecDestroyThenCopy(blqn->PQ[i], &mlqn->PQ[i]));
618:       mlqn->yts[i] = blqn->yts[i];
619:       if (is_dbfgs) {
620:         mlqn->stp[i] = blqn->stp[i];
621:       } else if (is_ddfp) {
622:         mlqn->ytq[i] = blqn->ytq[i];
623:       }
624:     }
625:   }
626:   mlqn->dense_type      = blqn->dense_type;
627:   mlqn->strategy        = blqn->strategy;
628:   mlqn->scale_type      = blqn->scale_type;
629:   mlqn->S_count         = 0;
630:   mlqn->St_count        = 0;
631:   mlqn->Y_count         = 0;
632:   mlqn->Yt_count        = 0;
633:   mlqn->watchdog        = blqn->watchdog;
634:   mlqn->max_seq_rejects = blqn->max_seq_rejects;
635:   mlqn->allocated       = blqn->allocated;
636:   mlqn->use_recursive   = blqn->use_recursive;
637:   mlqn->needPQ          = blqn->needPQ;
638:   PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
639:   PetscCall(VecDestroy(&mlqn->Fprev_ref));
640:   mlqn->Fprev_ref   = blqn->Fprev_ref;
641:   mlqn->Fprev_state = blqn->Fprev_state;
642:   if (!(bdata->J0 || bdata->user_pc || bdata->user_ksp || bdata->user_scale)) { PetscCall(MatCopy(blqn->diag_qn, mlqn->diag_qn, SAME_NONZERO_PATTERN)); }
643:   PetscFunctionReturn(PETSC_SUCCESS);
644: }
646: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
647: {
648:   PetscFunctionBegin;
649:   PetscCall(MatMult_LMVMDDFP(B, X, Z));
650:   PetscFunctionReturn(PETSC_SUCCESS);
651: }
653: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
654: {
655:   PetscFunctionBegin;
656:   PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
657:   PetscFunctionReturn(PETSC_SUCCESS);
658: }
660: /*
661:   This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
662:   and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
663:   results in avoiding costly Cholesky factorization, at the cost of duality cap.
664:   Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
665: */
666: PetscErrorCode MatCreate_LMVMDQN(Mat B)
667: {
668:   Mat_LMVM *lmvm;
669:   Mat_DQN  *lqn;
671:   PetscFunctionBegin;
672:   PetscCall(MatCreate_LMVM(B));
673:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
674:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
675:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
676:   B->ops->view           = MatView_LMVMDQN;
677:   B->ops->setup          = MatSetUp_LMVMDQN;
678:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
679:   B->ops->destroy        = MatDestroy_LMVMDQN;
681:   lmvm                = (Mat_LMVM *)B->data;
682:   lmvm->square        = PETSC_TRUE;
683:   lmvm->ops->allocate = MatAllocate_LMVMDQN;
684:   lmvm->ops->reset    = MatReset_LMVMDQN;
685:   lmvm->ops->update   = MatUpdate_LMVMDQN;
686:   lmvm->ops->mult     = MatMult_LMVMDQN;
687:   lmvm->ops->solve    = MatSolve_LMVMDQN;
688:   lmvm->ops->copy     = MatCopy_LMVMDQN;
690:   PetscCall(PetscNew(&lqn));
691:   lmvm->ctx            = (void *)lqn;
692:   lqn->allocated       = PETSC_FALSE;
693:   lqn->use_recursive   = PETSC_FALSE;
694:   lqn->needPQ          = PETSC_FALSE;
695:   lqn->watchdog        = 0;
696:   lqn->max_seq_rejects = lmvm->m / 2;
697:   lqn->strategy        = MAT_LMVM_DENSE_INPLACE;
698:   lqn->scale_type      = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
700:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &lqn->diag_qn));
701:   PetscCall(MatSetType(lqn->diag_qn, MATLMVMDIAGBROYDEN));
702:   PetscCall(MatSetOptionsPrefix(lqn->diag_qn, "J0_"));
703:   PetscFunctionReturn(PETSC_SUCCESS);
704: }
706: /*@
707:   MatCreateLMVMDQN - Creates a dense representation of the limited-memory
708:   Quasi-Newton approximation to a Hessian.
710:   Collective
712:   Input Parameters:
713: + comm - MPI communicator
714: . n    - number of local rows for storage vectors
715: - N    - global size of the storage vectors
717:   Output Parameter:
718: . B - the matrix
720:   Level: advanced
722:   Note:
723:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
724:   paradigm instead of this routine directly.
726: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
727: @*/
728: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
729: {
730:   PetscFunctionBegin;
731:   PetscCall(KSPInitializePackage());
732:   PetscCall(MatCreate(comm, B));
733:   PetscCall(MatSetSizes(*B, n, n, N, N));
734:   PetscCall(MatSetType(*B, MATLMVMDQN));
735:   PetscCall(MatSetUp(*B));
736:   PetscFunctionReturn(PETSC_SUCCESS);
737: }
739: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
740: {
741:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
742:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
744:   PetscFunctionBegin;
745:   if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
746:     lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
747:     PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
748:   } else {
749:     Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
750:     Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
752:     switch (lqn->scale_type) {
753:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
754:       PetscCall(VecAXPBY(Z, 1.0 / diagctx->sigma, 0.0, X));
755:       break;
756:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
757:       PetscCall(VecPointwiseDivide(Z, X, diagctx->invD));
758:       break;
759:     case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
760:     default:
761:       PetscCall(VecCopy(X, Z));
762:       break;
763:     }
764:   }
765:   PetscFunctionReturn(PETSC_SUCCESS);
766: }
768: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
769: {
770:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
771:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
773:   PetscFunctionBegin;
774:   if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
775:     lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
776:     PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
777:   } else {
778:     Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
779:     Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
781:     switch (lqn->scale_type) {
782:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
783:       PetscCall(VecAXPBY(dX, diagctx->sigma, 0.0, F));
784:       break;
785:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
786:       PetscCall(VecPointwiseMult(dX, F, diagctx->invD));
787:       break;
788:     case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
789:     default:
790:       PetscCall(VecCopy(F, dX));
791:       break;
792:     }
793:   }
794:   PetscFunctionReturn(PETSC_SUCCESS);
795: }
797: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
798: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
799: {
800:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
801:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
802:   PetscInt  m_local;
804:   PetscFunctionBegin;
805:   if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
806:   PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
807:   PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
808:   PetscCall(MatGetLocalSize(result, &m_local, NULL));
809:   // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
810:   PetscCall(MatConjugate(lbfgs->temp_mat));
811:   if (m_local) {
812:     Mat temp_local, YtS_local, result_local;
813:     PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
814:     PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
815:     PetscCall(MatDenseGetLocalMatrix(result, &result_local));
816:     PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
817:   }
818:   PetscCall(MatConjugate(result));
819:   PetscFunctionReturn(PETSC_SUCCESS);
820: }
822: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
823: {
824:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
825:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
826:   PetscInt  m     = lmvm->m, m_local;
827:   PetscInt  k     = lbfgs->num_updates;
828:   PetscInt  h     = k - oldest_update(m, k);
829:   PetscInt  j_0;
830:   PetscInt  prev_oldest;
831:   Mat       J_local;
833:   PetscFunctionBegin;
834:   if (!lbfgs->YtS_triu_strict) {
835:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
836:     PetscCall(MatDestroy(&lbfgs->StBS));
837:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
838:     PetscCall(MatDestroy(&lbfgs->J));
839:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
840:     PetscCall(MatDestroy(&lbfgs->BS));
841:     PetscCall(MatDuplicate(lbfgs->Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
842:     PetscCall(MatShift(lbfgs->StBS, 1.0));
843:     lbfgs->num_mult_updates = oldest_update(m, k);
844:   }
845:   if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
847:   /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
848:   for (PetscInt j = oldest_update(m, k); j < k; j++) {
849:     Vec      s_j;
850:     Vec      Bs_j;
851:     Vec      StBs_j;
852:     PetscInt S_idx    = recycle_index(m, j);
853:     PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
855:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
856:     PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
857:     PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
858:     PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
859:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
860:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Bs_j, StBs_j, 0, h));
861:     lbfgs->St_count++;
862:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
863:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
864:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
865:   }
866:   prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
867:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
868:     /* move the YtS entries that have been computed and need to be kept back up */
869:     PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
871:     PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
872:   }
873:   PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
874:   j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
875:   for (PetscInt j = j_0; j < k; j++) {
876:     PetscInt S_idx   = recycle_index(m, j);
877:     PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
878:     Vec      s_j, Yts_j;
880:     PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
881:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
882:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, s_j, Yts_j, 0, h));
883:     lbfgs->Yt_count++;
884:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
885:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
886:     PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
887:     /* zero the corresponding row */
888:     if (m_local > 0) {
889:       Mat YtS_local, YtS_row;
891:       PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
892:       PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
893:       PetscCall(MatZeroEntries(YtS_row));
894:       PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
895:     }
896:   }
897:   if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
898:   PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
899:   PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
900:   PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
901:   PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
902:   PetscCall(MatGetLDLT(B, lbfgs->J));
903:   PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
904:   if (m_local) {
905:     PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
906:     PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
907:   }
908:   lbfgs->num_mult_updates = lbfgs->num_updates;
909:   PetscFunctionReturn(PETSC_SUCCESS);
910: }
912: /* Solves for
913:  * [ I | -S R^{-T} ] [  I  | 0 ] [ H_0 | 0 ] [ I | Y ] [      I      ]
914:  *                   [-----+---] [-----+---] [---+---] [-------------]
915:  *                   [ Y^T | I ] [  0  | D ] [ 0 | I ] [ -R^{-1} S^T ]  */
917: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
918: {
919:   Mat_LMVM        *lmvm   = (Mat_LMVM *)H->data;
920:   Mat_DQN         *lbfgs  = (Mat_DQN *)lmvm->ctx;
921:   Vec              rwork1 = lbfgs->rwork1;
922:   PetscInt         m      = lmvm->m;
923:   PetscInt         k      = lbfgs->num_updates;
924:   PetscInt         h      = k - oldest_update(m, k);
925:   PetscObjectState Fstate;
927:   PetscFunctionBegin;
928:   VecCheckSameSize(F, 2, dX, 3);
929:   VecCheckMatCompatible(H, dX, 3, F, 2);
931:   /* Block Version */
932:   if (!lbfgs->num_updates) {
933:     PetscCall(MatDQNApplyJ0Inv(H, F, dX));
934:     PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
935:   }
937:   PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
938:   if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
939:     PetscCall(VecCopy(lbfgs->StFprev, rwork1));
940:   } else {
941:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, F, rwork1, 0, h));
942:     lbfgs->St_count++;
943:   }
945:   /* Reordering rwork1, as STY is in history order, while S is in recycled order */
946:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
947:   PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
948:   PetscCall(VecScale(rwork1, -1.0));
949:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
951:   PetscCall(VecCopy(F, lbfgs->column_work));
952:   PetscCall(MatMultAddColumnRange(lbfgs->Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
953:   lbfgs->Y_count++;
955:   PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
956:   PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));
958:   PetscCall(MatMultHermitianTransposeAddColumnRange(lbfgs->Yfull, dX, rwork1, rwork1, 0, h));
959:   lbfgs->Yt_count++;
961:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
962:   PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
963:   PetscCall(VecScale(rwork1, -1.0));
964:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
966:   PetscCall(MatMultAddColumnRange(lbfgs->Sfull, rwork1, dX, dX, 0, h));
967:   lbfgs->S_count++;
968:   PetscFunctionReturn(PETSC_SUCCESS);
969: }
971: /* Solves for
972:    B_0 - [ Y | B_0 S] [ -D  |    L^T    ]^-1 [   Y^T   ]
973:                       [-----+-----------]    [---------]
974:                       [  L  | S^T B_0 S ]    [ S^T B_0 ]
976:    Above is equivalent to
978:    B_0 - [ Y | B_0 S] [[     I     | 0 ][ -D  | 0 ][ I | -D^{-1} L^T ]]^-1 [   Y^T   ]
979:                       [[-----------+---][-----+---][---+-------------]]    [---------]
980:                       [[ -L D^{-1} | I ][  0  | J ][ 0 |       I     ]]    [ S^T B_0 ]
982:    where J = S^T B_0 S + L D^{-1} L^T
984:    becomes
986:    B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1}  |   0    ][    I     | 0 ] [   Y^T   ]
987:                       [---+------------][----------+--------][----------+---] [---------]
988:                       [ 0 |     I      ][     0    | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
990:                       =
992:    B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I |    0    ][     I    | 0 ] [   Y^T   ]
993:                       [--------+---][---+-----][---+---------][----------+---] [---------]
994:                       [ 0      | I ][ 0 |  I  ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
996:                       (Note that YtS_triu_strict is L^T)
997:    Byrd, Nocedal, Schnabel 1994
999:    Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
1000:    (See ddfp.c's MatMult_LMVMDDFP)
1002: */
1003: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
1004: {
1005:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
1006:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
1007:   Mat       J_local;
1008:   PetscInt  idx, i, j, m_local, local_n;
1009:   PetscInt  m = lmvm->m;
1010:   PetscInt  k = lbfgs->num_updates;
1011:   PetscInt  h = k - oldest_update(m, k);
1013:   PetscFunctionBegin;
1014:   VecCheckSameSize(X, 2, Z, 3);
1015:   VecCheckMatCompatible(B, X, 2, Z, 3);
1017:   /* Cholesky Version */
1018:   /* Start with the B0 term */
1019:   PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1020:   if (!lbfgs->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }
1022:   if (lbfgs->use_recursive) {
1023:     PetscDeviceContext dctx;
1024:     PetscMemType       memtype;
1025:     PetscScalar        stz, ytx, stp, sjtpi, yjtsi, *workscalar;
1026:     PetscInt           oldest = oldest_update(m, k);
1028:     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1029:     /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1030:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, X, lbfgs->rwork1, 0, h));
1031:     lbfgs->Yt_count++;
1033:     PetscCall(VecGetLocalSize(lbfgs->rwork1, &local_n));
1035:     if (lbfgs->needPQ) {
1036:       PetscInt oldest = oldest_update(m, k);
1037:       for (i = 0; i <= lmvm->k; ++i) {
1038:         idx = recycle_index(m, i + oldest);
1039:         /* column_work = S[idx] */
1040:         PetscCall(MatGetColumnVector(lbfgs->Sfull, lbfgs->column_work, idx));
1041:         PetscCall(MatDQNApplyJ0Fwd(B, lbfgs->column_work, lbfgs->PQ[idx]));
1042:         PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, lbfgs->column_work, lbfgs->rwork3, 0, h));
1043:         PetscCall(VecGetArrayAndMemType(lbfgs->rwork3, &workscalar, &memtype));
1044:         for (j = 0; j < i; ++j) {
1045:           PetscInt idx_j = recycle_index(m, j + oldest);
1046:           /* Copy yjtsi in device-aware manner */
1047:           if (local_n) {
1048:             if (PetscMemTypeHost(memtype)) {
1049:               yjtsi = workscalar[idx_j];
1050:             } else {
1051:               PetscCall(PetscDeviceRegisterMemory(&yjtsi, PETSC_MEMTYPE_HOST, sizeof(yjtsi)));
1052:               PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1053:               PetscCall(PetscDeviceArrayCopy(dctx, &yjtsi, &workscalar[idx_j], 1));
1054:             }
1055:           }
1056:           PetscCallMPI(MPI_Bcast(&yjtsi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
1057:           /* column_work2 = S[j] */
1058:           PetscCall(MatGetColumnVector(lbfgs->Sfull, lbfgs->column_work2, idx_j));
1059:           PetscCall(VecDot(lbfgs->column_work2, lbfgs->PQ[idx], &sjtpi));
1060:           /* column_work2 = Y[j] */
1061:           PetscCall(MatGetColumnVector(lbfgs->Yfull, lbfgs->column_work2, idx_j));
1062:           /* Compute the pure BFGS component of the forward product */
1063:           PetscCall(VecAXPBYPCZ(lbfgs->PQ[idx], -PetscRealPart(sjtpi) / lbfgs->stp[idx_j], PetscRealPart(yjtsi) / lbfgs->yts[j], 1.0, lbfgs->PQ[idx_j], lbfgs->column_work2));
1064:         }
1065:         PetscCall(VecDot(lbfgs->column_work, lbfgs->PQ[idx], &stp));
1066:         lbfgs->stp[idx] = PetscRealPart(stp);
1067:       }
1068:       lbfgs->needPQ = PETSC_FALSE;
1069:     }
1071:     PetscCall(VecGetArrayAndMemType(lbfgs->rwork1, &workscalar, &memtype));
1072:     for (i = 0; i <= lmvm->k; ++i) {
1073:       idx = recycle_index(m, i + oldest);
1074:       /* Copy stz[i], ytx[i] in device-aware manner */
1075:       if (local_n) {
1076:         if (PetscMemTypeHost(memtype)) {
1077:           ytx = workscalar[idx];
1078:         } else {
1079:           PetscCall(PetscDeviceRegisterMemory(&ytx, PETSC_MEMTYPE_HOST, 1 * sizeof(ytx)));
1080:           PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1081:           PetscCall(PetscDeviceArrayCopy(dctx, &ytx, &workscalar[idx], 1));
1082:         }
1083:       }
1084:       PetscCallMPI(MPI_Bcast(&ytx, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
1085:       /* column_work : S[i], column_work2 : Y[i] */
1086:       PetscCall(MatGetColumnVector(lbfgs->Sfull, lbfgs->column_work, idx));
1087:       PetscCall(MatGetColumnVector(lbfgs->Yfull, lbfgs->column_work2, idx));
1088:       PetscCall(VecDot(lbfgs->column_work, Z, &stz));
1089:       PetscCall(VecAXPBYPCZ(Z, -PetscRealPart(stz) / lbfgs->stp[idx], PetscRealPart(ytx) / lbfgs->yts[i], 1.0, lbfgs->PQ[idx], lbfgs->column_work2));
1090:     }
1091:     PetscCall(VecRestoreArrayAndMemType(lbfgs->rwork1, &workscalar));
1092:   } else {
1093:     PetscCall(MatLMVMDBFGSUpdateMultData(B));
1094:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, X, lbfgs->rwork1, 0, h));
1095:     lbfgs->Yt_count++;
1096:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Z, lbfgs->rwork2, 0, h));
1097:     lbfgs->St_count++;
1098:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
1099:       PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1100:       PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1101:     }
1103:     PetscCall(VecPointwiseMult(lbfgs->rwork3, lbfgs->rwork1, lbfgs->inv_diag_vec));
1104:     PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2, lbfgs->rwork2));
1106:     if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
1107:     if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
1108:     PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
1109:     PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
1110:     PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
1111:     PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
1112:     if (m_local) {
1113:       PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
1114:       PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
1115:     }
1116:     PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
1117:     PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
1118:     PetscCall(VecScale(lbfgs->rwork3, -1.0));
1120:     PetscCall(MatMultAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork1, lbfgs->rwork1));
1121:     PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));
1123:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
1124:       PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1125:       PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1126:     }
1128:     PetscCall(MatMultAddColumnRange(lbfgs->Yfull, lbfgs->rwork1, Z, Z, 0, h));
1129:     lbfgs->Y_count++;
1130:     PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
1131:     lbfgs->S_count++;
1132:   }
1133:   PetscFunctionReturn(PETSC_SUCCESS);
1134: }
1136: /*
1137:   This dense representation reduces the L-BFGS update to a series of
1138:   matrix-vector products with dense matrices in lieu of the conventional matrix-free
1139:   two-loop algorithm.
1140: */
1141: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
1142: {
1143:   Mat_LMVM *lmvm;
1144:   Mat_DQN  *lbfgs;
1146:   PetscFunctionBegin;
1147:   PetscCall(MatCreate_LMVM(B));
1148:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
1149:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1150:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1151:   B->ops->view           = MatView_LMVMDQN;
1152:   B->ops->setup          = MatSetUp_LMVMDQN;
1153:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1154:   B->ops->destroy        = MatDestroy_LMVMDQN;
1156:   lmvm                = (Mat_LMVM *)B->data;
1157:   lmvm->square        = PETSC_TRUE;
1158:   lmvm->ops->allocate = MatAllocate_LMVMDQN;
1159:   lmvm->ops->reset    = MatReset_LMVMDQN;
1160:   lmvm->ops->update   = MatUpdate_LMVMDQN;
1161:   lmvm->ops->mult     = MatMult_LMVMDBFGS;
1162:   lmvm->ops->solve    = MatSolve_LMVMDBFGS;
1163:   lmvm->ops->copy     = MatCopy_LMVMDQN;
1165:   PetscCall(PetscNew(&lbfgs));
1166:   lmvm->ctx              = (void *)lbfgs;
1167:   lbfgs->allocated       = PETSC_FALSE;
1168:   lbfgs->use_recursive   = PETSC_TRUE;
1169:   lbfgs->needPQ          = PETSC_TRUE;
1170:   lbfgs->watchdog        = 0;
1171:   lbfgs->max_seq_rejects = lmvm->m / 2;
1172:   lbfgs->strategy        = MAT_LMVM_DENSE_INPLACE;
1173:   lbfgs->scale_type      = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
1175:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &lbfgs->diag_qn));
1176:   PetscCall(MatSetType(lbfgs->diag_qn, MATLMVMDIAGBROYDEN));
1177:   PetscCall(MatSetOptionsPrefix(lbfgs->diag_qn, "J0_"));
1178:   PetscFunctionReturn(PETSC_SUCCESS);
1179: }
1181: /*@
1182:   MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1183:   Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.
1185:   Collective
1187:   Input Parameters:
1188: + comm - MPI communicator
1189: . n    - number of local rows for storage vectors
1190: - N    - global size of the storage vectors
1192:   Output Parameter:
1193: . B - the matrix
1195:   Level: advanced
1197:   Note:
1198:   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1199:   paradigm instead of this routine directly.
1201: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1202: @*/
1203: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1204: {
1205:   PetscFunctionBegin;
1206:   PetscCall(KSPInitializePackage());
1207:   PetscCall(MatCreate(comm, B));
1208:   PetscCall(MatSetSizes(*B, n, n, N, N));
1209:   PetscCall(MatSetType(*B, MATLMVMDBFGS));
1210:   PetscCall(MatSetUp(*B));
1211:   PetscFunctionReturn(PETSC_SUCCESS);
1212: }
1214: /* here R is strictly upper triangular part of STY */
1215: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1216: {
1217:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1218:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1219:   PetscInt  m_local;
1221:   PetscFunctionBegin;
1222:   if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1223:   PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1224:   PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1225:   PetscCall(MatGetLocalSize(result, &m_local, NULL));
1226:   // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1227:   PetscCall(MatConjugate(ldfp->temp_mat));
1228:   if (m_local) {
1229:     Mat temp_local, StY_local, result_local;
1230:     PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1231:     PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1232:     PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1233:     PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
1234:   }
1235:   PetscCall(MatConjugate(result));
1236:   PetscFunctionReturn(PETSC_SUCCESS);
1237: }
1239: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1240: {
1241:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1242:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1243:   PetscInt  m    = lmvm->m, m_local;
1244:   PetscInt  k    = ldfp->num_updates;
1245:   PetscInt  h    = k - oldest_update(m, k);
1246:   PetscInt  j_0;
1247:   PetscInt  prev_oldest;
1248:   Mat       J_local;
1250:   PetscFunctionBegin;
1251:   if (!ldfp->StY_triu_strict) {
1252:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1253:     PetscCall(MatDestroy(&ldfp->YtHY));
1254:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1255:     PetscCall(MatDestroy(&ldfp->J));
1256:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1257:     PetscCall(MatDestroy(&ldfp->HY));
1258:     PetscCall(MatDuplicate(ldfp->Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1259:     PetscCall(MatShift(ldfp->YtHY, 1.0));
1260:     ldfp->num_mult_updates = oldest_update(m, k);
1261:   }
1262:   if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
1264:   /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1265:   for (PetscInt j = oldest_update(m, k); j < k; j++) {
1266:     Vec      y_j;
1267:     Vec      Hy_j;
1268:     Vec      YtHy_j;
1269:     PetscInt Y_idx    = recycle_index(m, j);
1270:     PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1272:     PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1273:     PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1274:     PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1275:     PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1276:     PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1277:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, Hy_j, YtHy_j, 0, h));
1278:     ldfp->Yt_count++;
1279:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1280:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1281:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1282:   }
1283:   prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1284:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1285:     /* move the YtS entries that have been computed and need to be kept back up */
1286:     PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
1288:     PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1289:   }
1290:   PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1291:   j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1292:   for (PetscInt j = j_0; j < k; j++) {
1293:     PetscInt Y_idx   = recycle_index(m, j);
1294:     PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1295:     Vec      y_j, Sty_j;
1297:     PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1298:     PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1299:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, y_j, Sty_j, 0, h));
1300:     ldfp->St_count++;
1301:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1302:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1303:     PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1304:     /* zero the corresponding row */
1305:     if (m_local > 0) {
1306:       Mat StY_local, StY_row;
1308:       PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1309:       PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1310:       PetscCall(MatZeroEntries(StY_row));
1311:       PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1312:     }
1313:   }
1314:   if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1315:   PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1316:   PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1317:   PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1318:   PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1319:   PetscCall(MatGetRTDR(B, ldfp->J));
1320:   PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1321:   if (m_local) {
1322:     PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1323:     PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1324:   }
1325:   ldfp->num_mult_updates = ldfp->num_updates;
1326:   PetscFunctionReturn(PETSC_SUCCESS);
1327: }
1329: /* Solves for
1331:    H_0 - [ S | H_0 Y] [ -D  |    R.T    ]^-1 [   S^T   ]
1332:                       [-----+-----------]    [---------]
1333:                       [  R  | Y^T H_0 Y ]    [ Y^T H_0 ]
1335:    Above is equivalent to
1337:    H_0 - [ S | H_0 Y] [[     I     | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [   S^T   ]
1338:                       [[-----------+---][----+---][---+-------------]]    [---------]
1339:                       [[ -R D^{-1} | I ][  0 | J ][ 0 |      I      ]]    [ Y^T H_0 ]
1341:    where J = Y^T H_0 Y + R D^{-1} R.T
1343:    becomes
1345:    H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1}  |   0    ][     I    | 0 ] [   S^T   ]
1346:                       [---+------------][----------+--------][----------+---] [---------]
1347:                       [ 0 |      I     ][     0    | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1349:                       =
1351:    H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I |    0    ][     I    | 0 ] [   S^T   ]
1352:                       [--------+---][---+-----][---+---------][----------+---] [---------]
1353:                       [ 0      | I ][ 0 |  I  ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1355:                       (Note that StY_triu_strict is R)
1356:    Byrd, Nocedal, Schnabel 1994
1358: */
1359: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1360: {
1361:   Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1362:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1363:   PetscInt  m    = lmvm->m;
1364:   PetscInt  k    = ldfp->num_updates;
1365:   PetscInt  h    = k - oldest_update(m, k);
1366:   PetscInt  idx, i, j, local_n;
1367:   PetscInt  m_local;
1368:   Mat       J_local;
1370:   PetscFunctionBegin;
1371:   VecCheckSameSize(F, 2, dX, 3);
1372:   VecCheckMatCompatible(H, dX, 3, F, 2);
1374:   /* Cholesky Version */
1375:   /* Start with the B0 term */
1376:   PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1377:   if (!ldfp->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }
1379:   if (ldfp->use_recursive) {
1380:     PetscDeviceContext dctx;
1381:     PetscMemType       memtype;
1382:     PetscScalar        stf, ytx, ytq, yjtqi, sjtyi, *workscalar;
1384:     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1385:     /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1386:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, F, ldfp->rwork1, 0, h));
1387:     ldfp->Yt_count++;
1389:     PetscCall(VecGetLocalSize(ldfp->rwork1, &local_n));
1391:     PetscInt oldest = oldest_update(m, k);
1393:     if (ldfp->needPQ) {
1394:       PetscInt oldest = oldest_update(m, k);
1395:       for (i = 0; i <= lmvm->k; ++i) {
1396:         idx = recycle_index(m, i + oldest);
1397:         /* column_work = S[idx] */
1398:         PetscCall(MatGetColumnVector(ldfp->Yfull, ldfp->column_work, idx));
1399:         PetscCall(MatDQNApplyJ0Inv(H, ldfp->column_work, ldfp->PQ[idx]));
1400:         PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, ldfp->column_work, ldfp->rwork3, 0, h));
1401:         PetscCall(VecGetArrayAndMemType(ldfp->rwork3, &workscalar, &memtype));
1402:         for (j = 0; j < i; ++j) {
1403:           PetscInt idx_j = recycle_index(m, j + oldest);
1404:           /* Copy sjtyi in device-aware manner */
1405:           if (local_n) {
1406:             if (PetscMemTypeHost(memtype)) {
1407:               sjtyi = workscalar[idx_j];
1408:             } else {
1409:               PetscCall(PetscDeviceRegisterMemory(&sjtyi, PETSC_MEMTYPE_HOST, 1 * sizeof(sjtyi)));
1410:               PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1411:               PetscCall(PetscDeviceArrayCopy(dctx, &sjtyi, &workscalar[idx_j], 1));
1412:             }
1413:           }
1414:           PetscCallMPI(MPI_Bcast(&sjtyi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1415:           /* column_work2 = Y[j] */
1416:           PetscCall(MatGetColumnVector(ldfp->Yfull, ldfp->column_work2, idx_j));
1417:           PetscCall(VecDot(ldfp->column_work2, ldfp->PQ[idx], &yjtqi));
1418:           /* column_work2 = Y[j] */
1419:           PetscCall(MatGetColumnVector(ldfp->Sfull, ldfp->column_work2, idx_j));
1420:           /* Compute the pure BFGS component of the forward product */
1421:           PetscCall(VecAXPBYPCZ(ldfp->PQ[idx], -PetscRealPart(yjtqi) / ldfp->ytq[idx_j], PetscRealPart(sjtyi) / ldfp->yts[j], 1.0, ldfp->PQ[idx_j], ldfp->column_work2));
1422:         }
1423:         PetscCall(VecDot(ldfp->column_work, ldfp->PQ[idx], &ytq));
1424:         ldfp->ytq[idx] = PetscRealPart(ytq);
1425:       }
1426:       ldfp->needPQ = PETSC_FALSE;
1427:     }
1429:     PetscCall(VecGetArrayAndMemType(ldfp->rwork1, &workscalar, &memtype));
1430:     for (i = 0; i <= lmvm->k; ++i) {
1431:       idx = recycle_index(m, i + oldest);
1432:       /* Copy stz[i], ytx[i] in device-aware manner */
1433:       if (local_n) {
1434:         if (PetscMemTypeHost(memtype)) {
1435:           stf = workscalar[idx];
1436:         } else {
1437:           PetscCall(PetscDeviceRegisterMemory(&stf, PETSC_MEMTYPE_HOST, sizeof(stf)));
1438:           PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1439:           PetscCall(PetscDeviceArrayCopy(dctx, &stf, &workscalar[idx], 1));
1440:         }
1441:       }
1442:       PetscCallMPI(MPI_Bcast(&stf, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1443:       /* column_work : S[i], column_work2 : Y[i] */
1444:       PetscCall(MatGetColumnVector(ldfp->Sfull, ldfp->column_work, idx));
1445:       PetscCall(MatGetColumnVector(ldfp->Yfull, ldfp->column_work2, idx));
1446:       PetscCall(VecDot(ldfp->column_work2, dX, &ytx));
1447:       PetscCall(VecAXPBYPCZ(dX, -PetscRealPart(ytx) / ldfp->ytq[idx], PetscRealPart(stf) / ldfp->yts[i], 1.0, ldfp->PQ[idx], ldfp->column_work));
1448:     }
1449:     PetscCall(VecRestoreArrayAndMemType(ldfp->rwork1, &workscalar));
1450:   } else {
1451:     PetscCall(MatLMVMDDFPUpdateSolveData(H));
1452:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, F, ldfp->rwork1, 0, h));
1453:     ldfp->St_count++;
1454:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, dX, ldfp->rwork2, 0, h));
1455:     ldfp->Yt_count++;
1456:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1457:       PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1458:       PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1459:     }
1461:     PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1462:     PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));
1464:     if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1465:     if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1466:     PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1467:     PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1468:     PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1469:     PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1470:     if (m_local) {
1471:       Mat J_local;
1473:       PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1474:       PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1475:     }
1476:     PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1477:     PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1478:     PetscCall(VecScale(ldfp->rwork3, -1.0));
1480:     PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1481:     PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));
1483:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1484:       PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1485:       PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1486:     }
1488:     PetscCall(MatMultAddColumnRange(ldfp->Sfull, ldfp->rwork1, dX, dX, 0, h));
1489:     ldfp->S_count++;
1490:     PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1491:     ldfp->Y_count++;
1492:   }
1493:   PetscFunctionReturn(PETSC_SUCCESS);
1494: }
1496: /* Solves for
1497:    (Theorem 1, Erway, Jain, and Marcia, 2013)
1499:    B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [   Y^T   ]
1500:                       ---------------------------------+--------] [---------]
1501:                       [             R^{-1}             |   0    ] [ S^T B_0 ]
1503:    (Note: R above is right triangular part of YTS)
1504:    which becomes,
1506:    [ I | -Y L^{-T} ] [  I  | 0 ] [ B_0 | 0 ] [ I | S ] [      I      ]
1507:                      [-----+---] [-----+---] [---+---] [-------------]
1508:                      [ S^T | I ] [  0  | D ] [ 0 | I ] [ -L^{-1} Y^T ]
1510:    (Note: L above is right triangular part of STY)
1512: */
1513: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1514: {
1515:   Mat_LMVM        *lmvm   = (Mat_LMVM *)B->data;
1516:   Mat_DQN         *ldfp   = (Mat_DQN *)lmvm->ctx;
1517:   Vec              rwork1 = ldfp->rwork1;
1518:   PetscInt         m      = lmvm->m;
1519:   PetscInt         k      = ldfp->num_updates;
1520:   PetscInt         h      = k - oldest_update(m, k);
1521:   PetscObjectState Xstate;
1523:   PetscFunctionBegin;
1524:   VecCheckSameSize(X, 2, Z, 3);
1525:   VecCheckMatCompatible(B, X, 2, Z, 3);
1527:   /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1528:   /* Block Version */
1529:   if (!ldfp->num_updates) {
1530:     PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1531:     PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1532:   }
1534:   PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1535:   PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, X, rwork1, 0, h));
1537:   /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1538:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1539:   PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1540:   PetscCall(VecScale(rwork1, -1.0));
1541:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1543:   PetscCall(VecCopy(X, ldfp->column_work));
1544:   PetscCall(MatMultAddColumnRange(ldfp->Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1545:   ldfp->S_count++;
1547:   PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1548:   PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));
1550:   PetscCall(MatMultHermitianTransposeAddColumnRange(ldfp->Sfull, Z, rwork1, rwork1, 0, h));
1551:   ldfp->St_count++;
1553:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1554:   PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1555:   PetscCall(VecScale(rwork1, -1.0));
1556:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1558:   PetscCall(MatMultAddColumnRange(ldfp->Yfull, rwork1, Z, Z, 0, h));
1559:   ldfp->Y_count++;
1560:   PetscFunctionReturn(PETSC_SUCCESS);
1561: }
1563: /*
1564:    This dense representation reduces the L-DFP update to a series of
1565:    matrix-vector products with dense matrices in lieu of the conventional
1566:    matrix-free two-loop algorithm.
1567: */
1568: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1569: {
1570:   Mat_LMVM *lmvm;
1571:   Mat_DQN  *ldfp;
1573:   PetscFunctionBegin;
1574:   PetscCall(MatCreate_LMVM(B));
1575:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1576:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1577:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1578:   B->ops->view           = MatView_LMVMDQN;
1579:   B->ops->setup          = MatSetUp_LMVMDQN;
1580:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1581:   B->ops->destroy        = MatDestroy_LMVMDQN;
1583:   lmvm                = (Mat_LMVM *)B->data;
1584:   lmvm->square        = PETSC_TRUE;
1585:   lmvm->ops->allocate = MatAllocate_LMVMDQN;
1586:   lmvm->ops->reset    = MatReset_LMVMDQN;
1587:   lmvm->ops->update   = MatUpdate_LMVMDQN;
1588:   lmvm->ops->mult     = MatMult_LMVMDDFP;
1589:   lmvm->ops->solve    = MatSolve_LMVMDDFP;
1590:   lmvm->ops->copy     = MatCopy_LMVMDQN;
1592:   PetscCall(PetscNew(&ldfp));
1593:   lmvm->ctx             = (void *)ldfp;
1594:   ldfp->allocated       = PETSC_FALSE;
1595:   ldfp->watchdog        = 0;
1596:   ldfp->max_seq_rejects = lmvm->m / 2;
1597:   ldfp->strategy        = MAT_LMVM_DENSE_INPLACE;
1598:   ldfp->scale_type      = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
1599:   ldfp->use_recursive   = PETSC_TRUE;
1600:   ldfp->needPQ          = PETSC_TRUE;
1602:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ldfp->diag_qn));
1603:   PetscCall(MatSetType(ldfp->diag_qn, MATLMVMDIAGBROYDEN));
1604:   PetscCall(MatSetOptionsPrefix(ldfp->diag_qn, "J0_"));
1605:   PetscFunctionReturn(PETSC_SUCCESS);
1606: }
1608: /*@
1609:   MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1610:   Davidon-Fletcher-Powell (DFP) approximation to a Hessian.
1612:   Collective
1614:   Input Parameters:
1615: + comm - MPI communicator
1616: . n    - number of local rows for storage vectors
1617: - N    - global size of the storage vectors
1619:   Output Parameter:
1620: . B - the matrix
1622:   Level: advanced
1624:   Note:
1625:   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1626:   paradigm instead of this routine directly.
1628: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1629: @*/
1630: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1631: {
1632:   PetscFunctionBegin;
1633:   PetscCall(KSPInitializePackage());
1634:   PetscCall(MatCreate(comm, B));
1635:   PetscCall(MatSetSizes(*B, n, n, N, N));
1636:   PetscCall(MatSetType(*B, MATLMVMDDFP));
1637:   PetscCall(MatSetUp(*B));
1638:   PetscFunctionReturn(PETSC_SUCCESS);
1639: }
1641: /*@
1642:   MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`
1644:   Input Parameters:
1645: + B    - the `MATLMVM` matrix
1646: - type - scale type, see `MatLMVMDenseSetType`
1648:   Options Database Keys:
1649: + -mat_lqn_type   <reorder,inplace> - set the strategy
1650: . -mat_lbfgs_type <reorder,inplace> - set the strategy
1651: - -mat_ldfp_type  <reorder,inplace> - set the strategy
1653:   Level: intermediate
1655:   MatLMVMDenseTypes\:
1656: +   `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1657: -   `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement
1659: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1660: @*/
1661: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1662: {
1663:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1664:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
1666:   PetscFunctionBegin;
1668:   lqn->strategy = type;
1669:   PetscFunctionReturn(PETSC_SUCCESS);
1670: }