1 #include <../src/ksp/ksp/utils/lmvm/brdn/brdn.h> /*I "petscksp.h" I*/ 2 3 /*------------------------------------------------------------*/ 4 5 /* 6 The solution method is the matrix-free implementation of the inverse Hessian in 7 Equation 6 on page 312 of Griewank "Broyden Updating, The Good and The Bad!" 8 (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf). 9 10 Q[i] = (B_i)^{-1}*S[i] terms are computed ahead of time whenever 11 the matrix is updated with a new (S[i], Y[i]) pair. This allows 12 repeated calls of MatSolve without incurring redundant computation. 13 14 dX <- J0^{-1} * F 15 16 for i=0,1,2,...,k 17 # Q[i] = (B_i)^{-1} * Y[i] 18 tau = (Y[i]^T F) / (Y[i]^T Y[i]) 19 dX <- dX + (tau * (S[i] - Q[i])) 20 end 21 */ 22 23 static PetscErrorCode MatSolve_LMVMBadBrdn(Mat B, Vec F, Vec dX) 24 { 25 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 26 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 27 PetscErrorCode ierr; 28 PetscInt i, j; 29 PetscScalar yjtyi, ytf; 30 31 PetscFunctionBegin; 32 VecCheckSameSize(F, 2, dX, 3); 33 VecCheckMatCompatible(B, dX, 3, F, 2); 34 35 if (lbb->needQ) { 36 /* Pre-compute (Q[i] = (B_i)^{-1} * Y[i]) */ 37 for (i = 0; i <= lmvm->k; ++i) { 38 ierr = MatLMVMApplyJ0Inv(B, lmvm->Y[i], lbb->Q[i]);CHKERRQ(ierr); 39 for (j = 0; j <= i-1; ++j) { 40 ierr = VecDot(lmvm->Y[j], lmvm->Y[i], &yjtyi);CHKERRQ(ierr); 41 ierr = VecAXPBYPCZ(lbb->Q[i], PetscRealPart(yjtyi)/lbb->yty[j], -PetscRealPart(yjtyi)/lbb->yty[j], 1.0, lmvm->S[j], lbb->Q[j]);CHKERRQ(ierr); 42 } 43 } 44 lbb->needQ = PETSC_FALSE; 45 } 46 47 ierr = MatLMVMApplyJ0Inv(B, F, dX);CHKERRQ(ierr); 48 for (i = 0; i <= lmvm->k; ++i) { 49 ierr = VecDot(lmvm->Y[i], F, &ytf);CHKERRQ(ierr); 50 ierr = VecAXPBYPCZ(dX, PetscRealPart(ytf)/lbb->yty[i], -PetscRealPart(ytf)/lbb->yty[i], 1.0, lmvm->S[i], lbb->Q[i]);CHKERRQ(ierr); 51 } 52 PetscFunctionReturn(0); 53 } 54 55 /*------------------------------------------------------------*/ 56 57 /* 58 The forward product is the matrix-free implementation of the direct update in 59 Equation 6 on page 302 of Griewank "Broyden Updating, The Good and The Bad!" 60 (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf). 61 62 P[i] = (B_i)*S[i] terms are computed ahead of time whenever 63 the matrix is updated with a new (S[i], Y[i]) pair. This allows 64 repeated calls of MatMult inside KSP solvers without unnecessarily 65 recomputing P[i] terms in expensive nested-loops. 66 67 Z <- J0 * X 68 69 for i=0,1,2,...,k 70 # P[i] = B_i * S[i] 71 tau = (Y[i]^T X) / (Y[i]^T S[i]) 72 dX <- dX + (tau * (Y[i] - P[i])) 73 end 74 */ 75 76 static PetscErrorCode MatMult_LMVMBadBrdn(Mat B, Vec X, Vec Z) 77 { 78 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 79 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 80 PetscErrorCode ierr; 81 PetscInt i, j; 82 PetscScalar yjtsi, ytx; 83 84 PetscFunctionBegin; 85 VecCheckSameSize(X, 2, Z, 3); 86 VecCheckMatCompatible(B, X, 2, Z, 3); 87 88 if (lbb->needP) { 89 /* Pre-compute (P[i] = (B_i) * S[i]) */ 90 for (i = 0; i <= lmvm->k; ++i) { 91 ierr = MatLMVMApplyJ0Fwd(B, lmvm->S[i], lbb->P[i]);CHKERRQ(ierr); 92 for (j = 0; j <= i-1; ++j) { 93 ierr = VecDot(lmvm->Y[j], lmvm->S[i], &yjtsi);CHKERRQ(ierr); 94 ierr = VecAXPBYPCZ(lbb->P[i], PetscRealPart(yjtsi)/lbb->yts[j], -PetscRealPart(yjtsi)/lbb->yts[j], 1.0, lmvm->Y[j], lbb->P[j]);CHKERRQ(ierr); 95 } 96 } 97 lbb->needP = PETSC_FALSE; 98 } 99 100 ierr = MatLMVMApplyJ0Fwd(B, X, Z);CHKERRQ(ierr); 101 for (i = 0; i <= lmvm->k; ++i) { 102 ierr = VecDot(lmvm->Y[i], X, &ytx);CHKERRQ(ierr); 103 ierr = VecAXPBYPCZ(Z, PetscRealPart(ytx)/lbb->yts[i], -PetscRealPart(ytx)/lbb->yts[i], 1.0, lmvm->Y[i], lbb->P[i]);CHKERRQ(ierr); 104 } 105 PetscFunctionReturn(0); 106 } 107 108 /*------------------------------------------------------------*/ 109 110 static PetscErrorCode MatUpdate_LMVMBadBrdn(Mat B, Vec X, Vec F) 111 { 112 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 113 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 114 PetscErrorCode ierr; 115 PetscInt old_k, i; 116 PetscScalar yty, yts; 117 118 PetscFunctionBegin; 119 if (!lmvm->m) PetscFunctionReturn(0); 120 if (lmvm->prev_set) { 121 /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */ 122 ierr = VecAYPX(lmvm->Xprev, -1.0, X);CHKERRQ(ierr); 123 ierr = VecAYPX(lmvm->Fprev, -1.0, F);CHKERRQ(ierr); 124 /* Accept the update */ 125 lbb->needP = lbb->needQ = PETSC_TRUE; 126 old_k = lmvm->k; 127 ierr = MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev);CHKERRQ(ierr); 128 /* If we hit the memory limit, shift the yty and yts arrays */ 129 if (old_k == lmvm->k) { 130 for (i = 0; i <= lmvm->k-1; ++i) { 131 lbb->yty[i] = lbb->yty[i+1]; 132 lbb->yts[i] = lbb->yts[i+1]; 133 } 134 } 135 /* Accumulate the latest yTy and yTs dot products */ 136 ierr = VecDotBegin(lmvm->Y[lmvm->k], lmvm->Y[lmvm->k], &yty);CHKERRQ(ierr); 137 ierr = VecDotBegin(lmvm->Y[lmvm->k], lmvm->S[lmvm->k], &yts);CHKERRQ(ierr); 138 ierr = VecDotEnd(lmvm->Y[lmvm->k], lmvm->Y[lmvm->k], &yty);CHKERRQ(ierr); 139 ierr = VecDotEnd(lmvm->Y[lmvm->k], lmvm->S[lmvm->k], &yts);CHKERRQ(ierr); 140 lbb->yty[lmvm->k] = PetscRealPart(yty); 141 lbb->yts[lmvm->k] = PetscRealPart(yts); 142 } 143 /* Save the solution and function to be used in the next update */ 144 ierr = VecCopy(X, lmvm->Xprev);CHKERRQ(ierr); 145 ierr = VecCopy(F, lmvm->Fprev);CHKERRQ(ierr); 146 lmvm->prev_set = PETSC_TRUE; 147 PetscFunctionReturn(0); 148 } 149 150 /*------------------------------------------------------------*/ 151 152 static PetscErrorCode MatCopy_LMVMBadBrdn(Mat B, Mat M, MatStructure str) 153 { 154 Mat_LMVM *bdata = (Mat_LMVM*)B->data; 155 Mat_Brdn *bctx = (Mat_Brdn*)bdata->ctx; 156 Mat_LMVM *mdata = (Mat_LMVM*)M->data; 157 Mat_Brdn *mctx = (Mat_Brdn*)mdata->ctx; 158 PetscErrorCode ierr; 159 PetscInt i; 160 161 PetscFunctionBegin; 162 mctx->needP = bctx->needP; 163 mctx->needQ = bctx->needQ; 164 for (i=0; i<=bdata->k; ++i) { 165 mctx->yty[i] = bctx->yty[i]; 166 mctx->yts[i] = bctx->yts[i]; 167 ierr = VecCopy(bctx->P[i], mctx->P[i]);CHKERRQ(ierr); 168 ierr = VecCopy(bctx->Q[i], mctx->Q[i]);CHKERRQ(ierr); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 /*------------------------------------------------------------*/ 174 175 static PetscErrorCode MatReset_LMVMBadBrdn(Mat B, PetscBool destructive) 176 { 177 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 178 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 lbb->needP = lbb->needQ = PETSC_TRUE; 183 if (destructive && lbb->allocated) { 184 ierr = PetscFree2(lbb->yty, lbb->yts);CHKERRQ(ierr); 185 ierr = VecDestroyVecs(lmvm->m, &lbb->P);CHKERRQ(ierr); 186 ierr = VecDestroyVecs(lmvm->m, &lbb->Q);CHKERRQ(ierr); 187 lbb->allocated = PETSC_FALSE; 188 } 189 ierr = MatReset_LMVM(B, destructive);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 /*------------------------------------------------------------*/ 194 195 static PetscErrorCode MatAllocate_LMVMBadBrdn(Mat B, Vec X, Vec F) 196 { 197 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 198 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 199 PetscErrorCode ierr; 200 201 PetscFunctionBegin; 202 ierr = MatAllocate_LMVM(B, X, F);CHKERRQ(ierr); 203 if (!lbb->allocated) { 204 ierr = PetscMalloc2(lmvm->m, &lbb->yty, lmvm->m, &lbb->yts);CHKERRQ(ierr); 205 if (lmvm->m > 0) { 206 ierr = VecDuplicateVecs(X, lmvm->m, &lbb->P);CHKERRQ(ierr); 207 ierr = VecDuplicateVecs(X, lmvm->m, &lbb->Q);CHKERRQ(ierr); 208 } 209 lbb->allocated = PETSC_TRUE; 210 } 211 PetscFunctionReturn(0); 212 } 213 214 /*------------------------------------------------------------*/ 215 216 static PetscErrorCode MatDestroy_LMVMBadBrdn(Mat B) 217 { 218 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 219 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 if (lbb->allocated) { 224 ierr = PetscFree2(lbb->yty, lbb->yts);CHKERRQ(ierr); 225 ierr = VecDestroyVecs(lmvm->m, &lbb->P);CHKERRQ(ierr); 226 ierr = VecDestroyVecs(lmvm->m, &lbb->Q);CHKERRQ(ierr); 227 lbb->allocated = PETSC_FALSE; 228 } 229 ierr = PetscFree(lmvm->ctx);CHKERRQ(ierr); 230 ierr = MatDestroy_LMVM(B);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 /*------------------------------------------------------------*/ 235 236 static PetscErrorCode MatSetUp_LMVMBadBrdn(Mat B) 237 { 238 Mat_LMVM *lmvm = (Mat_LMVM*)B->data; 239 Mat_Brdn *lbb = (Mat_Brdn*)lmvm->ctx; 240 PetscErrorCode ierr; 241 242 PetscFunctionBegin; 243 ierr = MatSetUp_LMVM(B);CHKERRQ(ierr); 244 if (!lbb->allocated) { 245 ierr = PetscMalloc2(lmvm->m, &lbb->yty, lmvm->m, &lbb->yts);CHKERRQ(ierr); 246 if (lmvm->m > 0) { 247 ierr = VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbb->P);CHKERRQ(ierr); 248 ierr = VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbb->Q);CHKERRQ(ierr); 249 } 250 lbb->allocated = PETSC_TRUE; 251 } 252 PetscFunctionReturn(0); 253 } 254 255 /*------------------------------------------------------------*/ 256 257 PetscErrorCode MatCreate_LMVMBadBrdn(Mat B) 258 { 259 Mat_LMVM *lmvm; 260 Mat_Brdn *lbb; 261 PetscErrorCode ierr; 262 263 PetscFunctionBegin; 264 ierr = MatCreate_LMVM(B);CHKERRQ(ierr); 265 ierr = PetscObjectChangeTypeName((PetscObject)B, MATLMVMBADBROYDEN);CHKERRQ(ierr); 266 B->ops->setup = MatSetUp_LMVMBadBrdn; 267 B->ops->destroy = MatDestroy_LMVMBadBrdn; 268 B->ops->solve = MatSolve_LMVMBadBrdn; 269 270 lmvm = (Mat_LMVM*)B->data; 271 lmvm->square = PETSC_TRUE; 272 lmvm->ops->allocate = MatAllocate_LMVMBadBrdn; 273 lmvm->ops->reset = MatReset_LMVMBadBrdn; 274 lmvm->ops->mult = MatMult_LMVMBadBrdn; 275 lmvm->ops->update = MatUpdate_LMVMBadBrdn; 276 lmvm->ops->copy = MatCopy_LMVMBadBrdn; 277 278 ierr = PetscNewLog(B, &lbb);CHKERRQ(ierr); 279 lmvm->ctx = (void*)lbb; 280 lbb->allocated = PETSC_FALSE; 281 lbb->needP = lbb->needQ = PETSC_TRUE; 282 PetscFunctionReturn(0); 283 } 284 285 /*------------------------------------------------------------*/ 286 287 /*@ 288 MatCreateLMVMBadBroyden - Creates a limited-memory modified (aka "bad") Broyden-type 289 approximation matrix used for a Jacobian. L-BadBrdn is not guaranteed to be 290 symmetric or positive-definite. 291 292 The provided local and global sizes must match the solution and function vectors 293 used with MatLMVMUpdate() and MatSolve(). The resulting L-BadBrdn matrix will have 294 storage vectors allocated with VecCreateSeq() in serial and VecCreateMPI() in 295 parallel. To use the L-BadBrdn matrix with other vector types, the matrix must be 296 created using MatCreate() and MatSetType(), followed by MatLMVMAllocate(). 297 This ensures that the internal storage and work vectors are duplicated from the 298 correct type of vector. 299 300 Collective 301 302 Input Parameters: 303 + comm - MPI communicator, set to PETSC_COMM_SELF 304 . n - number of local rows for storage vectors 305 - N - global size of the storage vectors 306 307 Output Parameter: 308 . B - the matrix 309 310 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions() 311 paradigm instead of this routine directly. 312 313 Options Database Keys: 314 + -mat_lmvm_num_vecs - maximum number of correction vectors (i.e.: updates) stored 315 . -mat_lmvm_scale_type - (developer) type of scaling applied to J0 (none, scalar, diagonal) 316 . -mat_lmvm_theta - (developer) convex ratio between BFGS and DFP components of the diagonal J0 scaling 317 . -mat_lmvm_rho - (developer) update limiter for the J0 scaling 318 . -mat_lmvm_alpha - (developer) coefficient factor for the quadratic subproblem in J0 scaling 319 . -mat_lmvm_beta - (developer) exponential factor for the diagonal J0 scaling 320 - -mat_lmvm_sigma_hist - (developer) number of past updates to use in J0 scaling 321 322 Level: intermediate 323 324 .seealso: MatCreate(), MATLMVM, MATLMVMBADBRDN, MatCreateLMVMDFP(), MatCreateLMVMSR1(), 325 MatCreateLMVMBFGS(), MatCreateLMVMBrdn(), MatCreateLMVMSymBrdn() 326 @*/ 327 PetscErrorCode MatCreateLMVMBadBroyden(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B) 328 { 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 ierr = MatCreate(comm, B);CHKERRQ(ierr); 333 ierr = MatSetSizes(*B, n, n, N, N);CHKERRQ(ierr); 334 ierr = MatSetType(*B, MATLMVMBADBROYDEN);CHKERRQ(ierr); 335 ierr = MatSetUp(*B);CHKERRQ(ierr); 336 PetscFunctionReturn(0); 337 } 338