115ed3f7dSjeremylt /// @file 215ed3f7dSjeremylt /// Test creation and use of FDM element inverse 315ed3f7dSjeremylt /// \test Test creation and use of FDM element inverse 4*2b730f8bSJeremy L Thompson #include "t541-operator.h" 5*2b730f8bSJeremy L Thompson 615ed3f7dSjeremylt #include <ceed.h> 7*2b730f8bSJeremy L Thompson #include <math.h> 815ed3f7dSjeremylt #include <stdlib.h> 915ed3f7dSjeremylt #include <string.h> 1015ed3f7dSjeremylt 1115ed3f7dSjeremylt int main(int argc, char **argv) { 1215ed3f7dSjeremylt Ceed ceed; 1315ed3f7dSjeremylt CeedElemRestriction elem_restr_x_i, elem_restr_u_i, elem_restr_qd_i; 1415ed3f7dSjeremylt CeedBasis basis_x, basis_u; 1515ed3f7dSjeremylt CeedQFunction qf_setup_diff, qf_apply; 1615ed3f7dSjeremylt CeedOperator op_setup_diff, op_apply, op_inv; 1715ed3f7dSjeremylt CeedVector q_data_diff, X, U, V, W; 1815ed3f7dSjeremylt CeedInt num_elem = 1, P = 4, Q = 5, dim = 2; 1915ed3f7dSjeremylt CeedInt num_dofs = P * P, num_qpts = num_elem * Q * Q, q_data_size = dim * (dim + 1) / 2; 2015ed3f7dSjeremylt CeedScalar x[dim * num_elem * (2 * 2)]; 2115ed3f7dSjeremylt 2215ed3f7dSjeremylt CeedInit(argv[1], &ceed); 2315ed3f7dSjeremylt 2480a9ef05SNatalie Beams // Test skipped if using single precision 25*2b730f8bSJeremy L Thompson if (CEED_SCALAR_TYPE == CEED_SCALAR_FP32) return CeedError(ceed, CEED_ERROR_UNSUPPORTED, "Test not implemented in single precision"); 2680a9ef05SNatalie Beams 2715ed3f7dSjeremylt // DoF Coordinates 28*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < 2; i++) { 2915ed3f7dSjeremylt for (CeedInt j = 0; j < 2; j++) { 3015ed3f7dSjeremylt x[i + j * 2 + 0 * 4] = i; 3115ed3f7dSjeremylt x[i + j * 2 + 1 * 4] = j; 3215ed3f7dSjeremylt } 33*2b730f8bSJeremy L Thompson } 3415ed3f7dSjeremylt CeedVectorCreate(ceed, dim * num_elem * (2 * 2), &X); 3515ed3f7dSjeremylt CeedVectorSetArray(X, CEED_MEM_HOST, CEED_USE_POINTER, x); 3615ed3f7dSjeremylt 3715ed3f7dSjeremylt // Qdata Vector 3815ed3f7dSjeremylt CeedVectorCreate(ceed, q_data_size * num_qpts, &q_data_diff); 3915ed3f7dSjeremylt 4015ed3f7dSjeremylt // Element Setup 4115ed3f7dSjeremylt 4215ed3f7dSjeremylt // Restrictions 4315ed3f7dSjeremylt CeedInt strides_x[3] = {1, 2 * 2, 2 * 2 * dim}; 44*2b730f8bSJeremy L Thompson CeedElemRestrictionCreateStrided(ceed, num_elem, 2 * 2, dim, dim * num_elem * 2 * 2, strides_x, &elem_restr_x_i); 4515ed3f7dSjeremylt 4615ed3f7dSjeremylt CeedInt strides_u[3] = {1, P * P, P * P}; 47*2b730f8bSJeremy L Thompson CeedElemRestrictionCreateStrided(ceed, num_elem, P * P, 1, num_dofs, strides_u, &elem_restr_u_i); 4815ed3f7dSjeremylt 4915ed3f7dSjeremylt CeedInt strides_qd[3] = {1, Q * Q, q_data_size * Q * Q}; 50*2b730f8bSJeremy L Thompson CeedElemRestrictionCreateStrided(ceed, num_elem, Q * Q, q_data_size, num_qpts * q_data_size, strides_qd, &elem_restr_qd_i); 5115ed3f7dSjeremylt 5215ed3f7dSjeremylt // Bases 5315ed3f7dSjeremylt CeedBasisCreateTensorH1Lagrange(ceed, dim, dim, 2, Q, CEED_GAUSS, &basis_x); 5415ed3f7dSjeremylt CeedBasisCreateTensorH1Lagrange(ceed, dim, 1, P, Q, CEED_GAUSS, &basis_u); 5515ed3f7dSjeremylt 5615ed3f7dSjeremylt // QFunction - setup diff 57*2b730f8bSJeremy L Thompson CeedQFunctionCreateInterior(ceed, 1, setup_diff, setup_diff_loc, &qf_setup_diff); 5815ed3f7dSjeremylt CeedQFunctionAddInput(qf_setup_diff, "dx", dim * dim, CEED_EVAL_GRAD); 5915ed3f7dSjeremylt CeedQFunctionAddInput(qf_setup_diff, "weight", 1, CEED_EVAL_WEIGHT); 6015ed3f7dSjeremylt CeedQFunctionAddOutput(qf_setup_diff, "qdata", q_data_size, CEED_EVAL_NONE); 6115ed3f7dSjeremylt 6215ed3f7dSjeremylt // Operator - setup diff 63*2b730f8bSJeremy L Thompson CeedOperatorCreate(ceed, qf_setup_diff, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_setup_diff); 64*2b730f8bSJeremy L Thompson CeedOperatorSetField(op_setup_diff, "dx", elem_restr_x_i, basis_x, CEED_VECTOR_ACTIVE); 65*2b730f8bSJeremy L Thompson CeedOperatorSetField(op_setup_diff, "weight", CEED_ELEMRESTRICTION_NONE, basis_x, CEED_VECTOR_NONE); 66*2b730f8bSJeremy L Thompson CeedOperatorSetField(op_setup_diff, "qdata", elem_restr_qd_i, CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 6715ed3f7dSjeremylt 6815ed3f7dSjeremylt // Apply Setup Operator 6915ed3f7dSjeremylt CeedOperatorApply(op_setup_diff, X, q_data_diff, CEED_REQUEST_IMMEDIATE); 7015ed3f7dSjeremylt 7115ed3f7dSjeremylt // QFunction - apply 7215ed3f7dSjeremylt CeedQFunctionCreateInterior(ceed, 1, apply, apply_loc, &qf_apply); 7315ed3f7dSjeremylt CeedQFunctionAddInput(qf_apply, "u", dim, CEED_EVAL_GRAD); 7415ed3f7dSjeremylt CeedQFunctionAddInput(qf_apply, "qdata_diff", q_data_size, CEED_EVAL_NONE); 7515ed3f7dSjeremylt CeedQFunctionAddOutput(qf_apply, "v", dim, CEED_EVAL_GRAD); 7615ed3f7dSjeremylt 7715ed3f7dSjeremylt // Operator - apply 78*2b730f8bSJeremy L Thompson CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply); 79*2b730f8bSJeremy L Thompson CeedOperatorSetField(op_apply, "u", elem_restr_u_i, basis_u, CEED_VECTOR_ACTIVE); 80*2b730f8bSJeremy L Thompson CeedOperatorSetField(op_apply, "qdata_diff", elem_restr_qd_i, CEED_BASIS_COLLOCATED, q_data_diff); 81*2b730f8bSJeremy L Thompson CeedOperatorSetField(op_apply, "v", elem_restr_u_i, basis_u, CEED_VECTOR_ACTIVE); 8215ed3f7dSjeremylt 8315ed3f7dSjeremylt // Create FDM element inverse 8415ed3f7dSjeremylt CeedOperatorCreateFDMElementInverse(op_apply, &op_inv, CEED_REQUEST_IMMEDIATE); 8515ed3f7dSjeremylt 8615ed3f7dSjeremylt // Create vectors 8715ed3f7dSjeremylt CeedVectorCreate(ceed, num_dofs, &U); 8815ed3f7dSjeremylt CeedVectorCreate(ceed, num_dofs, &V); 8915ed3f7dSjeremylt CeedVectorCreate(ceed, num_dofs, &W); 9015ed3f7dSjeremylt 9115ed3f7dSjeremylt // Create Schur complement for element corners 9215ed3f7dSjeremylt CeedScalar S[16]; 9315ed3f7dSjeremylt for (CeedInt i = 0; i < 4; i++) { 9415ed3f7dSjeremylt CeedScalar *u; 9515ed3f7dSjeremylt CeedVectorSetValue(U, 0.0); 9615ed3f7dSjeremylt CeedVectorGetArray(U, CEED_MEM_HOST, &u); 9715ed3f7dSjeremylt switch (i) { 98*2b730f8bSJeremy L Thompson case 0: 99*2b730f8bSJeremy L Thompson u[0] = 1.0; 100*2b730f8bSJeremy L Thompson break; 101*2b730f8bSJeremy L Thompson case 1: 102*2b730f8bSJeremy L Thompson u[P - 1] = 1.0; 103*2b730f8bSJeremy L Thompson break; 104*2b730f8bSJeremy L Thompson case 2: 105*2b730f8bSJeremy L Thompson u[P * P - P] = 1.0; 106*2b730f8bSJeremy L Thompson break; 107*2b730f8bSJeremy L Thompson case 3: 108*2b730f8bSJeremy L Thompson u[P * P - 1] = 1.0; 109*2b730f8bSJeremy L Thompson break; 11015ed3f7dSjeremylt } 11115ed3f7dSjeremylt CeedVectorRestoreArray(U, &u); 11215ed3f7dSjeremylt 11315ed3f7dSjeremylt CeedOperatorApply(op_inv, U, V, CEED_REQUEST_IMMEDIATE); 11415ed3f7dSjeremylt 11515ed3f7dSjeremylt const CeedScalar *v; 11615ed3f7dSjeremylt CeedVectorGetArrayRead(V, CEED_MEM_HOST, &v); 11785cf89eaSjeremylt S[0 * 4 + i] = -v[0]; 11885cf89eaSjeremylt S[1 * 4 + i] = -v[P - 1]; 11985cf89eaSjeremylt S[2 * 4 + i] = -v[P * P - P]; 12085cf89eaSjeremylt S[3 * 4 + i] = -v[P * P - 1]; 12115ed3f7dSjeremylt CeedVectorRestoreArrayRead(V, &v); 12215ed3f7dSjeremylt } 12315ed3f7dSjeremylt CeedScalar S_inv[16]; 12415ed3f7dSjeremylt { 12515ed3f7dSjeremylt CeedScalar det; 126*2b730f8bSJeremy L Thompson S_inv[0] = S[5] * S[10] * S[15] - S[5] * S[11] * S[14] - S[9] * S[6] * S[15] + S[9] * S[7] * S[14] + S[13] * S[6] * S[11] - S[13] * S[7] * S[10]; 12715ed3f7dSjeremylt 128*2b730f8bSJeremy L Thompson S_inv[4] = -S[4] * S[10] * S[15] + S[4] * S[11] * S[14] + S[8] * S[6] * S[15] - S[8] * S[7] * S[14] - S[12] * S[6] * S[11] + S[12] * S[7] * S[10]; 12915ed3f7dSjeremylt 130*2b730f8bSJeremy L Thompson S_inv[8] = S[4] * S[9] * S[15] - S[4] * S[11] * S[13] - S[8] * S[5] * S[15] + S[8] * S[7] * S[13] + S[12] * S[5] * S[11] - S[12] * S[7] * S[9]; 13115ed3f7dSjeremylt 132*2b730f8bSJeremy L Thompson S_inv[12] = -S[4] * S[9] * S[14] + S[4] * S[10] * S[13] + S[8] * S[5] * S[14] - S[8] * S[6] * S[13] - S[12] * S[5] * S[10] + S[12] * S[6] * S[9]; 13315ed3f7dSjeremylt 134*2b730f8bSJeremy L Thompson S_inv[1] = -S[1] * S[10] * S[15] + S[1] * S[11] * S[14] + S[9] * S[2] * S[15] - S[9] * S[3] * S[14] - S[13] * S[2] * S[11] + S[13] * S[3] * S[10]; 13515ed3f7dSjeremylt 136*2b730f8bSJeremy L Thompson S_inv[5] = S[0] * S[10] * S[15] - S[0] * S[11] * S[14] - S[8] * S[2] * S[15] + S[8] * S[3] * S[14] + S[12] * S[2] * S[11] - S[12] * S[3] * S[10]; 13715ed3f7dSjeremylt 138*2b730f8bSJeremy L Thompson S_inv[9] = -S[0] * S[9] * S[15] + S[0] * S[11] * S[13] + S[8] * S[1] * S[15] - S[8] * S[3] * S[13] - S[12] * S[1] * S[11] + S[12] * S[3] * S[9]; 13915ed3f7dSjeremylt 140*2b730f8bSJeremy L Thompson S_inv[13] = S[0] * S[9] * S[14] - S[0] * S[10] * S[13] - S[8] * S[1] * S[14] + S[8] * S[2] * S[13] + S[12] * S[1] * S[10] - S[12] * S[2] * S[9]; 14115ed3f7dSjeremylt 142*2b730f8bSJeremy L Thompson S_inv[2] = S[1] * S[6] * S[15] - S[1] * S[7] * S[14] - S[5] * S[2] * S[15] + S[5] * S[3] * S[14] + S[13] * S[2] * S[7] - S[13] * S[3] * S[6]; 14315ed3f7dSjeremylt 144*2b730f8bSJeremy L Thompson S_inv[6] = -S[0] * S[6] * S[15] + S[0] * S[7] * S[14] + S[4] * S[2] * S[15] - S[4] * S[3] * S[14] - S[12] * S[2] * S[7] + S[12] * S[3] * S[6]; 14515ed3f7dSjeremylt 146*2b730f8bSJeremy L Thompson S_inv[10] = S[0] * S[5] * S[15] - S[0] * S[7] * S[13] - S[4] * S[1] * S[15] + S[4] * S[3] * S[13] + S[12] * S[1] * S[7] - S[12] * S[3] * S[5]; 14715ed3f7dSjeremylt 148*2b730f8bSJeremy L Thompson S_inv[14] = -S[0] * S[5] * S[14] + S[0] * S[6] * S[13] + S[4] * S[1] * S[14] - S[4] * S[2] * S[13] - S[12] * S[1] * S[6] + S[12] * S[2] * S[5]; 14915ed3f7dSjeremylt 150*2b730f8bSJeremy L Thompson S_inv[3] = -S[1] * S[6] * S[11] + S[1] * S[7] * S[10] + S[5] * S[2] * S[11] - S[5] * S[3] * S[10] - S[9] * S[2] * S[7] + S[9] * S[3] * S[6]; 15115ed3f7dSjeremylt 152*2b730f8bSJeremy L Thompson S_inv[7] = S[0] * S[6] * S[11] - S[0] * S[7] * S[10] - S[4] * S[2] * S[11] + S[4] * S[3] * S[10] + S[8] * S[2] * S[7] - S[8] * S[3] * S[6]; 15315ed3f7dSjeremylt 154*2b730f8bSJeremy L Thompson S_inv[11] = -S[0] * S[5] * S[11] + S[0] * S[7] * S[9] + S[4] * S[1] * S[11] - S[4] * S[3] * S[9] - S[8] * S[1] * S[7] + S[8] * S[3] * S[5]; 15515ed3f7dSjeremylt 156*2b730f8bSJeremy L Thompson S_inv[15] = S[0] * S[5] * S[10] - S[0] * S[6] * S[9] - S[4] * S[1] * S[10] + S[4] * S[2] * S[9] + S[8] * S[1] * S[6] - S[8] * S[2] * S[5]; 15715ed3f7dSjeremylt 15815ed3f7dSjeremylt det = 1 / (S[0] * S_inv[0] + S[1] * S_inv[4] + S[2] * S_inv[8] + S[3] * S_inv[12]); 15915ed3f7dSjeremylt 160*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < 16; i++) S_inv[i] *= det; 16115ed3f7dSjeremylt } 16215ed3f7dSjeremylt 16315ed3f7dSjeremylt // Set initial values 16415ed3f7dSjeremylt { 16515ed3f7dSjeremylt CeedScalar nodes[P]; 16615ed3f7dSjeremylt CeedLobattoQuadrature(P, nodes, NULL); 16715ed3f7dSjeremylt CeedScalar *u; 16815ed3f7dSjeremylt CeedVectorGetArray(U, CEED_MEM_HOST, &u); 169*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < P; i++) { 170*2b730f8bSJeremy L Thompson for (CeedInt j = 0; j < P; j++) u[i * P + j] = -(nodes[i] - 1.0) * (nodes[i] + 1.0) - (nodes[j] - 1.0) * (nodes[j] + 1.0); 171*2b730f8bSJeremy L Thompson } 17215ed3f7dSjeremylt CeedVectorRestoreArray(U, &u); 17315ed3f7dSjeremylt } 17415ed3f7dSjeremylt 17515ed3f7dSjeremylt // Apply original operator 17615ed3f7dSjeremylt CeedOperatorApply(op_apply, U, V, CEED_REQUEST_IMMEDIATE); 17715ed3f7dSjeremylt 17815ed3f7dSjeremylt // Apply FDM element inverse 17915ed3f7dSjeremylt { 18015ed3f7dSjeremylt // -- Zero corners 18115ed3f7dSjeremylt CeedScalar *v; 18215ed3f7dSjeremylt CeedVectorGetArray(V, CEED_MEM_HOST, &v); 18315ed3f7dSjeremylt v[0] = 0.0; 18415ed3f7dSjeremylt v[P - 1] = 0.0; 18515ed3f7dSjeremylt v[P * P - P] = 0.0; 18615ed3f7dSjeremylt v[P * P - 1] = 0.0; 18715ed3f7dSjeremylt CeedVectorRestoreArray(V, &v); 18815ed3f7dSjeremylt 18915ed3f7dSjeremylt // -- Apply FDM inverse to interior 19015ed3f7dSjeremylt CeedOperatorApply(op_inv, V, W, CEED_REQUEST_IMMEDIATE); 19115ed3f7dSjeremylt 19215ed3f7dSjeremylt // -- Pick off corners 19315ed3f7dSjeremylt const CeedScalar *w; 19415ed3f7dSjeremylt CeedScalar w_Pi[4]; 19515ed3f7dSjeremylt CeedVectorGetArrayRead(W, CEED_MEM_HOST, &w); 19615ed3f7dSjeremylt w_Pi[0] = w[0]; 19715ed3f7dSjeremylt w_Pi[1] = w[P - 1]; 19815ed3f7dSjeremylt w_Pi[2] = w[P * P - P]; 19915ed3f7dSjeremylt w_Pi[3] = w[P * P - 1]; 20015ed3f7dSjeremylt CeedVectorRestoreArrayRead(W, &w); 20115ed3f7dSjeremylt 20215ed3f7dSjeremylt // -- Apply inverse of Schur complement 20315ed3f7dSjeremylt CeedScalar v_Pi[4]; 20415ed3f7dSjeremylt for (CeedInt i = 0; i < 4; i++) { 20515ed3f7dSjeremylt CeedScalar sum = 0.0; 20615ed3f7dSjeremylt for (CeedInt j = 0; j < 4; j++) { 20715ed3f7dSjeremylt sum += w_Pi[j] * S_inv[i * 4 + j]; 20815ed3f7dSjeremylt } 20985cf89eaSjeremylt v_Pi[i] = sum; 21015ed3f7dSjeremylt } 21115ed3f7dSjeremylt 21215ed3f7dSjeremylt // -- Set corners 21315ed3f7dSjeremylt CeedVectorGetArray(V, CEED_MEM_HOST, &v); 21415ed3f7dSjeremylt v[0] = v_Pi[0]; 21515ed3f7dSjeremylt v[P - 1] = v_Pi[1]; 21615ed3f7dSjeremylt v[P * P - P] = v_Pi[2]; 21715ed3f7dSjeremylt v[P * P - 1] = v_Pi[3]; 21815ed3f7dSjeremylt CeedVectorRestoreArray(V, &v); 21915ed3f7dSjeremylt 22015ed3f7dSjeremylt // -- Apply full FDM inverse again 22115ed3f7dSjeremylt CeedOperatorApply(op_inv, V, W, CEED_REQUEST_IMMEDIATE); 22215ed3f7dSjeremylt } 22315ed3f7dSjeremylt 22415ed3f7dSjeremylt // Check output 22515ed3f7dSjeremylt { 22615ed3f7dSjeremylt const CeedScalar *u, *w; 22715ed3f7dSjeremylt CeedVectorGetArrayRead(U, CEED_MEM_HOST, &u); 22815ed3f7dSjeremylt CeedVectorGetArrayRead(W, CEED_MEM_HOST, &w); 229*2b730f8bSJeremy L Thompson for (CeedInt i = 0; i < P; i++) { 230*2b730f8bSJeremy L Thompson for (CeedInt j = 0; j < P; j++) { 231*2b730f8bSJeremy L Thompson if (fabs(u[i * P + j] - w[i * P + j]) > 2e-3) { 23215ed3f7dSjeremylt // LCOV_EXCL_START 233*2b730f8bSJeremy L Thompson printf("[%" CeedInt_FMT ", %" CeedInt_FMT "] Error in inverse: %e != %e\n", i, j, w[i * P + j], u[i * P + j]); 23415ed3f7dSjeremylt // LCOV_EXCL_STOP 235*2b730f8bSJeremy L Thompson } 236*2b730f8bSJeremy L Thompson } 237*2b730f8bSJeremy L Thompson } 23815ed3f7dSjeremylt CeedVectorRestoreArrayRead(U, &u); 23915ed3f7dSjeremylt CeedVectorRestoreArrayRead(W, &w); 24015ed3f7dSjeremylt } 24115ed3f7dSjeremylt 24215ed3f7dSjeremylt // Cleanup 24315ed3f7dSjeremylt CeedQFunctionDestroy(&qf_setup_diff); 24415ed3f7dSjeremylt CeedQFunctionDestroy(&qf_apply); 24515ed3f7dSjeremylt CeedOperatorDestroy(&op_setup_diff); 24615ed3f7dSjeremylt CeedOperatorDestroy(&op_apply); 24715ed3f7dSjeremylt CeedOperatorDestroy(&op_inv); 24815ed3f7dSjeremylt CeedElemRestrictionDestroy(&elem_restr_u_i); 24915ed3f7dSjeremylt CeedElemRestrictionDestroy(&elem_restr_x_i); 25015ed3f7dSjeremylt CeedElemRestrictionDestroy(&elem_restr_qd_i); 25115ed3f7dSjeremylt CeedBasisDestroy(&basis_x); 25215ed3f7dSjeremylt CeedBasisDestroy(&basis_u); 25315ed3f7dSjeremylt CeedVectorDestroy(&X); 25415ed3f7dSjeremylt CeedVectorDestroy(&q_data_diff); 25515ed3f7dSjeremylt CeedVectorDestroy(&U); 25615ed3f7dSjeremylt CeedVectorDestroy(&V); 25715ed3f7dSjeremylt CeedVectorDestroy(&W); 25815ed3f7dSjeremylt CeedDestroy(&ceed); 25915ed3f7dSjeremylt return 0; 26015ed3f7dSjeremylt } 261