15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h> 25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h> 372b8c272SStefano Zampini #include <petscblaslapack.h> 4839e9adbSstefano_zampini #include <../src/mat/impls/dense/seq/dense.h> 5674ae819SStefano Zampini 634a97f8cSStefano Zampini /* prototypes for deluxe functions */ 734a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 834a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 934a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 105a95e1ceSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 1134a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12674ae819SStefano Zampini 139371c9d4SSatish Balay static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) { 14839e9adbSstefano_zampini Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 151683a169SBarry Smith const PetscScalar *b; 161683a169SBarry Smith PetscScalar *x; 17839e9adbSstefano_zampini PetscInt n; 18839e9adbSstefano_zampini PetscBLASInt nrhs, info, m; 19839e9adbSstefano_zampini PetscBool flg; 20839e9adbSstefano_zampini 21839e9adbSstefano_zampini PetscFunctionBegin; 229566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 239566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 2428b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 259566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 2628b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 27839e9adbSstefano_zampini 289566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, NULL, &n)); 299566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n, &nrhs)); 309566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 319566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 329566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x, b, m * nrhs)); 339566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 34839e9adbSstefano_zampini 35839e9adbSstefano_zampini if (A->factortype == MAT_FACTOR_LU) { 36792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 3728b400f6SJacob Faibussowitsch PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve"); 383829e7e6SStefano Zampini } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported"); 39839e9adbSstefano_zampini 409566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 419566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 42839e9adbSstefano_zampini PetscFunctionReturn(0); 43839e9adbSstefano_zampini } 44839e9adbSstefano_zampini 459371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) { 46674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 47674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 48674ae819SStefano Zampini 49674ae819SStefano Zampini PetscFunctionBegin; 50674ae819SStefano Zampini /* Apply partition of unity */ 519566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector)); 529566063dSJacob Faibussowitsch PetscCall(VecSet(global_vector, 0.0)); 539566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 549566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 55674ae819SStefano Zampini PetscFunctionReturn(0); 56674ae819SStefano Zampini } 57674ae819SStefano Zampini 589371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) { 59674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 60674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 61674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 62674ae819SStefano Zampini 63674ae819SStefano Zampini PetscFunctionBegin; 649566063dSJacob Faibussowitsch PetscCall(VecSet(pcbddc->work_scaling, 0.0)); 659566063dSJacob Faibussowitsch PetscCall(VecSet(y, 0.0)); 665a95e1ceSStefano Zampini if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 675a95e1ceSStefano Zampini PetscInt i; 682b095fd8SStefano Zampini const PetscScalar *array_x, *array_D; 692b095fd8SStefano Zampini PetscScalar *array; 709566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &array_x)); 719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(pcis->D, &array_D)); 729566063dSJacob Faibussowitsch PetscCall(VecGetArray(pcbddc->work_scaling, &array)); 739371c9d4SSatish Balay for (i = 0; i < deluxe_ctx->n_simple; i++) { array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]]; } 749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(pcbddc->work_scaling, &array)); 759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &array_x)); 7734a97f8cSStefano Zampini } 78ac632422SStefano Zampini /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 795a95e1ceSStefano Zampini if (deluxe_ctx->seq_mat) { 8072b8c272SStefano Zampini PetscInt i; 8172b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 8216e386b8SStefano Zampini if (deluxe_ctx->change) { 839566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 8572b8c272SStefano Zampini if (deluxe_ctx->change_with_qr) { 8672b8c272SStefano Zampini Mat change; 8772b8c272SStefano Zampini 889566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 899566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 9072b8c272SStefano Zampini } else { 919566063dSJacob Faibussowitsch PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 9216e386b8SStefano Zampini } 9372b8c272SStefano Zampini } else { 949566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 959566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 9672b8c272SStefano Zampini } 979566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 9872b8c272SStefano Zampini if (deluxe_ctx->seq_mat_inv_sum[i]) { 9972b8c272SStefano Zampini PetscScalar *x; 10072b8c272SStefano Zampini 1019566063dSJacob Faibussowitsch PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x)); 1029566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x)); 1039566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x)); 1049566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 1059566063dSJacob Faibussowitsch PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 106ac632422SStefano Zampini } 10716e386b8SStefano Zampini if (deluxe_ctx->change) { 10816e386b8SStefano Zampini Mat change; 10916e386b8SStefano Zampini 1109566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1119566063dSJacob Faibussowitsch PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 1129566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 1139566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 11416e386b8SStefano Zampini } else { 1159566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 1169566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 11772b8c272SStefano Zampini } 118674ae819SStefano Zampini } 11916e386b8SStefano Zampini } 120674ae819SStefano Zampini /* put local boundary part in global vector */ 1219566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 1229566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 123674ae819SStefano Zampini PetscFunctionReturn(0); 124674ae819SStefano Zampini } 125674ae819SStefano Zampini 1269371c9d4SSatish Balay PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) { 127674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 128674ae819SStefano Zampini 129674ae819SStefano Zampini PetscFunctionBegin; 130674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 131674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2); 132674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3); 13308401ef6SPierre Jolivet PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 134cac4c232SBarry Smith PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector)); 135674ae819SStefano Zampini PetscFunctionReturn(0); 136674ae819SStefano Zampini } 137674ae819SStefano Zampini 1389371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) { 139674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 140674ae819SStefano Zampini 141674ae819SStefano Zampini PetscFunctionBegin; 1429566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 1439566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 144674ae819SStefano Zampini /* Apply partition of unity */ 1459566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector)); 146674ae819SStefano Zampini PetscFunctionReturn(0); 147674ae819SStefano Zampini } 148674ae819SStefano Zampini 1499371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) { 150674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 151674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 152674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 153674ae819SStefano Zampini 154674ae819SStefano Zampini PetscFunctionBegin; 155674ae819SStefano Zampini /* get local boundary part of global vector */ 1569566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1579566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1585a95e1ceSStefano Zampini if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 1595a95e1ceSStefano Zampini PetscInt i; 1602b095fd8SStefano Zampini PetscScalar *array_y; 1612b095fd8SStefano Zampini const PetscScalar *array_D; 1629566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &array_y)); 1639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(pcis->D, &array_D)); 1649371c9d4SSatish Balay for (i = 0; i < deluxe_ctx->n_simple; i++) { array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; } 1659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 1669566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &array_y)); 16734a97f8cSStefano Zampini } 168729c86d3Sstefano_zampini /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 1695a95e1ceSStefano Zampini if (deluxe_ctx->seq_mat) { 17072b8c272SStefano Zampini PetscInt i; 17172b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 17216e386b8SStefano Zampini if (deluxe_ctx->change) { 17316e386b8SStefano Zampini Mat change; 17416e386b8SStefano Zampini 1759566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 1769566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 1779566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1789566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 17916e386b8SStefano Zampini } else { 1809566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 1819566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 18216e386b8SStefano Zampini } 18372b8c272SStefano Zampini if (deluxe_ctx->seq_mat_inv_sum[i]) { 18472b8c272SStefano Zampini PetscScalar *x; 18572b8c272SStefano Zampini 1869566063dSJacob Faibussowitsch PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x)); 1879566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x)); 1889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x)); 1899566063dSJacob Faibussowitsch PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 1909566063dSJacob Faibussowitsch PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 191ac632422SStefano Zampini } 1929566063dSJacob Faibussowitsch PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 19316e386b8SStefano Zampini if (deluxe_ctx->change) { 19472b8c272SStefano Zampini if (deluxe_ctx->change_with_qr) { 19572b8c272SStefano Zampini Mat change; 19672b8c272SStefano Zampini 1979566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1989566063dSJacob Faibussowitsch PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 19972b8c272SStefano Zampini } else { 2009566063dSJacob Faibussowitsch PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 20116e386b8SStefano Zampini } 2029566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 2039566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 20472b8c272SStefano Zampini } else { 2059566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 2069566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 20772b8c272SStefano Zampini } 20872b8c272SStefano Zampini } 209674ae819SStefano Zampini } 210674ae819SStefano Zampini PetscFunctionReturn(0); 211674ae819SStefano Zampini } 212674ae819SStefano Zampini 2139371c9d4SSatish Balay PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) { 214674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 215674ae819SStefano Zampini 216674ae819SStefano Zampini PetscFunctionBegin; 217674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 218674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2); 219674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3); 22008401ef6SPierre Jolivet PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 221cac4c232SBarry Smith PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector)); 222674ae819SStefano Zampini PetscFunctionReturn(0); 223674ae819SStefano Zampini } 224674ae819SStefano Zampini 2259371c9d4SSatish Balay PetscErrorCode PCBDDCScalingSetUp(PC pc) { 226674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 227674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 228674ae819SStefano Zampini 229674ae819SStefano Zampini PetscFunctionBegin; 230674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 2319566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 232674ae819SStefano Zampini /* create work vector for the operator */ 2339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pcbddc->work_scaling)); 2349566063dSJacob Faibussowitsch PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling)); 23534a97f8cSStefano Zampini /* always rebuild pcis->D */ 23628d874f6SStefano Zampini if (pcis->use_stiffness_scaling) { 23715f235b8SStefano Zampini PetscScalar *a; 23815f235b8SStefano Zampini PetscInt i, n; 23915f235b8SStefano Zampini 2409566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N)); 2419566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 2429566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 2439566063dSJacob Faibussowitsch PetscCall(VecAbs(pcis->D)); 2449566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(pcis->D, &n)); 2459566063dSJacob Faibussowitsch PetscCall(VecGetArray(pcis->D, &a)); 2469371c9d4SSatish Balay for (i = 0; i < n; i++) 2479371c9d4SSatish Balay if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0; 2489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(pcis->D, &a)); 249674ae819SStefano Zampini } 2509566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 2519566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2529566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2539566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2549566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2559566063dSJacob Faibussowitsch PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B)); 256674ae819SStefano Zampini /* now setup */ 257681e7c04SStefano Zampini if (pcbddc->use_deluxe_scaling) { 258*48a46eb9SPierre Jolivet if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 2599566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 2609566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe)); 2619566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe)); 262674ae819SStefano Zampini } else { 2639566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic)); 2649566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic)); 265674ae819SStefano Zampini } 2669566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 26734a97f8cSStefano Zampini 268674ae819SStefano Zampini /* test */ 269674ae819SStefano Zampini if (pcbddc->dbg_flag) { 27072b8c272SStefano Zampini Mat B0_B = NULL; 27172b8c272SStefano Zampini Vec B0_Bv = NULL, B0_Bv2 = NULL; 27234a97f8cSStefano Zampini Vec vec2_global; 273674ae819SStefano Zampini PetscViewer viewer = pcbddc->dbg_viewer; 27434a97f8cSStefano Zampini PetscReal error; 275674ae819SStefano Zampini 276674ae819SStefano Zampini /* extension -> from local to parallel */ 2779566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 2789566063dSJacob Faibussowitsch PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 2799566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2809566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2819566063dSJacob Faibussowitsch PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global)); 2829566063dSJacob Faibussowitsch PetscCall(VecCopy(pcis->vec1_global, vec2_global)); 2839566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 28572b8c272SStefano Zampini if (pcbddc->benign_n) { 28672b8c272SStefano Zampini IS is_dummy; 28772b8c272SStefano Zampini 2889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 2899566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 2909566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_dummy)); 2919566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv)); 2929566063dSJacob Faibussowitsch PetscCall(VecDuplicate(B0_Bv, &B0_Bv2)); 2939566063dSJacob Faibussowitsch PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv)); 29472b8c272SStefano Zampini } 2959566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global)); 29672b8c272SStefano Zampini if (pcbddc->benign_saddle_point) { 29772b8c272SStefano Zampini PetscReal errorl = 0.; 2989566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2999566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 30072b8c272SStefano Zampini if (pcbddc->benign_n) { 3019566063dSJacob Faibussowitsch PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2)); 3029566063dSJacob Faibussowitsch PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2)); 3039566063dSJacob Faibussowitsch PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl)); 30472b8c272SStefano Zampini } 3059566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc))); 30663a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error)); 30772b8c272SStefano Zampini } 3089566063dSJacob Faibussowitsch PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global)); 3099566063dSJacob Faibussowitsch PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 31063a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error)); 3119566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec2_global)); 31234a97f8cSStefano Zampini 313674ae819SStefano Zampini /* restriction -> from parallel to local */ 3149566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 3159566063dSJacob Faibussowitsch PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 3169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3179566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3189566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B)); 3199566063dSJacob Faibussowitsch PetscCall(VecScale(pcis->vec1_B, -1.0)); 3209566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3219566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3229566063dSJacob Faibussowitsch PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 32363a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error)); 3249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B0_B)); 3259566063dSJacob Faibussowitsch PetscCall(VecDestroy(&B0_Bv)); 3269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&B0_Bv2)); 327674ae819SStefano Zampini } 328674ae819SStefano Zampini PetscFunctionReturn(0); 329674ae819SStefano Zampini } 330674ae819SStefano Zampini 3319371c9d4SSatish Balay PetscErrorCode PCBDDCScalingDestroy(PC pc) { 332674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 333674ae819SStefano Zampini 334674ae819SStefano Zampini PetscFunctionBegin; 3351baa6e33SBarry Smith if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 3369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pcbddc->work_scaling)); 3379566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL)); 3389566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL)); 339674ae819SStefano Zampini PetscFunctionReturn(0); 340674ae819SStefano Zampini } 341674ae819SStefano Zampini 3429371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) { 34334a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 34434a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx; 34534a97f8cSStefano Zampini 34634a97f8cSStefano Zampini PetscFunctionBegin; 3479566063dSJacob Faibussowitsch PetscCall(PetscNew(&deluxe_ctx)); 34834a97f8cSStefano Zampini pcbddc->deluxe_ctx = deluxe_ctx; 34934a97f8cSStefano Zampini PetscFunctionReturn(0); 35034a97f8cSStefano Zampini } 35134a97f8cSStefano Zampini 3529371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) { 35334a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 35434a97f8cSStefano Zampini 35534a97f8cSStefano Zampini PetscFunctionBegin; 3569566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 3579566063dSJacob Faibussowitsch PetscCall(PetscFree(pcbddc->deluxe_ctx)); 35834a97f8cSStefano Zampini PetscFunctionReturn(0); 35934a97f8cSStefano Zampini } 36034a97f8cSStefano Zampini 3619371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) { 36272b8c272SStefano Zampini PetscInt i; 36334a97f8cSStefano Zampini 36434a97f8cSStefano Zampini PetscFunctionBegin; 3659566063dSJacob Faibussowitsch PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 36634a97f8cSStefano Zampini deluxe_ctx->n_simple = 0; 36772b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 3689566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 3699566063dSJacob Faibussowitsch PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 3709566063dSJacob Faibussowitsch PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 3719566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 3729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 37372b8c272SStefano Zampini } 3749566063dSJacob Faibussowitsch PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum)); 3759566063dSJacob Faibussowitsch PetscCall(PetscFree(deluxe_ctx->workspace)); 37672b8c272SStefano Zampini deluxe_ctx->seq_n = 0; 37734a97f8cSStefano Zampini PetscFunctionReturn(0); 37834a97f8cSStefano Zampini } 37934a97f8cSStefano Zampini 3809371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) { 38134a97f8cSStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 38234a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 38334a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 384b96c3477SStefano Zampini PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 38534a97f8cSStefano Zampini 38634a97f8cSStefano Zampini PetscFunctionBegin; 38772b8c272SStefano Zampini /* reset data structures if the topology has changed */ 3881baa6e33SBarry Smith if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 38934a97f8cSStefano Zampini 390b1b3d7a2SStefano Zampini /* Compute data structures to solve sequential problems */ 3919566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 3925db18549SStefano Zampini 393b1b3d7a2SStefano Zampini /* diagonal scaling on interface dofs not contained in cc */ 394d62866d3SStefano Zampini if (sub_schurs->is_vertices || sub_schurs->is_dir) { 3951b968477SStefano Zampini PetscInt n_com, n_dir; 3961b968477SStefano Zampini n_com = 0; 397*48a46eb9SPierre Jolivet if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com)); 3981b968477SStefano Zampini n_dir = 0; 399*48a46eb9SPierre Jolivet if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 40072b8c272SStefano Zampini if (!deluxe_ctx->n_simple) { 4011b968477SStefano Zampini deluxe_ctx->n_simple = n_dir + n_com; 4029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B)); 403d62866d3SStefano Zampini if (sub_schurs->is_vertices) { 4049bb4a8caSStefano Zampini PetscInt nmap; 4059bb4a8caSStefano Zampini const PetscInt *idxs; 4061b968477SStefano Zampini 4079566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 4089566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B)); 40963a3b9bcSJacob Faibussowitsch PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com); 4109566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 4111b968477SStefano Zampini } 412d62866d3SStefano Zampini if (sub_schurs->is_dir) { 4131b968477SStefano Zampini PetscInt nmap; 4141b968477SStefano Zampini const PetscInt *idxs; 4151b968477SStefano Zampini 4169566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 4179566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com)); 41863a3b9bcSJacob Faibussowitsch PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir); 4199566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 4201b968477SStefano Zampini } 4219566063dSJacob Faibussowitsch PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B)); 4229bb4a8caSStefano Zampini } else { 4232472a847SBarry Smith PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple); 42472b8c272SStefano Zampini } 42572b8c272SStefano Zampini } else { 426b1b3d7a2SStefano Zampini deluxe_ctx->n_simple = 0; 4270a545947SLisandro Dalcin deluxe_ctx->idx_simple_B = NULL; 428b1b3d7a2SStefano Zampini } 42934a97f8cSStefano Zampini PetscFunctionReturn(0); 43034a97f8cSStefano Zampini } 43134a97f8cSStefano Zampini 4329371c9d4SSatish Balay static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) { 4335db18549SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4345db18549SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 435b96c3477SStefano Zampini PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 43672b8c272SStefano Zampini PetscScalar *matdata, *matdata2; 43772b8c272SStefano Zampini PetscInt i, max_subset_size, cum, cum2; 43872b8c272SStefano Zampini const PetscInt *idxs; 43972b8c272SStefano Zampini PetscBool newsetup = PETSC_FALSE; 4405db18549SStefano Zampini 4415db18549SStefano Zampini PetscFunctionBegin; 44228b400f6SJacob Faibussowitsch PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs"); 44386bfa4cfSStefano Zampini if (!sub_schurs->n_subs) PetscFunctionReturn(0); 4449221af80SStefano Zampini 44572b8c272SStefano Zampini /* Allocate arrays for subproblems */ 44672b8c272SStefano Zampini if (!deluxe_ctx->seq_n) { 44772b8c272SStefano Zampini deluxe_ctx->seq_n = sub_schurs->n_subs; 4489566063dSJacob Faibussowitsch PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum)); 44972b8c272SStefano Zampini newsetup = PETSC_TRUE; 45063a3b9bcSJacob Faibussowitsch } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs); 4516080607fSStefano Zampini 4522f41f7d1SStefano Zampini /* the change of basis is just a reference to sub_schurs->change (if any) */ 4532f41f7d1SStefano Zampini deluxe_ctx->change = sub_schurs->change; 45472b8c272SStefano Zampini deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 4555db18549SStefano Zampini 45672b8c272SStefano Zampini /* Create objects for deluxe */ 45772b8c272SStefano Zampini max_subset_size = 0; 45872b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 45972b8c272SStefano Zampini PetscInt subset_size; 4609566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 46172b8c272SStefano Zampini max_subset_size = PetscMax(subset_size, max_subset_size); 46272b8c272SStefano Zampini } 463*48a46eb9SPierre Jolivet if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace)); 46472b8c272SStefano Zampini cum = cum2 = 0; 4659566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 4669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata)); 4679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2)); 46872b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 46972b8c272SStefano Zampini PetscInt subset_size; 470925dfd53SStefano Zampini 4719566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 47272b8c272SStefano Zampini if (newsetup) { 47372b8c272SStefano Zampini IS sub; 47472b8c272SStefano Zampini /* work vectors */ 4759566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i])); 4769566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i])); 47772b8c272SStefano Zampini 47872b8c272SStefano Zampini /* scatters */ 4799566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub)); 4809566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i])); 4819566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub)); 482839e9adbSstefano_zampini } 48372b8c272SStefano Zampini 48472b8c272SStefano Zampini /* S_E_j */ 4859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 4869566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i])); 487839e9adbSstefano_zampini 48872b8c272SStefano Zampini /* \sum_k S^k_E_j */ 4899566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 4909566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i])); 4919566063dSJacob Faibussowitsch PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef)); 4929566063dSJacob Faibussowitsch PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian)); 4933829e7e6SStefano Zampini if (sub_schurs->is_hermitian) { 4949566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL)); 495d62866d3SStefano Zampini } else { 4969566063dSJacob Faibussowitsch PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL)); 497d62866d3SStefano Zampini } 498839e9adbSstefano_zampini if (pcbddc->deluxe_singlemat) { 499839e9adbSstefano_zampini Mat X, Y; 5003829e7e6SStefano Zampini if (!sub_schurs->is_hermitian) { 5019566063dSJacob Faibussowitsch PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X)); 502839e9adbSstefano_zampini } else { 5039566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 504839e9adbSstefano_zampini X = deluxe_ctx->seq_mat[i]; 505839e9adbSstefano_zampini } 5069566063dSJacob Faibussowitsch PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y)); 5073829e7e6SStefano Zampini if (!sub_schurs->is_hermitian) { 5089566063dSJacob Faibussowitsch PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 509839e9adbSstefano_zampini } else { 5109566063dSJacob Faibussowitsch PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 511839e9adbSstefano_zampini } 512729c86d3Sstefano_zampini 5139566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 5149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 5159566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 5162f41f7d1SStefano Zampini if (deluxe_ctx->change) { 5172f41f7d1SStefano Zampini Mat C, CY; 51828b400f6SJacob Faibussowitsch PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis"); 5199566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL)); 5209566063dSJacob Faibussowitsch PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY)); 5219566063dSJacob Faibussowitsch PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 5229566063dSJacob Faibussowitsch PetscCall(MatDestroy(&CY)); 5239566063dSJacob Faibussowitsch PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 5242f41f7d1SStefano Zampini } 5259566063dSJacob Faibussowitsch PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y)); 526839e9adbSstefano_zampini deluxe_ctx->seq_mat[i] = Y; 52772b8c272SStefano Zampini } 52872b8c272SStefano Zampini cum += subset_size; 52972b8c272SStefano Zampini cum2 += subset_size * subset_size; 53072b8c272SStefano Zampini } 5319566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 5329566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata)); 5339566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2)); 5342f41f7d1SStefano Zampini if (pcbddc->deluxe_singlemat) { 5352f41f7d1SStefano Zampini deluxe_ctx->change = NULL; 5362f41f7d1SStefano Zampini deluxe_ctx->change_with_qr = PETSC_FALSE; 5372f41f7d1SStefano Zampini } 5385db18549SStefano Zampini 53972b8c272SStefano Zampini if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 54072b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 54172b8c272SStefano Zampini if (newsetup) { 54272b8c272SStefano Zampini PC pc; 54372b8c272SStefano Zampini 5449566063dSJacob Faibussowitsch PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc)); 5459566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCLU)); 5469566063dSJacob Faibussowitsch PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 54772b8c272SStefano Zampini } 5489566063dSJacob Faibussowitsch PetscCall(KSPSetUp(deluxe_ctx->change[i])); 54972b8c272SStefano Zampini } 55016e386b8SStefano Zampini } 5515db18549SStefano Zampini PetscFunctionReturn(0); 5525db18549SStefano Zampini } 553