15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h> 25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h> 372b8c272SStefano Zampini #include <petscblaslapack.h> 4839e9adbSstefano_zampini #include <../src/mat/impls/dense/seq/dense.h> 5674ae819SStefano Zampini 634a97f8cSStefano Zampini /* prototypes for deluxe functions */ 734a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 834a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 934a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 105a95e1ceSStefano Zampini static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 1134a97f8cSStefano Zampini static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12674ae819SStefano Zampini 13*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) 14*d71ae5a4SJacob Faibussowitsch { 15839e9adbSstefano_zampini Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 161683a169SBarry Smith const PetscScalar *b; 171683a169SBarry Smith PetscScalar *x; 18839e9adbSstefano_zampini PetscInt n; 19839e9adbSstefano_zampini PetscBLASInt nrhs, info, m; 20839e9adbSstefano_zampini PetscBool flg; 21839e9adbSstefano_zampini 22839e9adbSstefano_zampini PetscFunctionBegin; 239566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 2528b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 269566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 2728b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 28839e9adbSstefano_zampini 299566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, NULL, &n)); 309566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n, &nrhs)); 319566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 329566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &x)); 339566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x, b, m * nrhs)); 349566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 35839e9adbSstefano_zampini 36839e9adbSstefano_zampini if (A->factortype == MAT_FACTOR_LU) { 37792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 3828b400f6SJacob Faibussowitsch PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve"); 393829e7e6SStefano Zampini } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported"); 40839e9adbSstefano_zampini 419566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &x)); 429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 43839e9adbSstefano_zampini PetscFunctionReturn(0); 44839e9adbSstefano_zampini } 45839e9adbSstefano_zampini 46*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 47*d71ae5a4SJacob Faibussowitsch { 48674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 49674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 50674ae819SStefano Zampini 51674ae819SStefano Zampini PetscFunctionBegin; 52674ae819SStefano Zampini /* Apply partition of unity */ 539566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector)); 549566063dSJacob Faibussowitsch PetscCall(VecSet(global_vector, 0.0)); 559566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 569566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 57674ae819SStefano Zampini PetscFunctionReturn(0); 58674ae819SStefano Zampini } 59674ae819SStefano Zampini 60*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 61*d71ae5a4SJacob Faibussowitsch { 62674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 63674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 64674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 65674ae819SStefano Zampini 66674ae819SStefano Zampini PetscFunctionBegin; 679566063dSJacob Faibussowitsch PetscCall(VecSet(pcbddc->work_scaling, 0.0)); 689566063dSJacob Faibussowitsch PetscCall(VecSet(y, 0.0)); 695a95e1ceSStefano Zampini if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 705a95e1ceSStefano Zampini PetscInt i; 712b095fd8SStefano Zampini const PetscScalar *array_x, *array_D; 722b095fd8SStefano Zampini PetscScalar *array; 739566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &array_x)); 749566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(pcis->D, &array_D)); 759566063dSJacob Faibussowitsch PetscCall(VecGetArray(pcbddc->work_scaling, &array)); 76ad540459SPierre Jolivet for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]]; 779566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(pcbddc->work_scaling, &array)); 789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 799566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &array_x)); 8034a97f8cSStefano Zampini } 81ac632422SStefano Zampini /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 825a95e1ceSStefano Zampini if (deluxe_ctx->seq_mat) { 8372b8c272SStefano Zampini PetscInt i; 8472b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 8516e386b8SStefano Zampini if (deluxe_ctx->change) { 869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 8872b8c272SStefano Zampini if (deluxe_ctx->change_with_qr) { 8972b8c272SStefano Zampini Mat change; 9072b8c272SStefano Zampini 919566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 929566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 9372b8c272SStefano Zampini } else { 949566063dSJacob Faibussowitsch PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 9516e386b8SStefano Zampini } 9672b8c272SStefano Zampini } else { 979566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 989566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 9972b8c272SStefano Zampini } 1009566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 10172b8c272SStefano Zampini if (deluxe_ctx->seq_mat_inv_sum[i]) { 10272b8c272SStefano Zampini PetscScalar *x; 10372b8c272SStefano Zampini 1049566063dSJacob Faibussowitsch PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x)); 1059566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x)); 1069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x)); 1079566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 1089566063dSJacob Faibussowitsch PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 109ac632422SStefano Zampini } 11016e386b8SStefano Zampini if (deluxe_ctx->change) { 11116e386b8SStefano Zampini Mat change; 11216e386b8SStefano Zampini 1139566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1149566063dSJacob Faibussowitsch PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 1159566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 1169566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 11716e386b8SStefano Zampini } else { 1189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 1199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 12072b8c272SStefano Zampini } 121674ae819SStefano Zampini } 12216e386b8SStefano Zampini } 123674ae819SStefano Zampini /* put local boundary part in global vector */ 1249566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 1259566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 126674ae819SStefano Zampini PetscFunctionReturn(0); 127674ae819SStefano Zampini } 128674ae819SStefano Zampini 129*d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 130*d71ae5a4SJacob Faibussowitsch { 131674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 132674ae819SStefano Zampini 133674ae819SStefano Zampini PetscFunctionBegin; 134674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 135674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2); 136674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3); 13708401ef6SPierre Jolivet PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 138cac4c232SBarry Smith PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector)); 139674ae819SStefano Zampini PetscFunctionReturn(0); 140674ae819SStefano Zampini } 141674ae819SStefano Zampini 142*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 143*d71ae5a4SJacob Faibussowitsch { 144674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 145674ae819SStefano Zampini 146674ae819SStefano Zampini PetscFunctionBegin; 1479566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 1489566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 149674ae819SStefano Zampini /* Apply partition of unity */ 1509566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector)); 151674ae819SStefano Zampini PetscFunctionReturn(0); 152674ae819SStefano Zampini } 153674ae819SStefano Zampini 154*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 155*d71ae5a4SJacob Faibussowitsch { 156674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 157674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 158674ae819SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 159674ae819SStefano Zampini 160674ae819SStefano Zampini PetscFunctionBegin; 161674ae819SStefano Zampini /* get local boundary part of global vector */ 1629566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1639566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1645a95e1ceSStefano Zampini if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 1655a95e1ceSStefano Zampini PetscInt i; 1662b095fd8SStefano Zampini PetscScalar *array_y; 1672b095fd8SStefano Zampini const PetscScalar *array_D; 1689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &array_y)); 1699566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(pcis->D, &array_D)); 170ad540459SPierre Jolivet for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 1719566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 1729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &array_y)); 17334a97f8cSStefano Zampini } 174729c86d3Sstefano_zampini /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 1755a95e1ceSStefano Zampini if (deluxe_ctx->seq_mat) { 17672b8c272SStefano Zampini PetscInt i; 17772b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 17816e386b8SStefano Zampini if (deluxe_ctx->change) { 17916e386b8SStefano Zampini Mat change; 18016e386b8SStefano Zampini 1819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 1829566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 1839566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 1849566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 18516e386b8SStefano Zampini } else { 1869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 1879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 18816e386b8SStefano Zampini } 18972b8c272SStefano Zampini if (deluxe_ctx->seq_mat_inv_sum[i]) { 19072b8c272SStefano Zampini PetscScalar *x; 19172b8c272SStefano Zampini 1929566063dSJacob Faibussowitsch PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x)); 1939566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x)); 1949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x)); 1959566063dSJacob Faibussowitsch PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 1969566063dSJacob Faibussowitsch PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 197ac632422SStefano Zampini } 1989566063dSJacob Faibussowitsch PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 19916e386b8SStefano Zampini if (deluxe_ctx->change) { 20072b8c272SStefano Zampini if (deluxe_ctx->change_with_qr) { 20172b8c272SStefano Zampini Mat change; 20272b8c272SStefano Zampini 2039566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 2049566063dSJacob Faibussowitsch PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 20572b8c272SStefano Zampini } else { 2069566063dSJacob Faibussowitsch PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 20716e386b8SStefano Zampini } 2089566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 2099566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 21072b8c272SStefano Zampini } else { 2119566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 2129566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 21372b8c272SStefano Zampini } 21472b8c272SStefano Zampini } 215674ae819SStefano Zampini } 216674ae819SStefano Zampini PetscFunctionReturn(0); 217674ae819SStefano Zampini } 218674ae819SStefano Zampini 219*d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 220*d71ae5a4SJacob Faibussowitsch { 221674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 222674ae819SStefano Zampini 223674ae819SStefano Zampini PetscFunctionBegin; 224674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 225674ae819SStefano Zampini PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2); 226674ae819SStefano Zampini PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3); 22708401ef6SPierre Jolivet PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 228cac4c232SBarry Smith PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector)); 229674ae819SStefano Zampini PetscFunctionReturn(0); 230674ae819SStefano Zampini } 231674ae819SStefano Zampini 232*d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingSetUp(PC pc) 233*d71ae5a4SJacob Faibussowitsch { 234674ae819SStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 235674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 236674ae819SStefano Zampini 237674ae819SStefano Zampini PetscFunctionBegin; 238674ae819SStefano Zampini PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 2399566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 240674ae819SStefano Zampini /* create work vector for the operator */ 2419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pcbddc->work_scaling)); 2429566063dSJacob Faibussowitsch PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling)); 24334a97f8cSStefano Zampini /* always rebuild pcis->D */ 24428d874f6SStefano Zampini if (pcis->use_stiffness_scaling) { 24515f235b8SStefano Zampini PetscScalar *a; 24615f235b8SStefano Zampini PetscInt i, n; 24715f235b8SStefano Zampini 2489566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N)); 2499566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 2509566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 2519566063dSJacob Faibussowitsch PetscCall(VecAbs(pcis->D)); 2529566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(pcis->D, &n)); 2539566063dSJacob Faibussowitsch PetscCall(VecGetArray(pcis->D, &a)); 2549371c9d4SSatish Balay for (i = 0; i < n; i++) 2559371c9d4SSatish Balay if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0; 2569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(pcis->D, &a)); 257674ae819SStefano Zampini } 2589566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 2599566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2609566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2619566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2629566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2639566063dSJacob Faibussowitsch PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B)); 264674ae819SStefano Zampini /* now setup */ 265681e7c04SStefano Zampini if (pcbddc->use_deluxe_scaling) { 26648a46eb9SPierre Jolivet if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 2679566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 2689566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe)); 2699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe)); 270674ae819SStefano Zampini } else { 2719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic)); 2729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic)); 273674ae819SStefano Zampini } 2749566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 27534a97f8cSStefano Zampini 276674ae819SStefano Zampini /* test */ 277674ae819SStefano Zampini if (pcbddc->dbg_flag) { 27872b8c272SStefano Zampini Mat B0_B = NULL; 27972b8c272SStefano Zampini Vec B0_Bv = NULL, B0_Bv2 = NULL; 28034a97f8cSStefano Zampini Vec vec2_global; 281674ae819SStefano Zampini PetscViewer viewer = pcbddc->dbg_viewer; 28234a97f8cSStefano Zampini PetscReal error; 283674ae819SStefano Zampini 284674ae819SStefano Zampini /* extension -> from local to parallel */ 2859566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 2869566063dSJacob Faibussowitsch PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 2879566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2889566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 2899566063dSJacob Faibussowitsch PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global)); 2909566063dSJacob Faibussowitsch PetscCall(VecCopy(pcis->vec1_global, vec2_global)); 2919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 2929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 29372b8c272SStefano Zampini if (pcbddc->benign_n) { 29472b8c272SStefano Zampini IS is_dummy; 29572b8c272SStefano Zampini 2969566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 2979566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 2989566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_dummy)); 2999566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv)); 3009566063dSJacob Faibussowitsch PetscCall(VecDuplicate(B0_Bv, &B0_Bv2)); 3019566063dSJacob Faibussowitsch PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv)); 30272b8c272SStefano Zampini } 3039566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global)); 30472b8c272SStefano Zampini if (pcbddc->benign_saddle_point) { 30572b8c272SStefano Zampini PetscReal errorl = 0.; 3069566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 3079566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 30872b8c272SStefano Zampini if (pcbddc->benign_n) { 3099566063dSJacob Faibussowitsch PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2)); 3109566063dSJacob Faibussowitsch PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2)); 3119566063dSJacob Faibussowitsch PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl)); 31272b8c272SStefano Zampini } 3139566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc))); 31463a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error)); 31572b8c272SStefano Zampini } 3169566063dSJacob Faibussowitsch PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global)); 3179566063dSJacob Faibussowitsch PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 31863a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error)); 3199566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec2_global)); 32034a97f8cSStefano Zampini 321674ae819SStefano Zampini /* restriction -> from parallel to local */ 3229566063dSJacob Faibussowitsch PetscCall(VecSet(pcis->vec1_global, 0.0)); 3239566063dSJacob Faibussowitsch PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 3249566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3259566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3269566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B)); 3279566063dSJacob Faibussowitsch PetscCall(VecScale(pcis->vec1_B, -1.0)); 3289566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3299566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 3309566063dSJacob Faibussowitsch PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 33163a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error)); 3329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B0_B)); 3339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&B0_Bv)); 3349566063dSJacob Faibussowitsch PetscCall(VecDestroy(&B0_Bv2)); 335674ae819SStefano Zampini } 336674ae819SStefano Zampini PetscFunctionReturn(0); 337674ae819SStefano Zampini } 338674ae819SStefano Zampini 339*d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCScalingDestroy(PC pc) 340*d71ae5a4SJacob Faibussowitsch { 341674ae819SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 342674ae819SStefano Zampini 343674ae819SStefano Zampini PetscFunctionBegin; 3441baa6e33SBarry Smith if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 3459566063dSJacob Faibussowitsch PetscCall(VecDestroy(&pcbddc->work_scaling)); 3469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL)); 3479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL)); 348674ae819SStefano Zampini PetscFunctionReturn(0); 349674ae819SStefano Zampini } 350674ae819SStefano Zampini 351*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 352*d71ae5a4SJacob Faibussowitsch { 35334a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 35434a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx; 35534a97f8cSStefano Zampini 35634a97f8cSStefano Zampini PetscFunctionBegin; 3579566063dSJacob Faibussowitsch PetscCall(PetscNew(&deluxe_ctx)); 35834a97f8cSStefano Zampini pcbddc->deluxe_ctx = deluxe_ctx; 35934a97f8cSStefano Zampini PetscFunctionReturn(0); 36034a97f8cSStefano Zampini } 36134a97f8cSStefano Zampini 362*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 363*d71ae5a4SJacob Faibussowitsch { 36434a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 36534a97f8cSStefano Zampini 36634a97f8cSStefano Zampini PetscFunctionBegin; 3679566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 3689566063dSJacob Faibussowitsch PetscCall(PetscFree(pcbddc->deluxe_ctx)); 36934a97f8cSStefano Zampini PetscFunctionReturn(0); 37034a97f8cSStefano Zampini } 37134a97f8cSStefano Zampini 372*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 373*d71ae5a4SJacob Faibussowitsch { 37472b8c272SStefano Zampini PetscInt i; 37534a97f8cSStefano Zampini 37634a97f8cSStefano Zampini PetscFunctionBegin; 3779566063dSJacob Faibussowitsch PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 37834a97f8cSStefano Zampini deluxe_ctx->n_simple = 0; 37972b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 3809566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 3819566063dSJacob Faibussowitsch PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 3829566063dSJacob Faibussowitsch PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 3839566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 3849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 38572b8c272SStefano Zampini } 3869566063dSJacob Faibussowitsch PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum)); 3879566063dSJacob Faibussowitsch PetscCall(PetscFree(deluxe_ctx->workspace)); 38872b8c272SStefano Zampini deluxe_ctx->seq_n = 0; 38934a97f8cSStefano Zampini PetscFunctionReturn(0); 39034a97f8cSStefano Zampini } 39134a97f8cSStefano Zampini 392*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 393*d71ae5a4SJacob Faibussowitsch { 39434a97f8cSStefano Zampini PC_IS *pcis = (PC_IS *)pc->data; 39534a97f8cSStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 39634a97f8cSStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 397b96c3477SStefano Zampini PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 39834a97f8cSStefano Zampini 39934a97f8cSStefano Zampini PetscFunctionBegin; 40072b8c272SStefano Zampini /* reset data structures if the topology has changed */ 4011baa6e33SBarry Smith if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 40234a97f8cSStefano Zampini 403b1b3d7a2SStefano Zampini /* Compute data structures to solve sequential problems */ 4049566063dSJacob Faibussowitsch PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 4055db18549SStefano Zampini 406b1b3d7a2SStefano Zampini /* diagonal scaling on interface dofs not contained in cc */ 407d62866d3SStefano Zampini if (sub_schurs->is_vertices || sub_schurs->is_dir) { 4081b968477SStefano Zampini PetscInt n_com, n_dir; 4091b968477SStefano Zampini n_com = 0; 41048a46eb9SPierre Jolivet if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com)); 4111b968477SStefano Zampini n_dir = 0; 41248a46eb9SPierre Jolivet if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 41372b8c272SStefano Zampini if (!deluxe_ctx->n_simple) { 4141b968477SStefano Zampini deluxe_ctx->n_simple = n_dir + n_com; 4159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B)); 416d62866d3SStefano Zampini if (sub_schurs->is_vertices) { 4179bb4a8caSStefano Zampini PetscInt nmap; 4189bb4a8caSStefano Zampini const PetscInt *idxs; 4191b968477SStefano Zampini 4209566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 4219566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B)); 42263a3b9bcSJacob Faibussowitsch PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com); 4239566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 4241b968477SStefano Zampini } 425d62866d3SStefano Zampini if (sub_schurs->is_dir) { 4261b968477SStefano Zampini PetscInt nmap; 4271b968477SStefano Zampini const PetscInt *idxs; 4281b968477SStefano Zampini 4299566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 4309566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com)); 43163a3b9bcSJacob Faibussowitsch PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir); 4329566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 4331b968477SStefano Zampini } 4349566063dSJacob Faibussowitsch PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B)); 4359bb4a8caSStefano Zampini } else { 4362472a847SBarry Smith PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple); 43772b8c272SStefano Zampini } 43872b8c272SStefano Zampini } else { 439b1b3d7a2SStefano Zampini deluxe_ctx->n_simple = 0; 4400a545947SLisandro Dalcin deluxe_ctx->idx_simple_B = NULL; 441b1b3d7a2SStefano Zampini } 44234a97f8cSStefano Zampini PetscFunctionReturn(0); 44334a97f8cSStefano Zampini } 44434a97f8cSStefano Zampini 445*d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 446*d71ae5a4SJacob Faibussowitsch { 4475db18549SStefano Zampini PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4485db18549SStefano Zampini PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 449b96c3477SStefano Zampini PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 45072b8c272SStefano Zampini PetscScalar *matdata, *matdata2; 45172b8c272SStefano Zampini PetscInt i, max_subset_size, cum, cum2; 45272b8c272SStefano Zampini const PetscInt *idxs; 45372b8c272SStefano Zampini PetscBool newsetup = PETSC_FALSE; 4545db18549SStefano Zampini 4555db18549SStefano Zampini PetscFunctionBegin; 45628b400f6SJacob Faibussowitsch PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs"); 45786bfa4cfSStefano Zampini if (!sub_schurs->n_subs) PetscFunctionReturn(0); 4589221af80SStefano Zampini 45972b8c272SStefano Zampini /* Allocate arrays for subproblems */ 46072b8c272SStefano Zampini if (!deluxe_ctx->seq_n) { 46172b8c272SStefano Zampini deluxe_ctx->seq_n = sub_schurs->n_subs; 4629566063dSJacob Faibussowitsch PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum)); 46372b8c272SStefano Zampini newsetup = PETSC_TRUE; 46463a3b9bcSJacob Faibussowitsch } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs); 4656080607fSStefano Zampini 4662f41f7d1SStefano Zampini /* the change of basis is just a reference to sub_schurs->change (if any) */ 4672f41f7d1SStefano Zampini deluxe_ctx->change = sub_schurs->change; 46872b8c272SStefano Zampini deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 4695db18549SStefano Zampini 47072b8c272SStefano Zampini /* Create objects for deluxe */ 47172b8c272SStefano Zampini max_subset_size = 0; 47272b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 47372b8c272SStefano Zampini PetscInt subset_size; 4749566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 47572b8c272SStefano Zampini max_subset_size = PetscMax(subset_size, max_subset_size); 47672b8c272SStefano Zampini } 47748a46eb9SPierre Jolivet if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace)); 47872b8c272SStefano Zampini cum = cum2 = 0; 4799566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 4809566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata)); 4819566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2)); 48272b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 48372b8c272SStefano Zampini PetscInt subset_size; 484925dfd53SStefano Zampini 4859566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 48672b8c272SStefano Zampini if (newsetup) { 48772b8c272SStefano Zampini IS sub; 48872b8c272SStefano Zampini /* work vectors */ 4899566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i])); 4909566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i])); 49172b8c272SStefano Zampini 49272b8c272SStefano Zampini /* scatters */ 4939566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub)); 4949566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i])); 4959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub)); 496839e9adbSstefano_zampini } 49772b8c272SStefano Zampini 49872b8c272SStefano Zampini /* S_E_j */ 4999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 5009566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i])); 501839e9adbSstefano_zampini 50272b8c272SStefano Zampini /* \sum_k S^k_E_j */ 5039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 5049566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i])); 5059566063dSJacob Faibussowitsch PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef)); 5069566063dSJacob Faibussowitsch PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian)); 5073829e7e6SStefano Zampini if (sub_schurs->is_hermitian) { 5089566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL)); 509d62866d3SStefano Zampini } else { 5109566063dSJacob Faibussowitsch PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL)); 511d62866d3SStefano Zampini } 512839e9adbSstefano_zampini if (pcbddc->deluxe_singlemat) { 513839e9adbSstefano_zampini Mat X, Y; 5143829e7e6SStefano Zampini if (!sub_schurs->is_hermitian) { 5159566063dSJacob Faibussowitsch PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X)); 516839e9adbSstefano_zampini } else { 5179566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 518839e9adbSstefano_zampini X = deluxe_ctx->seq_mat[i]; 519839e9adbSstefano_zampini } 5209566063dSJacob Faibussowitsch PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y)); 5213829e7e6SStefano Zampini if (!sub_schurs->is_hermitian) { 5229566063dSJacob Faibussowitsch PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 523839e9adbSstefano_zampini } else { 5249566063dSJacob Faibussowitsch PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 525839e9adbSstefano_zampini } 526729c86d3Sstefano_zampini 5279566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 5289566063dSJacob Faibussowitsch PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 5299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&X)); 5302f41f7d1SStefano Zampini if (deluxe_ctx->change) { 5312f41f7d1SStefano Zampini Mat C, CY; 53228b400f6SJacob Faibussowitsch PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis"); 5339566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL)); 5349566063dSJacob Faibussowitsch PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY)); 5359566063dSJacob Faibussowitsch PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 5369566063dSJacob Faibussowitsch PetscCall(MatDestroy(&CY)); 5379566063dSJacob Faibussowitsch PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 5382f41f7d1SStefano Zampini } 5399566063dSJacob Faibussowitsch PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y)); 540839e9adbSstefano_zampini deluxe_ctx->seq_mat[i] = Y; 54172b8c272SStefano Zampini } 54272b8c272SStefano Zampini cum += subset_size; 54372b8c272SStefano Zampini cum2 += subset_size * subset_size; 54472b8c272SStefano Zampini } 5459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 5469566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata)); 5479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2)); 5482f41f7d1SStefano Zampini if (pcbddc->deluxe_singlemat) { 5492f41f7d1SStefano Zampini deluxe_ctx->change = NULL; 5502f41f7d1SStefano Zampini deluxe_ctx->change_with_qr = PETSC_FALSE; 5512f41f7d1SStefano Zampini } 5525db18549SStefano Zampini 55372b8c272SStefano Zampini if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 55472b8c272SStefano Zampini for (i = 0; i < deluxe_ctx->seq_n; i++) { 55572b8c272SStefano Zampini if (newsetup) { 55672b8c272SStefano Zampini PC pc; 55772b8c272SStefano Zampini 5589566063dSJacob Faibussowitsch PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc)); 5599566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCLU)); 5609566063dSJacob Faibussowitsch PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 56172b8c272SStefano Zampini } 5629566063dSJacob Faibussowitsch PetscCall(KSPSetUp(deluxe_ctx->change[i])); 56372b8c272SStefano Zampini } 56416e386b8SStefano Zampini } 5655db18549SStefano Zampini PetscFunctionReturn(0); 5665db18549SStefano Zampini } 567