15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h> 25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h> 305709791SSatish Balay #include <../src/mat/impls/dense/seq/dense.h> 408122e43SStefano Zampini #include <petscblaslapack.h> 534a97f8cSStefano Zampini 69fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *); 75ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *); 8df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec); 9df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec); 10d62866d3SStefano Zampini 11ca92afb2SStefano Zampini /* if v2 is not present, correction is done in-place */ 12d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13d71ae5a4SJacob Faibussowitsch { 14ca92afb2SStefano Zampini PetscScalar *array; 15ca92afb2SStefano Zampini PetscScalar *array2; 16ca92afb2SStefano Zampini 17ca92afb2SStefano Zampini PetscFunctionBegin; 183ba16761SJacob Faibussowitsch if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 195cbda25cSStefano Zampini if (sol && full) { 205cbda25cSStefano Zampini PetscInt n_I, size_schur; 215cbda25cSStefano Zampini 225cbda25cSStefano Zampini /* get sizes */ 239566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 249566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n_I)); 255cbda25cSStefano Zampini n_I = n_I - size_schur; 265cbda25cSStefano Zampini /* get schur sol from array */ 279566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 289566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 305cbda25cSStefano Zampini /* apply interior sol correction */ 319566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work)); 329566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 339566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v)); 345cbda25cSStefano Zampini } 35ca92afb2SStefano Zampini if (v2) { 36ca92afb2SStefano Zampini PetscInt nl; 37ca92afb2SStefano Zampini 389566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 399566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v2, &nl)); 409566063dSJacob Faibussowitsch PetscCall(VecGetArray(v2, &array2)); 419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array2, array, nl)); 42ca92afb2SStefano Zampini } else { 439566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 44ca92afb2SStefano Zampini array2 = array; 45ca92afb2SStefano Zampini } 46ca92afb2SStefano Zampini if (!sol) { /* change rhs */ 47ca92afb2SStefano Zampini PetscInt n; 48ca92afb2SStefano Zampini for (n = 0; n < ctx->benign_n; n++) { 49ca92afb2SStefano Zampini PetscScalar sum = 0.; 50ca92afb2SStefano Zampini const PetscInt *cols; 51ca92afb2SStefano Zampini PetscInt nz, i; 52ca92afb2SStefano Zampini 539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 549566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 55ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 5622db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 5722db5ddcSStefano Zampini sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 5822db5ddcSStefano Zampini #else 59ca92afb2SStefano Zampini sum = -sum / nz; 6022db5ddcSStefano Zampini #endif 61ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 62ca92afb2SStefano Zampini ctx->benign_save_vals[n] = array2[cols[nz - 1]]; 63ca92afb2SStefano Zampini array2[cols[nz - 1]] = sum; 649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 65ca92afb2SStefano Zampini } 66ca92afb2SStefano Zampini } else { 67ca92afb2SStefano Zampini PetscInt n; 68ca92afb2SStefano Zampini for (n = 0; n < ctx->benign_n; n++) { 69ca92afb2SStefano Zampini PetscScalar sum = 0.; 70ca92afb2SStefano Zampini const PetscInt *cols; 71ca92afb2SStefano Zampini PetscInt nz, i; 729566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 74ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 7522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 7622db5ddcSStefano Zampini sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 7722db5ddcSStefano Zampini #else 78ca92afb2SStefano Zampini sum = -sum / nz; 7922db5ddcSStefano Zampini #endif 80ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 81ca92afb2SStefano Zampini array2[cols[nz - 1]] = ctx->benign_save_vals[n]; 829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 83ca92afb2SStefano Zampini } 84ca92afb2SStefano Zampini } 85ca92afb2SStefano Zampini if (v2) { 869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v2, &array2)); 88ca92afb2SStefano Zampini } else { 899566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 90ca92afb2SStefano Zampini } 915cbda25cSStefano Zampini if (!sol && full) { 925cbda25cSStefano Zampini Vec usedv; 935cbda25cSStefano Zampini PetscInt n_I, size_schur; 945cbda25cSStefano Zampini 955cbda25cSStefano Zampini /* get sizes */ 969566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 979566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n_I)); 985cbda25cSStefano Zampini n_I = n_I - size_schur; 995cbda25cSStefano Zampini /* compute schur rhs correction */ 1005cbda25cSStefano Zampini if (v2) { 1015cbda25cSStefano Zampini usedv = v2; 1025cbda25cSStefano Zampini } else { 1035cbda25cSStefano Zampini usedv = v; 1045cbda25cSStefano Zampini } 1055cbda25cSStefano Zampini /* apply schur rhs correction */ 1069566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work)); 1079566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array)); 1089566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 1099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array)); 1109566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec)); 1119566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 1125cbda25cSStefano Zampini } 1133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 114ca92afb2SStefano Zampini } 115ca92afb2SStefano Zampini 116d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117d71ae5a4SJacob Faibussowitsch { 118df4d28bfSStefano Zampini PCBDDCReuseSolvers ctx; 119683d3df6SStefano Zampini PetscBool copy = PETSC_FALSE; 120d62866d3SStefano Zampini 121d62866d3SStefano Zampini PetscFunctionBegin; 1229566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc, &ctx)); 123683d3df6SStefano Zampini if (full) { 1249566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 1255cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1269566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 1275cbda25cSStefano Zampini #endif 128683d3df6SStefano Zampini copy = ctx->has_vertices; 129d4933d67SStefano Zampini } else { /* interior solver */ 1309566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0)); 131d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1329566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1)); 133d4933d67SStefano Zampini #endif 134683d3df6SStefano Zampini copy = PETSC_TRUE; 135683d3df6SStefano Zampini } 136683d3df6SStefano Zampini /* copy rhs into factored matrix workspace */ 137683d3df6SStefano Zampini if (copy) { 138ca92afb2SStefano Zampini PetscInt n; 139df4d28bfSStefano Zampini PetscScalar *array, *array_solver; 140ca92afb2SStefano Zampini 1419566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rhs, &n)); 1429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array)); 1439566063dSJacob Faibussowitsch PetscCall(VecGetArray(ctx->rhs, &array_solver)); 1449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array_solver, array, n)); 1459566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(ctx->rhs, &array_solver)); 1469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array)); 147683d3df6SStefano Zampini 1489566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full)); 149683d3df6SStefano Zampini if (transpose) { 1509566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol)); 151683d3df6SStefano Zampini } else { 1529566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol)); 153683d3df6SStefano Zampini } 1549566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full)); 155683d3df6SStefano Zampini 156683d3df6SStefano Zampini /* get back data to caller worskpace */ 1579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 1589566063dSJacob Faibussowitsch PetscCall(VecGetArray(sol, &array)); 1599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, array_solver, n)); 1609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sol, &array)); 1619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 162683d3df6SStefano Zampini } else { 163ca92afb2SStefano Zampini if (ctx->benign_n) { 1649566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full)); 165ca92afb2SStefano Zampini if (transpose) { 1669566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol)); 167ca92afb2SStefano Zampini } else { 1689566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, ctx->rhs, sol)); 169ca92afb2SStefano Zampini } 1709566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full)); 171ca92afb2SStefano Zampini } else { 172e28d306cSStefano Zampini if (transpose) { 1739566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, rhs, sol)); 174e28d306cSStefano Zampini } else { 1759566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, rhs, sol)); 176e28d306cSStefano Zampini } 177683d3df6SStefano Zampini } 178ca92afb2SStefano Zampini } 1795cbda25cSStefano Zampini /* restore defaults */ 1809566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 181d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1829566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 183d4933d67SStefano Zampini #endif 1843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 185d62866d3SStefano Zampini } 186d62866d3SStefano Zampini 187d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 188d71ae5a4SJacob Faibussowitsch { 189e28d306cSStefano Zampini PetscFunctionBegin; 1909566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE)); 1913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 192e28d306cSStefano Zampini } 193e28d306cSStefano Zampini 194d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 195d71ae5a4SJacob Faibussowitsch { 196e28d306cSStefano Zampini PetscFunctionBegin; 1979566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE)); 1983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 199683d3df6SStefano Zampini } 200683d3df6SStefano Zampini 201d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 202d71ae5a4SJacob Faibussowitsch { 203683d3df6SStefano Zampini PetscFunctionBegin; 2049566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE)); 2053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 206683d3df6SStefano Zampini } 207683d3df6SStefano Zampini 208d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 209d71ae5a4SJacob Faibussowitsch { 210683d3df6SStefano Zampini PetscFunctionBegin; 2119566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE)); 2123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 213e28d306cSStefano Zampini } 214e28d306cSStefano Zampini 215d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 216d71ae5a4SJacob Faibussowitsch { 21715579a77SStefano Zampini PCBDDCReuseSolvers ctx; 2189f196a02SMartin Diehl PetscBool isascii; 21915579a77SStefano Zampini 22015579a77SStefano Zampini PetscFunctionBegin; 2219566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc, &ctx)); 2229f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2239f196a02SMartin Diehl if (isascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 2249566063dSJacob Faibussowitsch PetscCall(MatView(ctx->F, viewer)); 2259f196a02SMartin Diehl if (isascii) PetscCall(PetscViewerPopFormat(viewer)); 2263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22715579a77SStefano Zampini } 22815579a77SStefano Zampini 229d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 230d71ae5a4SJacob Faibussowitsch { 231ca92afb2SStefano Zampini PetscInt i; 232d62866d3SStefano Zampini 233d62866d3SStefano Zampini PetscFunctionBegin; 2349566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->F)); 2359566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol)); 2369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs)); 2379566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->interior_solver)); 2389566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->correction_solver)); 2399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_R)); 2409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_B)); 2419566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 2429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol_B)); 2439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs_B)); 24448a46eb9SPierre Jolivet for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); 2459566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 2469566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_save_vals)); 2479566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_csAIB)); 2489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 2499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_corr_work)); 2509566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 2513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 252d62866d3SStefano Zampini } 253d5574798SStefano Zampini 254d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) 255d71ae5a4SJacob Faibussowitsch { 25632fe681dSStefano Zampini PCBDDCReuseSolvers ctx; 25732fe681dSStefano Zampini 25832fe681dSStefano Zampini PetscFunctionBegin; 25932fe681dSStefano Zampini PetscCall(PCShellGetContext(pc, &ctx)); 26032fe681dSStefano Zampini PetscCall(PCBDDCReuseSolversReset(ctx)); 26132fe681dSStefano Zampini PetscCall(PetscFree(ctx)); 26232fe681dSStefano Zampini PetscCall(PCShellSetContext(pc, NULL)); 2633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26432fe681dSStefano Zampini } 26532fe681dSStefano Zampini 266d71ae5a4SJacob Faibussowitsch static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 267d71ae5a4SJacob Faibussowitsch { 2683202ece2SStefano Zampini Mat B, C, D, Bd, Cd, AinvBd; 2693202ece2SStefano Zampini KSP ksp; 2703202ece2SStefano Zampini PC pc; 2713202ece2SStefano Zampini PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 2723202ece2SStefano Zampini PetscReal fill = 2.0; 273f11841e3SStefano Zampini PetscInt n_I; 2743202ece2SStefano Zampini PetscMPIInt size; 2753202ece2SStefano Zampini 2763202ece2SStefano Zampini PetscFunctionBegin; 2779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size)); 2787827d75bSBarry Smith PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices"); 279f11841e3SStefano Zampini if (reuse == MAT_REUSE_MATRIX) { 280f11841e3SStefano Zampini PetscBool Sdense; 281f11841e3SStefano Zampini 2829566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 28328b400f6SJacob Faibussowitsch PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense"); 284f11841e3SStefano Zampini } 2859566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 2869566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(M, &ksp)); 2879566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 2889566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU)); 2899566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU)); 2909566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL)); 2919566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense)); 2929566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense)); 2939566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &n_I, NULL)); 294f11841e3SStefano Zampini if (n_I) { 2953202ece2SStefano Zampini if (!Bdense) { 2969566063dSJacob Faibussowitsch PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 2973202ece2SStefano Zampini } else { 2983202ece2SStefano Zampini Bd = B; 2993202ece2SStefano Zampini } 3003202ece2SStefano Zampini 3013202ece2SStefano Zampini if (isLU || isILU || isCHOL) { 3023202ece2SStefano Zampini Mat fact; 3039566063dSJacob Faibussowitsch PetscCall(KSPSetUp(ksp)); 3049566063dSJacob Faibussowitsch PetscCall(PCFactorGetMatrix(pc, &fact)); 3059566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 3069566063dSJacob Faibussowitsch PetscCall(MatMatSolve(fact, Bd, AinvBd)); 3073202ece2SStefano Zampini } else { 30807b1e237SStefano Zampini PetscBool ex = PETSC_TRUE; 30907b1e237SStefano Zampini 31007b1e237SStefano Zampini if (ex) { 3113202ece2SStefano Zampini Mat Ainvd; 3123202ece2SStefano Zampini 3139566063dSJacob Faibussowitsch PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 3149566063dSJacob Faibussowitsch PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 3159566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ainvd)); 31607b1e237SStefano Zampini } else { 31707b1e237SStefano Zampini Vec sol, rhs; 31807b1e237SStefano Zampini PetscScalar *arrayrhs, *arraysol; 31907b1e237SStefano Zampini PetscInt i, nrhs, n; 32007b1e237SStefano Zampini 3219566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 3229566063dSJacob Faibussowitsch PetscCall(MatGetSize(Bd, &n, &nrhs)); 3239566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bd, &arrayrhs)); 3249566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(AinvBd, &arraysol)); 3259566063dSJacob Faibussowitsch PetscCall(KSPGetSolution(ksp, &sol)); 3269566063dSJacob Faibussowitsch PetscCall(KSPGetRhs(ksp, &rhs)); 32707b1e237SStefano Zampini for (i = 0; i < nrhs; i++) { 3289566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(rhs, arrayrhs + i * n)); 3299566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(sol, arraysol + i * n)); 3309566063dSJacob Faibussowitsch PetscCall(KSPSolve(ksp, rhs, sol)); 3319566063dSJacob Faibussowitsch PetscCall(VecResetArray(rhs)); 3329566063dSJacob Faibussowitsch PetscCall(VecResetArray(sol)); 33307b1e237SStefano Zampini } 3349566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bd, &arrayrhs)); 3359566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs)); 33607b1e237SStefano Zampini } 3373202ece2SStefano Zampini } 33848a46eb9SPierre Jolivet if (!Bdense & !issym) PetscCall(MatDestroy(&Bd)); 3395ec10c6aSStefano Zampini 3405ec10c6aSStefano Zampini if (!issym) { 3413202ece2SStefano Zampini if (!Cdense) { 3429566063dSJacob Faibussowitsch PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 3433202ece2SStefano Zampini } else { 3443202ece2SStefano Zampini Cd = C; 3453202ece2SStefano Zampini } 3469566063dSJacob Faibussowitsch PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 34748a46eb9SPierre Jolivet if (!Cdense) PetscCall(MatDestroy(&Cd)); 3485ec10c6aSStefano Zampini } else { 3499566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 35048a46eb9SPierre Jolivet if (!Bdense) PetscCall(MatDestroy(&Bd)); 3515ec10c6aSStefano Zampini } 3529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AinvBd)); 353f11841e3SStefano Zampini } 3543202ece2SStefano Zampini 3553202ece2SStefano Zampini if (D) { 3563202ece2SStefano Zampini Mat Dd; 3573202ece2SStefano Zampini PetscBool Ddense; 3583202ece2SStefano Zampini 3599566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense)); 3603202ece2SStefano Zampini if (!Ddense) { 3619566063dSJacob Faibussowitsch PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 3623202ece2SStefano Zampini } else { 3633202ece2SStefano Zampini Dd = D; 3643202ece2SStefano Zampini } 365f11841e3SStefano Zampini if (n_I) { 3669566063dSJacob Faibussowitsch PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN)); 367f11841e3SStefano Zampini } else { 368f11841e3SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 3699566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S)); 370f11841e3SStefano Zampini } else { 3719566063dSJacob Faibussowitsch PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN)); 372f11841e3SStefano Zampini } 373f11841e3SStefano Zampini } 37448a46eb9SPierre Jolivet if (!Ddense) PetscCall(MatDestroy(&Dd)); 3753202ece2SStefano Zampini } else { 3769566063dSJacob Faibussowitsch PetscCall(MatScale(*S, -1.0)); 3773202ece2SStefano Zampini } 3783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3793202ece2SStefano Zampini } 38034a97f8cSStefano Zampini 381d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 382d71ae5a4SJacob Faibussowitsch { 383be83ff47SStefano Zampini Mat F, A_II, A_IB, A_BI, A_BB, AE_II; 384be83ff47SStefano Zampini Mat S_all; 38557a87bf3SStefano Zampini Vec gstash, lstash; 38657a87bf3SStefano Zampini VecScatter sstash; 387b7ab4a40SStefano Zampini IS is_I, is_I_layer; 388dc456d91SStefano Zampini IS all_subsets, all_subsets_mult, all_subsets_n; 38957a87bf3SStefano Zampini PetscScalar *stasharray, *Bwork; 390791bdc09SStefano Zampini PetscInt *all_local_idx_N, *all_local_subid_N = NULL; 391dc456d91SStefano Zampini PetscInt *auxnum1, *auxnum2; 392791bdc09SStefano Zampini PetscInt *local_subs = sub_schurs->graph->local_subs; 393791bdc09SStefano Zampini PetscInt i, subset_size, max_subset_size, n_local_subs = sub_schurs->graph->n_local_subs; 394683d3df6SStefano Zampini PetscInt n_B, extra, local_size, global_size; 39557a87bf3SStefano Zampini PetscInt local_stash_size; 39608122e43SStefano Zampini PetscBLASInt B_N, B_ierr, B_lwork, *pivots; 3975a95e1ceSStefano Zampini MPI_Comm comm_n; 398f4f7d9d6SStefano Zampini PetscBool deluxe = PETSC_TRUE; 399f4f7d9d6SStefano Zampini PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 4003b03f7bbSStefano Zampini PetscViewer matl_dbg_viewer = NULL; 401791bdc09SStefano Zampini PetscBool flg, multi_element = sub_schurs->graph->multi_element; 402b1b3d7a2SStefano Zampini 403b1b3d7a2SStefano Zampini PetscFunctionBegin; 4049566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A)); 4059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S)); 406e62b6521Sstefano_zampini if (Ain) { 4079566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Ain)); 408a64f4aa4SStefano Zampini sub_schurs->A = Ain; 409a64f4aa4SStefano Zampini } 4103301b35fSStefano Zampini 4119566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Sin)); 412a64f4aa4SStefano Zampini sub_schurs->S = Sin; 413ad540459SPierre Jolivet if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 414a64f4aa4SStefano Zampini 4155a95e1ceSStefano Zampini /* preliminary checks */ 4167827d75bSBarry Smith PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 4175a95e1ceSStefano Zampini 41888113c35SStefano Zampini if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 41988113c35SStefano Zampini 4203b03f7bbSStefano Zampini /* debug (MATLAB) */ 4217f9db97bSStefano Zampini if (sub_schurs->debug) { 4227f9db97bSStefano Zampini PetscMPIInt size, rank; 4237ebab0bbSStefano Zampini PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE; 4247f9db97bSStefano Zampini 4259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size)); 4269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 4277f9db97bSStefano Zampini nr = size; 4289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &print_schurs_ranks)); 429d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 4309566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg)); 4317f9db97bSStefano Zampini if (!flg) print_schurs = PETSC_TRUE; 4327f9db97bSStefano Zampini else { 4337ebab0bbSStefano Zampini print_schurs = PETSC_FALSE; 4349371c9d4SSatish Balay for (i = 0; i < nr; i++) 435835f2295SStefano Zampini if (print_schurs_ranks[i] == rank) { 4369371c9d4SSatish Balay print_schurs = PETSC_TRUE; 4379371c9d4SSatish Balay break; 4389371c9d4SSatish Balay } 4397f9db97bSStefano Zampini } 440d0609cedSBarry Smith PetscOptionsEnd(); 4419566063dSJacob Faibussowitsch PetscCall(PetscFree(print_schurs_ranks)); 4423b03f7bbSStefano Zampini if (print_schurs) { 4433b03f7bbSStefano Zampini char filename[256]; 4443b03f7bbSStefano Zampini 4459566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank)); 4469566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer)); 4479566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB)); 4483b03f7bbSStefano Zampini } 4497f9db97bSStefano Zampini } 4507f9db97bSStefano Zampini 451791bdc09SStefano Zampini /* DEBUG: turn on/off multi-element code path */ 452791bdc09SStefano Zampini PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_multielement_code", &multi_element, NULL)); 453791bdc09SStefano Zampini if (n_local_subs == 0) multi_element = PETSC_FALSE; 454791bdc09SStefano Zampini 4555a95e1ceSStefano Zampini /* restrict work on active processes */ 456991c41b4SStefano Zampini if (sub_schurs->restrict_comm) { 457991c41b4SStefano Zampini PetscSubcomm subcomm; 458991c41b4SStefano Zampini PetscMPIInt color, rank; 459991c41b4SStefano Zampini 4605a95e1ceSStefano Zampini color = 0; 4615a95e1ceSStefano Zampini if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 4629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 4639566063dSJacob Faibussowitsch PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm)); 4649566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetNumber(subcomm, 2)); 4659566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 4669566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL)); 4679566063dSJacob Faibussowitsch PetscCall(PetscSubcommDestroy(&subcomm)); 4685a95e1ceSStefano Zampini if (!sub_schurs->n_subs) { 4699566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n)); 4703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4715a95e1ceSStefano Zampini } 472991c41b4SStefano Zampini } else { 4739566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL)); 474991c41b4SStefano Zampini } 4755a95e1ceSStefano Zampini 476b1b3d7a2SStefano Zampini /* get Schur complement matrices */ 477df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 478a64f4aa4SStefano Zampini Mat tA_IB, tA_BI, tA_BB; 4793301b35fSStefano Zampini PetscBool isseqsbaij; 4809566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB)); 4819566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij)); 4823301b35fSStefano Zampini if (isseqsbaij) { 4839566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB)); 4849566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB)); 4859566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI)); 486a64f4aa4SStefano Zampini } else { 4879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BB)); 488a64f4aa4SStefano Zampini A_BB = tA_BB; 4899566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_IB)); 490a64f4aa4SStefano Zampini A_IB = tA_IB; 4919566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BI)); 492a64f4aa4SStefano Zampini A_BI = tA_BI; 493f11841e3SStefano Zampini } 494a58a30b4SStefano Zampini } else { 4955a95e1ceSStefano Zampini A_II = NULL; 4965a95e1ceSStefano Zampini A_IB = NULL; 4975a95e1ceSStefano Zampini A_BI = NULL; 4985a95e1ceSStefano Zampini A_BB = NULL; 499b1b3d7a2SStefano Zampini } 5005a95e1ceSStefano Zampini S_all = NULL; 501b1b3d7a2SStefano Zampini 502b1b3d7a2SStefano Zampini /* determine interior problems */ 5039566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &i)); 5043dc780c3SStefano Zampini if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 505b1b3d7a2SStefano Zampini PetscBT touched; 506b1b3d7a2SStefano Zampini const PetscInt *idx_B; 507b1b3d7a2SStefano Zampini PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering; 508b1b3d7a2SStefano Zampini 50928b400f6SJacob Faibussowitsch PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency"); 510b1b3d7a2SStefano Zampini /* get sizes */ 5119566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I)); 5129566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 513b1b3d7a2SStefano Zampini 5149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_I + n_B, &local_numbering)); 5159566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_I + n_B, &touched)); 5169566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(n_I + n_B, touched)); 517b1b3d7a2SStefano Zampini 518b1b3d7a2SStefano Zampini /* all boundary dofs must be skipped when adding layers */ 5199566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B)); 52048a46eb9SPierre Jolivet for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j])); 5219566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(local_numbering, idx_B, n_B)); 5229566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B)); 523b1b3d7a2SStefano Zampini 524b1b3d7a2SStefano Zampini /* add prescribed number of layers of dofs */ 525b1b3d7a2SStefano Zampini n_local_dofs = n_B; 526b1b3d7a2SStefano Zampini n_prev_added = n_B; 527b1b3d7a2SStefano Zampini for (layer = 0; layer < nlayers; layer++) { 528b6bace71SJacob Faibussowitsch PetscInt n_added = 0; 529b1b3d7a2SStefano Zampini if (n_local_dofs == n_I + n_B) break; 53063a3b9bcSJacob Faibussowitsch PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B); 5319566063dSJacob Faibussowitsch PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added)); 532b1b3d7a2SStefano Zampini n_prev_added = n_added; 533b1b3d7a2SStefano Zampini n_local_dofs += n_added; 534b1b3d7a2SStefano Zampini if (!n_added) break; 535b1b3d7a2SStefano Zampini } 5369566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&touched)); 537b1b3d7a2SStefano Zampini 538883469d8SStefano Zampini /* IS for I layer dofs in original numbering */ 5399566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer)); 5409566063dSJacob Faibussowitsch PetscCall(PetscFree(local_numbering)); 5419566063dSJacob Faibussowitsch PetscCall(ISSort(is_I_layer)); 542883469d8SStefano Zampini /* IS for I layer dofs in I numbering */ 543df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 544b1b3d7a2SStefano Zampini ISLocalToGlobalMapping ItoNmap; 5459566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap)); 5469566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I)); 5479566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 548b1b3d7a2SStefano Zampini 549b1b3d7a2SStefano Zampini /* II block */ 5509566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II)); 551b1b3d7a2SStefano Zampini } 552b1b3d7a2SStefano Zampini } else { 553b1b3d7a2SStefano Zampini PetscInt n_I; 554b1b3d7a2SStefano Zampini 555b1b3d7a2SStefano Zampini /* IS for I dofs in original numbering */ 5569566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 557a9b99552SStefano Zampini is_I_layer = sub_schurs->is_I; 558b1b3d7a2SStefano Zampini 559b1b3d7a2SStefano Zampini /* IS for I dofs in I numbering (strided 1) */ 560df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 5619566063dSJacob Faibussowitsch PetscCall(ISGetSize(sub_schurs->is_I, &n_I)); 5629566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I)); 563b1b3d7a2SStefano Zampini 564b1b3d7a2SStefano Zampini /* II block is the same */ 5659566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A_II)); 566b1b3d7a2SStefano Zampini AE_II = A_II; 567b1b3d7a2SStefano Zampini } 568b1b3d7a2SStefano Zampini } 5695a95e1ceSStefano Zampini 570883469d8SStefano Zampini /* Get info on subset sizes and sum of all subsets sizes */ 5715a95e1ceSStefano Zampini max_subset_size = 0; 572883469d8SStefano Zampini local_size = 0; 5735a95e1ceSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 5749566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 5755a95e1ceSStefano Zampini max_subset_size = PetscMax(subset_size, max_subset_size); 576883469d8SStefano Zampini local_size += subset_size; 577883469d8SStefano Zampini } 578883469d8SStefano Zampini 579883469d8SStefano Zampini /* Work arrays for local indices */ 580883469d8SStefano Zampini extra = 0; 5819566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 58248a46eb9SPierre Jolivet if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra)); 5839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N)); 584791bdc09SStefano Zampini if (multi_element) PetscCall(PetscMalloc1(n_B + extra, &all_local_subid_N)); 585883469d8SStefano Zampini if (extra) { 586883469d8SStefano Zampini const PetscInt *idxs; 5879566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_I_layer, &idxs)); 5889566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra)); 589791bdc09SStefano Zampini if (multi_element) 590791bdc09SStefano Zampini for (PetscInt j = 0; j < extra; j++) all_local_subid_N[j] = local_subs[idxs[j]]; 5919566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_I_layer, &idxs)); 592883469d8SStefano Zampini } 5939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1)); 5949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2)); 595883469d8SStefano Zampini 596883469d8SStefano Zampini /* Get local indices in local numbering */ 597883469d8SStefano Zampini local_size = 0; 59857a87bf3SStefano Zampini local_stash_size = 0; 5995a95e1ceSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 600883469d8SStefano Zampini const PetscInt *idxs; 601883469d8SStefano Zampini 6029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 6039566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 604eb595f79SStefano Zampini /* start (smallest in global ordering) and multiplicity */ 605eb595f79SStefano Zampini auxnum1[i] = idxs[0]; 60657a87bf3SStefano Zampini auxnum2[i] = subset_size * subset_size; 607883469d8SStefano Zampini /* subset indices in local numbering */ 6089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size)); 609791bdc09SStefano Zampini if (multi_element) 610791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) all_local_subid_N[j + local_size + extra] = local_subs[idxs[j]]; 6119566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 612883469d8SStefano Zampini local_size += subset_size; 61357a87bf3SStefano Zampini local_stash_size += subset_size * subset_size; 614883469d8SStefano Zampini } 615883469d8SStefano Zampini 61679329b78SStefano Zampini /* allocate extra workspace needed only for GETRI or SYTRF when inverting the blocks or the entire Schur complement */ 61711955456SStefano Zampini use_potr = use_sytr = PETSC_FALSE; 61811955456SStefano Zampini if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 619f4f7d9d6SStefano Zampini use_potr = PETSC_TRUE; 62011955456SStefano Zampini } else if (sub_schurs->is_symmetric) { 62111955456SStefano Zampini use_sytr = PETSC_TRUE; 62211955456SStefano Zampini } 62379329b78SStefano Zampini if (local_size && !use_potr && compute_Stilda) { 62459ac4de7SStefano Zampini PetscScalar lwork, dummyscalar = 0.; 62559ac4de7SStefano Zampini PetscBLASInt dummyint = 0; 626d2627357SStefano Zampini 627d2627357SStefano Zampini B_lwork = -1; 6289566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(local_size, &B_N)); 6299566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 630f4f7d9d6SStefano Zampini if (use_sytr) { 631792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 632835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 633f4f7d9d6SStefano Zampini } else { 634792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 635835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 636f4f7d9d6SStefano Zampini } 6379566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 6389566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 6399566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots)); 640056290a2SStefano Zampini } else { 641056290a2SStefano Zampini Bwork = NULL; 642056290a2SStefano Zampini pivots = NULL; 643d2627357SStefano Zampini } 644d2627357SStefano Zampini 64557a87bf3SStefano Zampini /* prepare data for summing up properly schurs on subsets */ 6469566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n)); 6479566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets)); 6489566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n)); 6499566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult)); 6509566063dSJacob Faibussowitsch PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n)); 6519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets)); 6529566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_mult)); 6539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(all_subsets_n, &i)); 65463a3b9bcSJacob Faibussowitsch PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size); 6559566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash)); 6569566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash)); 6579566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash)); 6589566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n)); 6592972d61bSStefano Zampini 6605a95e1ceSStefano Zampini /* subset indices in local boundary numbering */ 6615a95e1ceSStefano Zampini if (!sub_schurs->is_Ej_all) { 6625a95e1ceSStefano Zampini PetscInt *all_local_idx_B; 6635a95e1ceSStefano Zampini 6649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(local_size, &all_local_idx_B)); 6659566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B)); 66663a3b9bcSJacob Faibussowitsch PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size); 6679566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all)); 668b1b3d7a2SStefano Zampini } 669b1b3d7a2SStefano Zampini 67072b8c272SStefano Zampini if (change) { 67172b8c272SStefano Zampini ISLocalToGlobalMapping BtoS; 67272b8c272SStefano Zampini IS change_primal_B; 67372b8c272SStefano Zampini IS change_primal_all; 67472b8c272SStefano Zampini 67528b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 67628b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 6779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub)); 67872b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 67972b8c272SStefano Zampini ISLocalToGlobalMapping NtoS; 6809566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS)); 6819566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i])); 6829566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 68372b8c272SStefano Zampini } 6849566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B)); 6859566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS)); 6869566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all)); 6879566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 6889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_B)); 6899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change)); 69072b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 69172b8c272SStefano Zampini Mat change_sub; 69272b8c272SStefano Zampini 6939566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 6949566063dSJacob Faibussowitsch PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i])); 6953821be0aSBarry Smith PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */ 6969566063dSJacob Faibussowitsch PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY)); 69772b8c272SStefano Zampini if (!sub_schurs->change_with_qr) { 6989566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub)); 69972b8c272SStefano Zampini } else { 70072b8c272SStefano Zampini Mat change_subt; 7019566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt)); 7029566063dSJacob Faibussowitsch PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub)); 7039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_subt)); 70472b8c272SStefano Zampini } 7059566063dSJacob Faibussowitsch PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub)); 7069566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_sub)); 7079566063dSJacob Faibussowitsch PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix)); 7089566063dSJacob Faibussowitsch PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_")); 70972b8c272SStefano Zampini } 7109566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_all)); 71172b8c272SStefano Zampini } 71272b8c272SStefano Zampini 7135a95e1ceSStefano Zampini /* Local matrix of all local Schur on subsets (transposed) */ 7145a95e1ceSStefano Zampini if (!sub_schurs->S_Ej_all) { 71504c5b2e6SStefano Zampini Mat T; 71604c5b2e6SStefano Zampini PetscScalar *v; 71704c5b2e6SStefano Zampini PetscInt *ii, *jj; 71804c5b2e6SStefano Zampini PetscInt cum, i, j, k; 71904c5b2e6SStefano Zampini 72004c5b2e6SStefano Zampini /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 72104c5b2e6SStefano Zampini Allocate properly a representative matrix and duplicate */ 7229566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v)); 72304c5b2e6SStefano Zampini ii[0] = 0; 72404c5b2e6SStefano Zampini cum = 0; 72504c5b2e6SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 7269566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 72704c5b2e6SStefano Zampini for (j = 0; j < subset_size; j++) { 72804c5b2e6SStefano Zampini const PetscInt row = cum + j; 72904c5b2e6SStefano Zampini PetscInt col = cum; 73004c5b2e6SStefano Zampini 73104c5b2e6SStefano Zampini ii[row + 1] = ii[row] + subset_size; 73204c5b2e6SStefano Zampini for (k = ii[row]; k < ii[row + 1]; k++) { 73304c5b2e6SStefano Zampini jj[k] = col; 73404c5b2e6SStefano Zampini col++; 73504c5b2e6SStefano Zampini } 73604c5b2e6SStefano Zampini } 73704c5b2e6SStefano Zampini cum += subset_size; 73804c5b2e6SStefano Zampini } 7399566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T)); 7409566063dSJacob Faibussowitsch PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all)); 7419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 7429566063dSJacob Faibussowitsch PetscCall(PetscFree3(ii, jj, v)); 74304c5b2e6SStefano Zampini } 74404c5b2e6SStefano Zampini /* matrices for deluxe scaling and adaptive selection */ 74504c5b2e6SStefano Zampini if (compute_Stilda) { 74648a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all)); 74748a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all)); 748aa83b6aeSStefano Zampini } 749b1b3d7a2SStefano Zampini 7505a95e1ceSStefano Zampini /* Compute Schur complements explicitly */ 751be83ff47SStefano Zampini F = NULL; 752d943a642SStefano Zampini if (!sub_schurs->schur_explicit) { 753d943a642SStefano Zampini /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 754d943a642SStefano Zampini it is not efficient, unless the economic version of the scaling is used */ 7555a95e1ceSStefano Zampini Mat S_Ej_expl; 7565a95e1ceSStefano Zampini PetscScalar *work; 7575a95e1ceSStefano Zampini PetscInt j, *dummy_idx; 7585a95e1ceSStefano Zampini PetscBool Sdense; 7595a95e1ceSStefano Zampini 7609566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work)); 7615a95e1ceSStefano Zampini local_size = 0; 762b1b3d7a2SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 7635a95e1ceSStefano Zampini IS is_subset_B; 7645a95e1ceSStefano Zampini Mat AE_EE, AE_IE, AE_EI, S_Ej; 7655a95e1ceSStefano Zampini 7665a95e1ceSStefano Zampini /* subsets in original and boundary numbering */ 7679566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B)); 7685a95e1ceSStefano Zampini /* EE block */ 7699566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE)); 7705a95e1ceSStefano Zampini /* IE block */ 7719566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE)); 7725a95e1ceSStefano Zampini /* EI block */ 773d943a642SStefano Zampini if (sub_schurs->is_symmetric) { 7749566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(AE_IE, &AE_EI)); 775d943a642SStefano Zampini } else if (sub_schurs->is_hermitian) { 7769566063dSJacob Faibussowitsch PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI)); 7775a95e1ceSStefano Zampini } else { 7789566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI)); 7795a95e1ceSStefano Zampini } 7809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_subset_B)); 7819566063dSJacob Faibussowitsch PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej)); 7829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EE)); 7839566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_IE)); 7849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EI)); 785b1b3d7a2SStefano Zampini if (AE_II == A_II) { /* we can reuse the same ksp */ 786b1b3d7a2SStefano Zampini KSP ksp; 7879566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp)); 7889566063dSJacob Faibussowitsch PetscCall(MatSchurComplementSetKSP(S_Ej, ksp)); 789b1b3d7a2SStefano Zampini } else { /* build new ksp object which inherits ksp and pc types from the original one */ 790b1b3d7a2SStefano Zampini KSP origksp, schurksp; 791b1b3d7a2SStefano Zampini PC origpc, schurpc; 792b1b3d7a2SStefano Zampini KSPType ksp_type; 793b1b3d7a2SStefano Zampini PetscInt n_internal; 7945a95e1ceSStefano Zampini PetscBool ispcnone; 795b1b3d7a2SStefano Zampini 7969566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp)); 7979566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp)); 7989566063dSJacob Faibussowitsch PetscCall(KSPGetType(origksp, &ksp_type)); 7999566063dSJacob Faibussowitsch PetscCall(KSPSetType(schurksp, ksp_type)); 8009566063dSJacob Faibussowitsch PetscCall(KSPGetPC(schurksp, &schurpc)); 8019566063dSJacob Faibussowitsch PetscCall(KSPGetPC(origksp, &origpc)); 8029566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone)); 8035a95e1ceSStefano Zampini if (!ispcnone) { 8045a95e1ceSStefano Zampini PCType pc_type; 8059566063dSJacob Faibussowitsch PetscCall(PCGetType(origpc, &pc_type)); 8069566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc, pc_type)); 8075a95e1ceSStefano Zampini } else { 8089566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc, PCLU)); 8095a95e1ceSStefano Zampini } 8109566063dSJacob Faibussowitsch PetscCall(ISGetSize(is_I, &n_internal)); 811365a3a41SStefano Zampini if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 8123ca39a21SBarry Smith MatSolverType solver = NULL; 813835f2295SStefano Zampini PetscCall(PCFactorGetMatSolverType(origpc, &solver)); 8141baa6e33SBarry Smith if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver)); 815b1b3d7a2SStefano Zampini } 8169566063dSJacob Faibussowitsch PetscCall(KSPSetUp(schurksp)); 817b1b3d7a2SStefano Zampini } 8189566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 8199566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl)); 8209566063dSJacob Faibussowitsch PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl)); 8219566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense)); 8220fdf79fbSJacob Faibussowitsch PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices"); 823ad540459SPierre Jolivet for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j; 8249566063dSJacob Faibussowitsch PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES)); 8259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej)); 8269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej_expl)); 8275a95e1ceSStefano Zampini local_size += subset_size; 8285a95e1ceSStefano Zampini } 8299566063dSJacob Faibussowitsch PetscCall(PetscFree2(dummy_idx, work)); 830b1b3d7a2SStefano Zampini /* free */ 8319566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I)); 8329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_II)); 8339566063dSJacob Faibussowitsch PetscCall(PetscFree(all_local_idx_N)); 834883469d8SStefano Zampini } else { 8355cbda25cSStefano Zampini Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL; 83632fe681dSStefano Zampini Mat *gdswA; 8379d54b7f4SStefano Zampini Vec Dall = NULL; 838791bdc09SStefano Zampini IS is_A_all, *is_p_r = NULL, is_schur; 8397ebab0bbSStefano Zampini MatType Stype; 8405cbda25cSStefano Zampini PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL; 84104c5b2e6SStefano Zampini PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL; 8421683a169SBarry Smith const PetscScalar *rS_data; 84304c5b2e6SStefano Zampini PetscInt n, n_I, size_schur, size_active_schur, cum, cum2; 8443fc34f97SStefano Zampini PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE; 8453fc34f97SStefano Zampini PetscBool schur_has_vertices, factor_workaround; 84611955456SStefano Zampini PetscBool use_cholesky; 8477ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 8487ebab0bbSStefano Zampini PetscBool oldpin; 8497ebab0bbSStefano Zampini #endif 850791bdc09SStefano Zampini /* multi-element */ 851791bdc09SStefano Zampini IS *is_sub_all = NULL, *is_sub_schur_all = NULL, *is_sub_schur = NULL; 852883469d8SStefano Zampini 853683d3df6SStefano Zampini /* get sizes */ 85481ea8064SStefano Zampini n_I = 0; 85548a46eb9SPierre Jolivet if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I)); 856683d3df6SStefano Zampini economic = PETSC_FALSE; 8579566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum)); 858683d3df6SStefano Zampini if (cum != n_I) economic = PETSC_TRUE; 8599566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL)); 8609d54b7f4SStefano Zampini size_active_schur = local_size; 8619d54b7f4SStefano Zampini 862f17d2ae1SStefano Zampini /* import scaling vector (wrong formulation if we have 3D edges) */ 8639d54b7f4SStefano Zampini if (scaling && compute_Stilda) { 8649d54b7f4SStefano Zampini const PetscScalar *array; 8659d54b7f4SStefano Zampini PetscScalar *array2; 8669d54b7f4SStefano Zampini const PetscInt *idxs; 8679d54b7f4SStefano Zampini PetscInt i; 8689d54b7f4SStefano Zampini 8699566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 8709566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall)); 8719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scaling, &array)); 8729566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall, &array2)); 8739d54b7f4SStefano Zampini for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]]; 8749566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall, &array2)); 8759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scaling, &array)); 8769566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 8779d54b7f4SStefano Zampini deluxe = PETSC_FALSE; 8789d54b7f4SStefano Zampini } 879d62866d3SStefano Zampini 880683d3df6SStefano Zampini /* size active schurs does not count any dirichlet or vertex dof on the interface */ 8813fc34f97SStefano Zampini factor_workaround = PETSC_FALSE; 8823fc34f97SStefano Zampini schur_has_vertices = PETSC_FALSE; 883683d3df6SStefano Zampini cum = n_I + size_active_schur; 884683d3df6SStefano Zampini if (sub_schurs->is_dir) { 885683d3df6SStefano Zampini const PetscInt *idxs; 886683d3df6SStefano Zampini PetscInt n_dir; 887683d3df6SStefano Zampini 8889566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 8899566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 8909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir)); 891791bdc09SStefano Zampini if (multi_element) 892791bdc09SStefano Zampini for (PetscInt j = 0; j < n_dir; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]]; 8939566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 894683d3df6SStefano Zampini cum += n_dir; 89532fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 896d62866d3SStefano Zampini } 897683d3df6SStefano Zampini /* include the primal vertices in the Schur complement */ 898367aa537SStefano Zampini if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 899683d3df6SStefano Zampini PetscInt n_v; 900683d3df6SStefano Zampini 9019566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 902683d3df6SStefano Zampini if (n_v) { 903683d3df6SStefano Zampini const PetscInt *idxs; 904683d3df6SStefano Zampini 9059566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 9069566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v)); 907791bdc09SStefano Zampini if (multi_element) 908791bdc09SStefano Zampini for (PetscInt j = 0; j < n_v; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]]; 9099566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 910683d3df6SStefano Zampini cum += n_v; 91132fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 9123fc34f97SStefano Zampini schur_has_vertices = PETSC_TRUE; 913683d3df6SStefano Zampini } 914683d3df6SStefano Zampini } 915683d3df6SStefano Zampini size_schur = cum - n_I; 9169566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all)); 9177ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 918b470e4b4SRichard Tran Mills oldpin = sub_schurs->A->boundtocpu; 9199566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE)); 9207ebab0bbSStefano Zampini #endif 921683d3df6SStefano Zampini if (cum == n) { 9229566063dSJacob Faibussowitsch PetscCall(ISSetPermutation(is_A_all)); 9239566063dSJacob Faibussowitsch PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A)); 924683d3df6SStefano Zampini } else { 9259566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A)); 926683d3df6SStefano Zampini } 9277ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 9289566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A, oldpin)); 9297ebab0bbSStefano Zampini #endif 93026cc229bSBarry Smith PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix)); 93126cc229bSBarry Smith PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_")); 932791bdc09SStefano Zampini /* subsets ordered last */ 933791bdc09SStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur)); 934791bdc09SStefano Zampini 935791bdc09SStefano Zampini if (multi_element) { 936791bdc09SStefano Zampini PetscInt *idx_sub; 937791bdc09SStefano Zampini 938791bdc09SStefano Zampini PetscCall(PetscMalloc3(n_local_subs, &is_sub_all, n_local_subs, &is_sub_schur_all, n_local_subs, &is_sub_schur)); 939791bdc09SStefano Zampini PetscCall(PetscMalloc1(n + size_schur, &idx_sub)); 940791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) { 941791bdc09SStefano Zampini PetscInt size_sub = 0, size_schur_sub = 0, size_I_sub; 942791bdc09SStefano Zampini 943791bdc09SStefano Zampini for (PetscInt j = 0; j < n_I; j++) 944791bdc09SStefano Zampini if (all_local_subid_N[j] == sub) idx_sub[size_sub++] = j; 945791bdc09SStefano Zampini size_I_sub = size_sub; 946791bdc09SStefano Zampini for (PetscInt j = n_I; j < n_I + size_schur; j++) 947791bdc09SStefano Zampini if (all_local_subid_N[j] == sub) { 948791bdc09SStefano Zampini idx_sub[size_sub++] = j; 949791bdc09SStefano Zampini idx_sub[n + size_schur_sub++] = j - n_I; 950791bdc09SStefano Zampini } 951791bdc09SStefano Zampini 952791bdc09SStefano Zampini PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_sub, idx_sub, PETSC_COPY_VALUES, &is_sub_all[sub])); 953791bdc09SStefano Zampini PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_schur_sub, idx_sub + n, PETSC_COPY_VALUES, &is_sub_schur[sub])); 954791bdc09SStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur_sub, size_I_sub, 1, &is_sub_schur_all[sub])); 955791bdc09SStefano Zampini } 956791bdc09SStefano Zampini PetscCall(PetscFree(idx_sub)); 957791bdc09SStefano Zampini } 958ca92afb2SStefano Zampini 959ca92afb2SStefano Zampini /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 9607ebab0bbSStefano Zampini this is a workaround */ 961ca92afb2SStefano Zampini if (benign_n) { 9627ebab0bbSStefano Zampini Vec v, benign_AIIm1_ones; 963ca92afb2SStefano Zampini ISLocalToGlobalMapping N_to_reor; 964ca92afb2SStefano Zampini IS is_p0, is_p0_p; 9655cbda25cSStefano Zampini PetscScalar *cs_AIB, *AIIm1_data; 9665cbda25cSStefano Zampini PetscInt sizeA; 967ca92afb2SStefano Zampini 9689566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor)); 9699566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 9709566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p)); 9719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0)); 9729566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 9739566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &sizeA)); 9749566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat)); 9759566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat)); 9769566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB)); 9779566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 9789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n, &is_p_r)); 979ca92afb2SStefano Zampini /* compute colsum of A_IB restricted to pressures */ 980ca92afb2SStefano Zampini for (i = 0; i < benign_n; i++) { 9817ebab0bbSStefano Zampini const PetscScalar *array; 982ca92afb2SStefano Zampini const PetscInt *idxs; 983ca92afb2SStefano Zampini PetscInt j, nz; 984ca92afb2SStefano Zampini 9859566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i])); 9869566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 9879566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i], &idxs)); 9885cbda25cSStefano Zampini for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.; 9899566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 9909566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 9919566063dSJacob Faibussowitsch PetscCall(MatMult(A, benign_AIIm1_ones, v)); 9929566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones)); 9939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, &array)); 99422db5ddcSStefano Zampini for (j = 0; j < size_schur; j++) { 99522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 99622db5ddcSStefano Zampini cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz)); 99722db5ddcSStefano Zampini #else 99822db5ddcSStefano Zampini cs_AIB[i * size_schur + j] = array[j + n_I] / nz; 99922db5ddcSStefano Zampini #endif 100022db5ddcSStefano Zampini } 10019566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, &array)); 1002ca92afb2SStefano Zampini } 10039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB)); 10049566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 10059566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v)); 10069566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones)); 10079566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 10089566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 10099566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 10109566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL)); 10119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0_p)); 10129566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 1013ca92afb2SStefano Zampini } 10149566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric)); 10159566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian)); 10169566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef)); 1017883469d8SStefano Zampini 101811955456SStefano Zampini /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 101911955456SStefano Zampini use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 102011955456SStefano Zampini 1021683d3df6SStefano Zampini /* when using the benign subspace trick, the local Schur complements are SPD */ 102235d0533cSStefano Zampini /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 102335d0533cSStefano Zampini Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 102435d0533cSStefano Zampini if (benign_trick) { 102535d0533cSStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 10269566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg)); 102735d0533cSStefano Zampini if (flg) use_cholesky = PETSC_FALSE; 102835d0533cSStefano Zampini } 102979329b78SStefano Zampini if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = use_cholesky ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU; 1030d47842beSStefano Zampini 1031791bdc09SStefano Zampini if (n_I && !multi_element) { 10327ebab0bbSStefano Zampini char stype[64]; 10334ba54290SStefano Zampini PetscBool gpu = PETSC_FALSE; 10345a05ddb0SStefano Zampini 103579329b78SStefano Zampini PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F)); 103603e5aca4SStefano Zampini PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name); 10379566063dSJacob Faibussowitsch PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE)); 103835d0533cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 10399566063dSJacob Faibussowitsch if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 104035d0533cSStefano Zampini #endif 10419566063dSJacob Faibussowitsch PetscCall(MatFactorSetSchurIS(F, is_schur)); 1042883469d8SStefano Zampini 1043883469d8SStefano Zampini /* factorization step */ 104479329b78SStefano Zampini switch (sub_schurs->mat_factor_type) { 104579329b78SStefano Zampini case MAT_FACTOR_CHOLESKY: 10469566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 104779578405SBarry Smith /* be sure that icntl 19 is not set by command line */ 10489566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 19, 2)); 10499566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 1050a0b0af32SStefano Zampini S_lower_triangular = PETSC_TRUE; 105179329b78SStefano Zampini break; 105279329b78SStefano Zampini case MAT_FACTOR_LU: 10539566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 105479578405SBarry Smith /* be sure that icntl 19 is not set by command line */ 10559566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 19, 3)); 10569566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(F, A, NULL)); 105779329b78SStefano Zampini break; 105879329b78SStefano Zampini default: 105979329b78SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1060883469d8SStefano Zampini } 10619566063dSJacob Faibussowitsch PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view")); 1062883469d8SStefano Zampini 10633b03f7bbSStefano Zampini if (matl_dbg_viewer) { 106411955456SStefano Zampini Mat S; 106511955456SStefano Zampini IS is; 106611955456SStefano Zampini 10679566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)A, "A")); 10689566063dSJacob Faibussowitsch PetscCall(MatView(A, matl_dbg_viewer)); 10699566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 10709566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S, "S")); 10719566063dSJacob Faibussowitsch PetscCall(MatView(S, matl_dbg_viewer)); 10729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S)); 10739566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is)); 10749566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is, "I")); 10759566063dSJacob Faibussowitsch PetscCall(ISView(is, matl_dbg_viewer)); 10769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is)); 10779566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is)); 10789566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is, "B")); 10799566063dSJacob Faibussowitsch PetscCall(ISView(is, matl_dbg_viewer)); 10809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is)); 10819566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA")); 10829566063dSJacob Faibussowitsch PetscCall(ISView(is_A_all, matl_dbg_viewer)); 108332fe681dSStefano Zampini for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { 108432fe681dSStefano Zampini IS is; 108532fe681dSStefano Zampini char name[16]; 108632fe681dSStefano Zampini 108732fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i)); 108832fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 108932fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is)); 109032fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)is, name)); 109132fe681dSStefano Zampini PetscCall(ISView(is, matl_dbg_viewer)); 109232fe681dSStefano Zampini PetscCall(ISDestroy(&is)); 109332fe681dSStefano Zampini if (sub_schurs->change) { 109432fe681dSStefano Zampini Mat T; 109532fe681dSStefano Zampini 109632fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i)); 109732fe681dSStefano Zampini PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL)); 109832fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)T, name)); 109932fe681dSStefano Zampini PetscCall(MatView(T, matl_dbg_viewer)); 110032fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i)); 110132fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name)); 110232fe681dSStefano Zampini PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer)); 110332fe681dSStefano Zampini } 110432fe681dSStefano Zampini cum += subset_size; 110532fe681dSStefano Zampini } 110632fe681dSStefano Zampini PetscCall(PetscViewerFlush(matl_dbg_viewer)); 110711955456SStefano Zampini } 110811955456SStefano Zampini 1109883469d8SStefano Zampini /* get explicit Schur Complement computed during numeric factorization */ 11109566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 11119566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype))); 11124ba54290SStefano Zampini #if defined(PETSC_HAVE_CUDA) 11139566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, "")); 11144ba54290SStefano Zampini #endif 11151baa6e33SBarry Smith if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype))); 11169566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL)); 11179566063dSJacob Faibussowitsch PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all)); 11189566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 11199566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 11209566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all, &Stype)); 1121b3cb21ddSStefano Zampini 1122d62866d3SStefano Zampini /* we can reuse the solvers if we are not using the economic version */ 1123791bdc09SStefano Zampini reuse_solvers = (PetscBool)(reuse_solvers && !economic && !sub_schurs->graph->multi_element); 112432fe681dSStefano Zampini if (!sub_schurs->gdsw) { 1125683d3df6SStefano Zampini factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 11269371c9d4SSatish Balay if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE; 112732fe681dSStefano Zampini } 1128df4d28bfSStefano Zampini solver_S = PETSC_TRUE; 1129ca92afb2SStefano Zampini 113072b8c272SStefano Zampini /* update the Schur complement with the change of basis on the pressures */ 1131ca92afb2SStefano Zampini if (benign_n) { 11327ebab0bbSStefano Zampini const PetscScalar *cs_AIB; 11337ebab0bbSStefano Zampini PetscScalar *S_data, *AIIm1_data; 11343b03f7bbSStefano Zampini Mat S2 = NULL, S3 = NULL; /* dbg */ 11353b03f7bbSStefano Zampini PetscScalar *S2_data, *S3_data; /* dbg */ 11367ebab0bbSStefano Zampini Vec v, benign_AIIm1_ones; 11375cbda25cSStefano Zampini PetscInt sizeA; 1138ca92afb2SStefano Zampini 11399566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all, &S_data)); 11409566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 11419566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &sizeA)); 11429566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 26, 0)); 1143ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 11449566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F, 70, 1)); 1145ca92afb2SStefano Zampini #endif 11469566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB)); 11479566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 11483b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11499566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2)); 11509566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3)); 11519566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S2, &S2_data)); 11529566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S3, &S3_data)); 11533b03f7bbSStefano Zampini } 1154ca92afb2SStefano Zampini for (i = 0; i < benign_n; i++) { 11553b03f7bbSStefano Zampini PetscScalar *array, sum = 0., one = 1., *sums; 1156ca92afb2SStefano Zampini const PetscInt *idxs; 11573b03f7bbSStefano Zampini PetscInt k, j, nz; 115847484b83SStefano Zampini PetscBLASInt B_k, B_n; 1159ca92afb2SStefano Zampini 11609566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(benign_n, &sums)); 11619566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 11629566063dSJacob Faibussowitsch PetscCall(VecCopy(benign_AIIm1_ones, v)); 11639566063dSJacob Faibussowitsch PetscCall(MatSolve(F, v, benign_AIIm1_ones)); 11649566063dSJacob Faibussowitsch PetscCall(MatMult(A, benign_AIIm1_ones, v)); 11659566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones)); 11663b03f7bbSStefano Zampini /* p0 dofs (eliminated) are excluded from the sums */ 11673b03f7bbSStefano Zampini for (k = 0; k < benign_n; k++) { 11689566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[k], &nz)); 11699566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[k], &idxs)); 11703b03f7bbSStefano Zampini for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i]; 11719566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[k], &idxs)); 11723b03f7bbSStefano Zampini } 11739566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 11743b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11753b03f7bbSStefano Zampini Vec vv; 11763b03f7bbSStefano Zampini char name[16]; 11773b03f7bbSStefano Zampini 11789566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv)); 117963a3b9bcSJacob Faibussowitsch PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i)); 11809566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)vv, name)); 11819566063dSJacob Faibussowitsch PetscCall(VecView(vv, matl_dbg_viewer)); 11823b03f7bbSStefano Zampini } 118347484b83SStefano Zampini /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 118447484b83SStefano Zampini /* cs_AIB already scaled by 1./nz */ 118547484b83SStefano Zampini B_k = 1; 1186f9635d15SStefano Zampini PetscCall(PetscBLASIntCast(size_schur, &B_n)); 11873b03f7bbSStefano Zampini for (k = 0; k < benign_n; k++) { 11883b03f7bbSStefano Zampini sum = sums[k]; 11893b03f7bbSStefano Zampini 11903b03f7bbSStefano Zampini if (PetscAbsScalar(sum) == 0.0) continue; 11913b03f7bbSStefano Zampini if (k == i) { 1192f9635d15SStefano Zampini if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1193f9635d15SStefano Zampini if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 11943b03f7bbSStefano Zampini } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 11953b03f7bbSStefano Zampini sum /= 2.0; 1196f9635d15SStefano Zampini if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1197f9635d15SStefano Zampini if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 11983b03f7bbSStefano Zampini } 11993b03f7bbSStefano Zampini } 12005cbda25cSStefano Zampini sum = 1.; 1201f9635d15SStefano Zampini if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1202f9635d15SStefano Zampini if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n)); 12039566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 12045cbda25cSStefano Zampini /* set p0 entry of AIIm1_ones to zero */ 12059566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 12069566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i], &idxs)); 1207282d6408SStefano Zampini for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.; 12089566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 12099566063dSJacob Faibussowitsch PetscCall(PetscFree(sums)); 12103b03f7bbSStefano Zampini } 12119566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones)); 12123b03f7bbSStefano Zampini if (matl_dbg_viewer) { 12139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S2, &S2_data)); 12149566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S3, &S3_data)); 1215ca92afb2SStefano Zampini } 12165e116b59SBarry Smith if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */ 1217a7414863SStefano Zampini PetscInt k, j; 1218a7414863SStefano Zampini for (k = 0; k < size_schur; k++) { 1219ad540459SPierre Jolivet for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]); 1220a7414863SStefano Zampini } 1221a7414863SStefano Zampini } 1222a7414863SStefano Zampini 12235cbda25cSStefano Zampini /* restore defaults */ 12249566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 26, -1)); 12255cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 12269566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F, 70, 0)); 12275cbda25cSStefano Zampini #endif 12289566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB)); 12299566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 12309566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v)); 12319566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all, &S_data)); 12323b03f7bbSStefano Zampini if (matl_dbg_viewer) { 12333b03f7bbSStefano Zampini Mat S; 12343b03f7bbSStefano Zampini 12359566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 12369566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 12379566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S, "Sb")); 12389566063dSJacob Faibussowitsch PetscCall(MatView(S, matl_dbg_viewer)); 12399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S)); 12409566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S2, "S2P")); 12419566063dSJacob Faibussowitsch PetscCall(MatView(S2, matl_dbg_viewer)); 12429566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S3, "S3P")); 12439566063dSJacob Faibussowitsch PetscCall(MatView(S3, matl_dbg_viewer)); 12449566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs")); 12459566063dSJacob Faibussowitsch PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer)); 12469566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 12473b03f7bbSStefano Zampini } 12489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S2)); 12499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S3)); 1250ca92afb2SStefano Zampini } 1251a3df083aSStefano Zampini if (!reuse_solvers) { 125248a46eb9SPierre Jolivet for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i])); 12539566063dSJacob Faibussowitsch PetscCall(PetscFree(is_p_r)); 12549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&cs_AIB_mat)); 12559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1256a3df083aSStefano Zampini } 1257791bdc09SStefano Zampini } else if (multi_element) { /* MUMPS does not support sparse Schur complements. Loop over local subs */ 1258791bdc09SStefano Zampini PetscInt *nnz; 1259791bdc09SStefano Zampini const PetscInt *idxs; 1260791bdc09SStefano Zampini PetscInt size_schur_sub; 1261791bdc09SStefano Zampini 1262791bdc09SStefano Zampini PetscCall(PetscCalloc1(size_schur, &nnz)); 1263791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1264791bdc09SStefano Zampini PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub)); 1265791bdc09SStefano Zampini PetscCall(ISGetIndices(is_sub_schur[sub], &idxs)); 1266791bdc09SStefano Zampini for (PetscInt j = 0; j < size_schur_sub; j++) nnz[idxs[j]] = size_schur_sub; 1267791bdc09SStefano Zampini PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs)); 1268791bdc09SStefano Zampini } 1269791bdc09SStefano Zampini PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, size_schur, size_schur, 0, nnz, &S_all)); 1270791bdc09SStefano Zampini PetscCall(MatSetOption(S_all, MAT_ROW_ORIENTED, sub_schurs->is_hermitian)); 1271791bdc09SStefano Zampini PetscCall(PetscFree(nnz)); 1272791bdc09SStefano Zampini 1273791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1274791bdc09SStefano Zampini Mat Asub, Ssub; 1275791bdc09SStefano Zampini const PetscScalar *vals; 1276*f7b1b682SStefano Zampini PetscInt size_all_sub; 1277791bdc09SStefano Zampini 1278*f7b1b682SStefano Zampini F = NULL; 1279*f7b1b682SStefano Zampini PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub)); 1280*f7b1b682SStefano Zampini PetscCall(ISGetLocalSize(is_sub_all[sub], &size_all_sub)); 1281791bdc09SStefano Zampini PetscCall(MatCreateSubMatrix(A, is_sub_all[sub], is_sub_all[sub], MAT_INITIAL_MATRIX, &Asub)); 1282*f7b1b682SStefano Zampini if (size_schur_sub == size_all_sub) { 1283*f7b1b682SStefano Zampini /* we can't use MatFactor when size_schur == size_of_the_problem */ 1284*f7b1b682SStefano Zampini PetscCall(MatConvert(Asub, MATDENSE, MAT_INITIAL_MATRIX, &Ssub)); 1285*f7b1b682SStefano Zampini } else { 1286791bdc09SStefano Zampini PetscCall(MatGetFactor(Asub, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F)); 1287791bdc09SStefano Zampini PetscCheck(F, PetscObjectComm((PetscObject)Asub), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)Asub)->type_name); 1288791bdc09SStefano Zampini PetscCall(MatSetErrorIfFailure(Asub, PETSC_TRUE)); 1289791bdc09SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1290791bdc09SStefano Zampini if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 1291791bdc09SStefano Zampini #endif 1292791bdc09SStefano Zampini /* subsets ordered last */ 1293791bdc09SStefano Zampini PetscCall(MatFactorSetSchurIS(F, is_sub_schur_all[sub])); 1294791bdc09SStefano Zampini 1295791bdc09SStefano Zampini /* factorization step */ 1296791bdc09SStefano Zampini switch (sub_schurs->mat_factor_type) { 1297791bdc09SStefano Zampini case MAT_FACTOR_CHOLESKY: 1298791bdc09SStefano Zampini PetscCall(MatCholeskyFactorSymbolic(F, Asub, NULL, NULL)); 1299791bdc09SStefano Zampini /* be sure that icntl 19 is not set by command line */ 1300791bdc09SStefano Zampini PetscCall(MatMumpsSetIcntl(F, 19, 2)); 1301791bdc09SStefano Zampini PetscCall(MatCholeskyFactorNumeric(F, Asub, NULL)); 1302791bdc09SStefano Zampini S_lower_triangular = PETSC_TRUE; 1303791bdc09SStefano Zampini break; 1304791bdc09SStefano Zampini case MAT_FACTOR_LU: 1305791bdc09SStefano Zampini PetscCall(MatLUFactorSymbolic(F, Asub, NULL, NULL, NULL)); 1306791bdc09SStefano Zampini /* be sure that icntl 19 is not set by command line */ 1307791bdc09SStefano Zampini PetscCall(MatMumpsSetIcntl(F, 19, 3)); 1308791bdc09SStefano Zampini PetscCall(MatLUFactorNumeric(F, Asub, NULL)); 1309791bdc09SStefano Zampini break; 1310791bdc09SStefano Zampini default: 1311791bdc09SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1312791bdc09SStefano Zampini } 1313791bdc09SStefano Zampini PetscCall(MatFactorCreateSchurComplement(F, &Ssub, NULL)); 1314*f7b1b682SStefano Zampini } 1315*f7b1b682SStefano Zampini PetscCall(MatDestroy(&Asub)); 1316791bdc09SStefano Zampini PetscCall(MatDenseGetArrayRead(Ssub, &vals)); 1317791bdc09SStefano Zampini PetscCall(ISGetIndices(is_sub_schur[sub], &idxs)); 1318791bdc09SStefano Zampini PetscCall(MatSetValues(S_all, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES)); 1319791bdc09SStefano Zampini PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs)); 1320791bdc09SStefano Zampini PetscCall(MatDenseRestoreArrayRead(Ssub, &vals)); 1321791bdc09SStefano Zampini PetscCall(MatDestroy(&Ssub)); 1322791bdc09SStefano Zampini PetscCall(MatDestroy(&F)); 1323791bdc09SStefano Zampini } 1324791bdc09SStefano Zampini PetscCall(MatAssemblyBegin(S_all, MAT_FINAL_ASSEMBLY)); 1325791bdc09SStefano Zampini PetscCall(MatAssemblyEnd(S_all, MAT_FINAL_ASSEMBLY)); 1326791bdc09SStefano Zampini PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 1327791bdc09SStefano Zampini PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1328791bdc09SStefano Zampini Stype = MATDENSE; 1329791bdc09SStefano Zampini reuse_solvers = PETSC_FALSE; 1330791bdc09SStefano Zampini factor_workaround = PETSC_FALSE; 1331791bdc09SStefano Zampini solver_S = PETSC_FALSE; 1332df4d28bfSStefano Zampini } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 13339566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all)); 13349566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all, &Stype)); 1335683d3df6SStefano Zampini reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1336166598c1SStefano Zampini factor_workaround = PETSC_FALSE; 1337df4d28bfSStefano Zampini solver_S = PETSC_FALSE; 1338be83ff47SStefano Zampini } 1339be83ff47SStefano Zampini 1340be83ff47SStefano Zampini if (reuse_solvers) { 134132fe681dSStefano Zampini Mat A_II, pA_II, Afake; 134253892102SStefano Zampini Vec vec1_B; 1343df4d28bfSStefano Zampini PCBDDCReuseSolvers msolv_ctx; 13443462e049SStefano Zampini PetscInt n_R; 1345d5574798SStefano Zampini 1346df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) { 13479566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1348e28d306cSStefano Zampini } else { 13499566063dSJacob Faibussowitsch PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1350d62866d3SStefano Zampini } 1351df4d28bfSStefano Zampini msolv_ctx = sub_schurs->reuse_solver; 135232fe681dSStefano Zampini PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL)); 13539566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)F)); 1354d5574798SStefano Zampini msolv_ctx->F = F; 13559566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL)); 1356683d3df6SStefano Zampini /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1357683d3df6SStefano Zampini { 1358683d3df6SStefano Zampini PetscScalar *array; 1359683d3df6SStefano Zampini PetscInt n; 1360683d3df6SStefano Zampini 13619566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(msolv_ctx->sol, &n)); 13629566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->sol, &array)); 13639566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs)); 13649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->sol, &array)); 1365683d3df6SStefano Zampini } 13663fc34f97SStefano Zampini msolv_ctx->has_vertices = schur_has_vertices; 1367d62866d3SStefano Zampini 1368d62866d3SStefano Zampini /* interior solver */ 13699566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver)); 137032fe681dSStefano Zampini PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II)); 13719566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL)); 13729566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)")); 13739566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx)); 13749566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 13759566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior)); 13769566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose)); 137732fe681dSStefano Zampini if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy)); 1378d62866d3SStefano Zampini 1379d62866d3SStefano Zampini /* correction solver */ 138032fe681dSStefano Zampini if (!sub_schurs->gdsw) { 13819566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver)); 13829566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL)); 13839566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)")); 13849566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx)); 13859566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 13869566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction)); 13879566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose)); 138853892102SStefano Zampini 138953892102SStefano Zampini /* scatter and vecs for Schur complement solver */ 13909566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B)); 13919566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL)); 13923fc34f97SStefano Zampini if (!schur_has_vertices) { 13939566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B)); 13949566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B)); 13959566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_A_all)); 139653892102SStefano Zampini msolv_ctx->is_R = is_A_all; 1397683d3df6SStefano Zampini } else { 1398683d3df6SStefano Zampini IS is_B_all; 1399683d3df6SStefano Zampini const PetscInt *idxs; 1400683d3df6SStefano Zampini PetscInt dual, n_v, n; 1401683d3df6SStefano Zampini 14029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 1403683d3df6SStefano Zampini dual = size_schur - n_v; 14049566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_A_all, &n)); 14059566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_A_all, &idxs)); 14069566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all)); 14079566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B)); 14089566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all)); 14099566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all)); 14109566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B)); 14119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all)); 14129566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R)); 14139566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_A_all, &idxs)); 1414683d3df6SStefano Zampini } 14159566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R)); 14169566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake)); 14179566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY)); 14189566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY)); 14199566063dSJacob Faibussowitsch PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake)); 14209566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Afake)); 14219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec1_B)); 142232fe681dSStefano Zampini } 1423ca92afb2SStefano Zampini /* communicate benign info to solver context */ 1424ca92afb2SStefano Zampini if (benign_n) { 14255cbda25cSStefano Zampini PetscScalar *array; 14265cbda25cSStefano Zampini 1427ca92afb2SStefano Zampini msolv_ctx->benign_n = benign_n; 1428ca92afb2SStefano Zampini msolv_ctx->benign_zerodiag_subs = is_p_r; 14299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals)); 14305cbda25cSStefano Zampini msolv_ctx->benign_csAIB = cs_AIB_mat; 14319566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL)); 14329566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array)); 14339566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec)); 14349566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array)); 14355cbda25cSStefano Zampini msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1436ca92afb2SStefano Zampini } 1437ada6e2d7SStefano Zampini } else { 14381baa6e33SBarry Smith if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 14399566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver)); 1440d5574798SStefano Zampini } 14419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 14429566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_A_all)); 14435db18549SStefano Zampini 1444be83ff47SStefano Zampini /* Work arrays */ 14459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work)); 1446d2627357SStefano Zampini 1447be83ff47SStefano Zampini /* S_Ej_all */ 1448791bdc09SStefano Zampini PetscInt *idx_work = NULL; 1449be83ff47SStefano Zampini cum = cum2 = 0; 1450791bdc09SStefano Zampini if (!multi_element) PetscCall(MatDenseGetArrayRead(S_all, &rS_data)); 1451791bdc09SStefano Zampini else PetscCall(PetscMalloc1(max_subset_size, &idx_work)); 14529566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr)); 145348a46eb9SPierre Jolivet if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 145448a46eb9SPierre Jolivet if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA)); 145565d8bf0aSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 145632fe681dSStefano Zampini /* get S_E (or K^i_EE for GDSW) */ 14579566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 145832fe681dSStefano Zampini if (sub_schurs->gdsw) { 145932fe681dSStefano Zampini Mat T; 146032fe681dSStefano Zampini 146132fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T)); 146232fe681dSStefano Zampini PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T)); 146332fe681dSStefano Zampini PetscCall(MatDestroy(&T)); 146432fe681dSStefano Zampini } else { 1465791bdc09SStefano Zampini if (multi_element) { /* transpose copy to workspace */ 1466791bdc09SStefano Zampini // XXX CSR directly? 1467791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j; 1468791bdc09SStefano Zampini PetscCall(MatGetValues(S_all, subset_size, idx_work, subset_size, idx_work, work)); 1469791bdc09SStefano Zampini if (!sub_schurs->is_hermitian) { 1470791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) { 1471791bdc09SStefano Zampini for (PetscInt j = k; j < subset_size; j++) { 1472791bdc09SStefano Zampini PetscScalar t = work[k * subset_size + j]; 1473791bdc09SStefano Zampini work[k * subset_size + j] = work[j * subset_size + k]; 1474791bdc09SStefano Zampini work[j * subset_size + k] = t; 1475791bdc09SStefano Zampini } 1476791bdc09SStefano Zampini } 1477791bdc09SStefano Zampini } 1478791bdc09SStefano Zampini } else if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */ 1479791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) { 1480791bdc09SStefano Zampini for (PetscInt j = k; j < subset_size; j++) { 14811683a169SBarry Smith work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 14821683a169SBarry Smith work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]); 1483be83ff47SStefano Zampini } 1484be83ff47SStefano Zampini } 148506a4e24aSStefano Zampini } else { /* just copy to workspace */ 1486791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) { 1487791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1488be83ff47SStefano Zampini } 14899087bf02SStefano Zampini } 149032fe681dSStefano Zampini } 14915a95e1ceSStefano Zampini /* insert S_E values */ 1492b7ab4a40SStefano Zampini if (sub_schurs->change) { 14938760537fSStefano Zampini Mat change_sub, SEj, T; 149472b8c272SStefano Zampini 149572b8c272SStefano Zampini /* change basis */ 14969566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 14979566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 14988760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 14998760537fSStefano Zampini Mat T2; 15009566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 15019566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 15029566063dSJacob Faibussowitsch PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 15039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2)); 15048760537fSStefano Zampini } else { 15059566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 150672b8c272SStefano Zampini } 15079566063dSJacob Faibussowitsch PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 15089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 15099566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL)); 15109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 151172b8c272SStefano Zampini } 15129566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1513862806e4SStefano Zampini if (compute_Stilda) { 151432fe681dSStefano Zampini if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 15157ebab0bbSStefano Zampini Mat M; 15167ebab0bbSStefano Zampini const PetscScalar *vals; 15177ebab0bbSStefano Zampini PetscBool isdense, isdensecuda; 1518f4f7d9d6SStefano Zampini 15199566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M)); 15209566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 15219566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 152248a46eb9SPierre Jolivet if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype)); 15239566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense)); 15249566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda)); 152579329b78SStefano Zampini switch (sub_schurs->mat_factor_type) { 152679329b78SStefano Zampini case MAT_FACTOR_CHOLESKY: 15279566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL)); 152879329b78SStefano Zampini break; 152979329b78SStefano Zampini case MAT_FACTOR_LU: 15309566063dSJacob Faibussowitsch PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 153179329b78SStefano Zampini break; 153279329b78SStefano Zampini default: 153379329b78SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 15342972d61bSStefano Zampini } 15357ebab0bbSStefano Zampini if (isdense) { 15369566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 15377ebab0bbSStefano Zampini #if defined(PETSC_HAVE_CUDA) 15387ebab0bbSStefano Zampini } else if (isdensecuda) { 15394742e46bSJacob Faibussowitsch PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M)); 15407ebab0bbSStefano Zampini #endif 154198921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype); 15429566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(M, &vals)); 15439566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size)); 15449566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(M, &vals)); 15459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 154632fe681dSStefano Zampini } else if (scaling) { /* not using deluxe */ 15479d54b7f4SStefano Zampini Mat SEj; 15489d54b7f4SStefano Zampini Vec D; 15499d54b7f4SStefano Zampini PetscScalar *array; 15509d54b7f4SStefano Zampini 15519566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 15529566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall, &array)); 15539566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D)); 15549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall, &array)); 15559566063dSJacob Faibussowitsch PetscCall(VecShift(D, -1.)); 15569566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(SEj, D, D)); 15579566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 15589566063dSJacob Faibussowitsch PetscCall(VecDestroy(&D)); 15599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 15609d54b7f4SStefano Zampini } 156132fe681dSStefano Zampini } 1562be83ff47SStefano Zampini cum += subset_size; 1563be83ff47SStefano Zampini cum2 += subset_size * (size_schur + 1); 156404c5b2e6SStefano Zampini SEj_arr += subset_size * subset_size; 156504c5b2e6SStefano Zampini if (SEjinv_arr) SEjinv_arr += subset_size * subset_size; 1566be83ff47SStefano Zampini } 156748a46eb9SPierre Jolivet if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA)); 1568791bdc09SStefano Zampini if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data)); 15699566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr)); 157048a46eb9SPierre Jolivet if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 157148a46eb9SPierre Jolivet if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1572683d3df6SStefano Zampini 15737ebab0bbSStefano Zampini /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 15747ebab0bbSStefano Zampini however, preliminary tests indicate using GPUs is still faster in the solve phase */ 15757ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 15767ebab0bbSStefano Zampini if (reuse_solvers) { 15777ebab0bbSStefano Zampini Mat St; 15787ebab0bbSStefano Zampini MatFactorSchurStatus st; 15797ebab0bbSStefano Zampini 158035d0533cSStefano Zampini flg = PETSC_FALSE; 15819566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL)); 15829566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &St, &st)); 15839566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(St, flg)); 15849566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &St, st)); 15857ebab0bbSStefano Zampini } 15867ebab0bbSStefano Zampini #endif 15877ebab0bbSStefano Zampini 1588683d3df6SStefano Zampini schur_factor = NULL; 158945951f25SStefano Zampini if (compute_Stilda && size_active_schur) { 15909d54b7f4SStefano Zampini if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 159132fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 15929566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur)); 159332fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 15944a6c6b0dSStefano Zampini } else { 1595683d3df6SStefano Zampini Mat S_all_inv = NULL; 15967ebab0bbSStefano Zampini 159732fe681dSStefano Zampini if (solver_S && !sub_schurs->gdsw) { 1598683d3df6SStefano Zampini /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1599683d3df6SStefano Zampini The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 16003fc34f97SStefano Zampini if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1601683d3df6SStefano Zampini PetscScalar *data; 1602683d3df6SStefano Zampini PetscInt nd = 0; 16036dba178dSStefano Zampini 16047a46b595SBarry Smith PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 16059566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 16069566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv, &data)); 1607683d3df6SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 16089566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1609683d3df6SStefano Zampini } 16103fc34f97SStefano Zampini 16113fc34f97SStefano Zampini /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 16123fc34f97SStefano Zampini if (schur_has_vertices) { 16133fc34f97SStefano Zampini Mat M; 16143fc34f97SStefano Zampini PetscScalar *tdata; 16153fc34f97SStefano Zampini PetscInt nv = 0, news; 16163fc34f97SStefano Zampini 16179566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 16183fc34f97SStefano Zampini news = size_active_schur + nv; 16199566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(news * news, &tdata)); 1620683d3df6SStefano Zampini for (i = 0; i < size_active_schur; i++) { 16219566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i)); 16229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv)); 16233fc34f97SStefano Zampini } 16243fc34f97SStefano Zampini for (i = 0; i < nv; i++) { 16253fc34f97SStefano Zampini PetscInt k = i + size_active_schur; 16269566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i)); 16273fc34f97SStefano Zampini } 16283fc34f97SStefano Zampini 16299566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M)); 16309566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 16319566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL)); 16323fc34f97SStefano Zampini /* save the factors */ 16333fc34f97SStefano Zampini cum = 0; 16349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor)); 16353fc34f97SStefano Zampini for (i = 0; i < size_active_schur; i++) { 16369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i)); 1637683d3df6SStefano Zampini cum += size_active_schur - i; 1638683d3df6SStefano Zampini } 16393fc34f97SStefano Zampini for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)])); 1640791bdc09SStefano Zampini S_all_inv->ops->solve = M->ops->solve; 1641791bdc09SStefano Zampini S_all_inv->ops->matsolve = M->ops->matsolve; 1642791bdc09SStefano Zampini S_all_inv->ops->solvetranspose = M->ops->solvetranspose; 1643791bdc09SStefano Zampini S_all_inv->ops->matsolvetranspose = M->ops->matsolvetranspose; 1644791bdc09SStefano Zampini S_all_inv->factortype = MAT_FACTOR_CHOLESKY; 16459566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 16463fc34f97SStefano Zampini /* move back just the active dofs to the Schur complement */ 164748a46eb9SPierre Jolivet for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur)); 16489566063dSJacob Faibussowitsch PetscCall(PetscFree(tdata)); 16499566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 16503fc34f97SStefano Zampini } else { /* we can factorize and invert just the activedofs */ 16513fc34f97SStefano Zampini Mat M; 16525002105bSStefano Zampini PetscScalar *aux; 16533fc34f97SStefano Zampini 16549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nd, &aux)); 16555002105bSStefano Zampini for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)]; 16569566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M)); 16579566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M, size_schur)); 16589566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 16599566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL)); 16609566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 16619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 16629566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M)); 16639566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 16649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 16659566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M)); 16669566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M, size_schur)); 16679566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 16689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 16695002105bSStefano Zampini for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i]; 16709566063dSJacob Faibussowitsch PetscCall(PetscFree(aux)); 16713fc34f97SStefano Zampini } 16729566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv, &data)); 16733fc34f97SStefano Zampini } else { /* use MatFactor calls to invert S */ 16749566063dSJacob Faibussowitsch PetscCall(MatFactorInvertSchurComplement(F)); 16759566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1676683d3df6SStefano Zampini } 167732fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 1678791bdc09SStefano Zampini if (multi_element) { 1679791bdc09SStefano Zampini PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S_all_inv)); 1680791bdc09SStefano Zampini PetscCall(MatSetOption(S_all_inv, MAT_ROW_ORIENTED, sub_schurs->is_hermitian)); 1681791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1682791bdc09SStefano Zampini const PetscScalar *vals; 1683791bdc09SStefano Zampini const PetscInt *idxs; 1684791bdc09SStefano Zampini PetscInt size_schur_sub; 1685791bdc09SStefano Zampini Mat M; 1686791bdc09SStefano Zampini 1687791bdc09SStefano Zampini PetscCall(MatCreateSubMatrix(S_all, is_sub_schur[sub], is_sub_schur[sub], MAT_INITIAL_MATRIX, &M)); 1688791bdc09SStefano Zampini PetscCall(MatConvert(M, MATDENSE, MAT_INPLACE_MATRIX, &M)); 1689791bdc09SStefano Zampini PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 1690791bdc09SStefano Zampini PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1691791bdc09SStefano Zampini switch (sub_schurs->mat_factor_type) { 1692791bdc09SStefano Zampini case MAT_FACTOR_CHOLESKY: 1693791bdc09SStefano Zampini PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1694791bdc09SStefano Zampini break; 1695791bdc09SStefano Zampini case MAT_FACTOR_LU: 1696791bdc09SStefano Zampini PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 1697791bdc09SStefano Zampini break; 1698791bdc09SStefano Zampini default: 1699791bdc09SStefano Zampini SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1700791bdc09SStefano Zampini } 1701791bdc09SStefano Zampini PetscCall(MatSeqDenseInvertFactors_Private(M)); 1702791bdc09SStefano Zampini PetscCall(MatDenseGetArrayRead(M, &vals)); 1703791bdc09SStefano Zampini PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub)); 1704791bdc09SStefano Zampini PetscCall(ISGetIndices(is_sub_schur[sub], &idxs)); 1705791bdc09SStefano Zampini PetscCall(MatSetValues(S_all_inv, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES)); 1706791bdc09SStefano Zampini PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs)); 1707791bdc09SStefano Zampini PetscCall(MatDenseRestoreArrayRead(M, &vals)); 1708791bdc09SStefano Zampini PetscCall(MatDestroy(&M)); 1709791bdc09SStefano Zampini } 1710791bdc09SStefano Zampini PetscCall(MatAssemblyBegin(S_all_inv, MAT_FINAL_ASSEMBLY)); 1711791bdc09SStefano Zampini PetscCall(MatAssemblyEnd(S_all_inv, MAT_FINAL_ASSEMBLY)); 1712791bdc09SStefano Zampini } else { 17139566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)S_all)); 1714683d3df6SStefano Zampini S_all_inv = S_all; 17159566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv, &S_data)); 17169566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(size_schur, &B_N)); 17179566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1718f4f7d9d6SStefano Zampini if (use_potr) { 1719792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr)); 1720835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1721792fecdfSBarry Smith PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr)); 1722835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1723f4f7d9d6SStefano Zampini } else if (use_sytr) { 1724792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1725835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1726792fecdfSBarry Smith PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr)); 1727835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1728d6462365SStefano Zampini } else { 1729792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr)); 1730835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1731792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1732835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1733be83ff47SStefano Zampini } 17349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur)); 17359566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 17369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv, &S_data)); 1737791bdc09SStefano Zampini } 173832fe681dSStefano Zampini } else if (sub_schurs->gdsw) { 173932fe681dSStefano Zampini Mat tS, tX, SEj, S_II, S_IE, S_EE; 174032fe681dSStefano Zampini KSP pS_II; 174132fe681dSStefano Zampini PC pS_II_pc; 174232fe681dSStefano Zampini IS EE, II; 174332fe681dSStefano Zampini PetscInt nS; 174432fe681dSStefano Zampini 174532fe681dSStefano Zampini PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL)); 174632fe681dSStefano Zampini PetscCall(MatGetSize(tS, &nS, NULL)); 174732fe681dSStefano Zampini PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 174832fe681dSStefano Zampini for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */ 174932fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 175032fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj)); 175132fe681dSStefano Zampini 175232fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE)); 175332fe681dSStefano Zampini PetscCall(ISComplement(EE, 0, nS, &II)); 175432fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II)); 175532fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE)); 175632fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE)); 175732fe681dSStefano Zampini PetscCall(ISDestroy(&II)); 175832fe681dSStefano Zampini PetscCall(ISDestroy(&EE)); 175932fe681dSStefano Zampini 176032fe681dSStefano Zampini PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II)); 17613821be0aSBarry Smith PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */ 176232fe681dSStefano Zampini PetscCall(KSPSetType(pS_II, KSPPREONLY)); 176332fe681dSStefano Zampini PetscCall(KSPGetPC(pS_II, &pS_II_pc)); 176432fe681dSStefano Zampini PetscCall(PCSetType(pS_II_pc, PCSVD)); 176532fe681dSStefano Zampini PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix)); 176632fe681dSStefano Zampini PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_")); 176732fe681dSStefano Zampini PetscCall(KSPSetOperators(pS_II, S_II, S_II)); 176832fe681dSStefano Zampini PetscCall(MatDestroy(&S_II)); 176932fe681dSStefano Zampini PetscCall(KSPSetFromOptions(pS_II)); 177032fe681dSStefano Zampini PetscCall(KSPSetUp(pS_II)); 177132fe681dSStefano Zampini PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX)); 177232fe681dSStefano Zampini PetscCall(KSPMatSolve(pS_II, S_IE, tX)); 177332fe681dSStefano Zampini PetscCall(KSPDestroy(&pS_II)); 177432fe681dSStefano Zampini 1775fb842aefSJose E. Roman PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DETERMINE, &SEj)); 177632fe681dSStefano Zampini PetscCall(MatDestroy(&S_IE)); 177732fe681dSStefano Zampini PetscCall(MatDestroy(&tX)); 177832fe681dSStefano Zampini PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN)); 177932fe681dSStefano Zampini PetscCall(MatDestroy(&S_EE)); 178032fe681dSStefano Zampini 178132fe681dSStefano Zampini PetscCall(MatDestroy(&SEj)); 178232fe681dSStefano Zampini cum += subset_size; 178332fe681dSStefano Zampini SEjinv_arr += subset_size * subset_size; 178432fe681dSStefano Zampini } 178532fe681dSStefano Zampini PetscCall(MatDestroy(&tS)); 178632fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1787be83ff47SStefano Zampini } 1788be83ff47SStefano Zampini /* S_Ej_tilda_all */ 1789be83ff47SStefano Zampini cum = cum2 = 0; 179032fe681dSStefano Zampini rS_data = NULL; 1791791bdc09SStefano Zampini if (S_all_inv && !multi_element) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data)); 179232fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1793be83ff47SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 1794be83ff47SStefano Zampini PetscInt j; 1795862806e4SStefano Zampini 17969566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1797be83ff47SStefano Zampini /* get (St^-1)_E */ 179872b8c272SStefano Zampini /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 179906a4e24aSStefano Zampini will be properly accessed later during adaptive selection */ 1800791bdc09SStefano Zampini if (multi_element) { /* transpose copy to workspace */ 1801791bdc09SStefano Zampini // XXX CSR directly? 1802791bdc09SStefano Zampini for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j; 1803791bdc09SStefano Zampini PetscCall(MatGetValues(S_all_inv, subset_size, idx_work, subset_size, idx_work, work)); 1804791bdc09SStefano Zampini if (!sub_schurs->is_hermitian) { 1805791bdc09SStefano Zampini for (PetscInt k = 0; k < subset_size; k++) { 1806791bdc09SStefano Zampini for (PetscInt j = k; j < subset_size; j++) { 1807791bdc09SStefano Zampini PetscScalar t = work[k * subset_size + j]; 1808791bdc09SStefano Zampini work[k * subset_size + j] = work[j * subset_size + k]; 1809791bdc09SStefano Zampini work[j * subset_size + k] = t; 1810791bdc09SStefano Zampini } 1811791bdc09SStefano Zampini } 1812791bdc09SStefano Zampini } 1813791bdc09SStefano Zampini } else if (rS_data) { 1814a0b0af32SStefano Zampini if (S_lower_triangular) { 1815be83ff47SStefano Zampini PetscInt k; 1816b7ab4a40SStefano Zampini if (sub_schurs->change) { 1817be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) { 1818be83ff47SStefano Zampini for (j = k; j < subset_size; j++) { 18191683a169SBarry Smith work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 18206c3e6151SStefano Zampini work[j * subset_size + k] = work[k * subset_size + j]; 1821be83ff47SStefano Zampini } 1822be83ff47SStefano Zampini } 182372b8c272SStefano Zampini } else { 182472b8c272SStefano Zampini for (k = 0; k < subset_size; k++) { 1825ad540459SPierre Jolivet for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 182672b8c272SStefano Zampini } 182772b8c272SStefano Zampini } 182872b8c272SStefano Zampini } else { 1829be83ff47SStefano Zampini PetscInt k; 1830be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) { 1831ad540459SPierre Jolivet for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1832be83ff47SStefano Zampini } 1833be83ff47SStefano Zampini } 183432fe681dSStefano Zampini } 1835b7ab4a40SStefano Zampini if (sub_schurs->change) { 18368760537fSStefano Zampini Mat change_sub, SEj, T; 183732fe681dSStefano Zampini PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL; 183872b8c272SStefano Zampini 183972b8c272SStefano Zampini /* change basis */ 18409566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1841791bdc09SStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, (rS_data || multi_element) ? work : SEjinv_arr, &SEj)); 18428760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 18438760537fSStefano Zampini Mat T2; 18449566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 18459566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 18469566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2)); 18479566063dSJacob Faibussowitsch PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 18488760537fSStefano Zampini } else { 18499566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 185072b8c272SStefano Zampini } 18519566063dSJacob Faibussowitsch PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 18529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 185332fe681dSStefano Zampini PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL)); 18549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 185572b8c272SStefano Zampini } 1856791bdc09SStefano Zampini if (rS_data || multi_element) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size)); 1857be83ff47SStefano Zampini cum += subset_size; 1858be83ff47SStefano Zampini cum2 += subset_size * (size_schur + 1); 185904c5b2e6SStefano Zampini SEjinv_arr += subset_size * subset_size; 1860883469d8SStefano Zampini } 186132fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 186232fe681dSStefano Zampini if (S_all_inv) { 1863791bdc09SStefano Zampini if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data)); 1864df4d28bfSStefano Zampini if (solver_S) { 18653fc34f97SStefano Zampini if (schur_has_vertices) { 18669566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED)); 18673fc34f97SStefano Zampini } else { 18689566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED)); 18695db18549SStefano Zampini } 18703fc34f97SStefano Zampini } 187132fe681dSStefano Zampini } 18729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all_inv)); 1873683d3df6SStefano Zampini } 1874683d3df6SStefano Zampini 18753fc34f97SStefano Zampini /* move back factors if needed */ 187632fe681dSStefano Zampini if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1877683d3df6SStefano Zampini Mat S_tmp; 18783fc34f97SStefano Zampini PetscInt nd = 0; 1879683d3df6SStefano Zampini PetscScalar *data; 1880683d3df6SStefano Zampini 18810fdf79fbSJacob Faibussowitsch PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 18820fdf79fbSJacob Faibussowitsch PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 18830fdf79fbSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL)); 18849566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_tmp, &data)); 18859566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data, size_schur * size_schur)); 1886683d3df6SStefano Zampini 1887683d3df6SStefano Zampini if (S_lower_triangular) { 1888683d3df6SStefano Zampini cum = 0; 1889683d3df6SStefano Zampini for (i = 0; i < size_active_schur; i++) { 18909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i)); 1891683d3df6SStefano Zampini cum += size_active_schur - i; 1892683d3df6SStefano Zampini } 1893683d3df6SStefano Zampini } else { 18949566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur)); 1895683d3df6SStefano Zampini } 1896683d3df6SStefano Zampini if (sub_schurs->is_dir) { 18979566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1898ad540459SPierre Jolivet for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i]; 1899683d3df6SStefano Zampini } 19006dba178dSStefano Zampini /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1901683d3df6SStefano Zampini set the diagonal entry of the Schur factor to a very large value */ 1902ad540459SPierre Jolivet for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty; 19039566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_tmp, &data)); 19049566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED)); 19059087bf02SStefano Zampini } 190632fe681dSStefano Zampini } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1907367aa537SStefano Zampini PetscScalar *data; 1908367aa537SStefano Zampini PetscInt nd = 0; 1909367aa537SStefano Zampini 1910367aa537SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 19119566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1912367aa537SStefano Zampini } 19139566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 19149566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all, &data)); 191548a46eb9SPierre Jolivet for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1916367aa537SStefano Zampini for (i = size_active_schur + nd; i < size_schur; i++) { 19179566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 19186c3e6151SStefano Zampini data[i * (size_schur + 1)] = infty; 1919367aa537SStefano Zampini } 19209566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all, &data)); 19219566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 19224a6c6b0dSStefano Zampini } 1923791bdc09SStefano Zampini PetscCall(PetscFree(idx_work)); 19249566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 19259566063dSJacob Faibussowitsch PetscCall(PetscFree(schur_factor)); 19269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&Dall)); 1927791bdc09SStefano Zampini PetscCall(ISDestroy(&is_schur)); 1928791bdc09SStefano Zampini if (multi_element) { 1929791bdc09SStefano Zampini for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1930791bdc09SStefano Zampini PetscCall(ISDestroy(&is_sub_all[sub])); 1931791bdc09SStefano Zampini PetscCall(ISDestroy(&is_sub_schur_all[sub])); 1932791bdc09SStefano Zampini PetscCall(ISDestroy(&is_sub_schur[sub])); 1933791bdc09SStefano Zampini } 1934791bdc09SStefano Zampini PetscCall(PetscFree3(is_sub_all, is_sub_schur_all, is_sub_schur)); 1935791bdc09SStefano Zampini } 19364a6c6b0dSStefano Zampini } 19379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I_layer)); 19389566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all)); 19399566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BB)); 19409566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_IB)); 19419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BI)); 19429566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F)); 19436afe12f5SStefano Zampini 19449566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 19459566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 19465a95e1ceSStefano Zampini if (compute_Stilda) { 19479566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 19489566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 19499d54b7f4SStefano Zampini if (deluxe) { 19509566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 19519566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 195208122e43SStefano Zampini } 19539d54b7f4SStefano Zampini } 19546afe12f5SStefano Zampini 19555db18549SStefano Zampini /* Get local part of (\sum_j S_Ej) */ 195648a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all)); 19579566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0)); 19589566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray)); 19599566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 19609566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 19619566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 19629566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray)); 19639566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 19649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray)); 19659566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 19669566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 19679566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 19689566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray)); 19699566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 197008122e43SStefano Zampini 1971f6f667cfSStefano Zampini /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 19725a95e1ceSStefano Zampini if (compute_Stilda) { 19739566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0)); 19749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 19759566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 19769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 19779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 19789566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 19799566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 19809566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 19819566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 19829d54b7f4SStefano Zampini if (deluxe) { 19839566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0)); 19849566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 19859566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 19869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 19879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 19889566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 19899566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 19909566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 19919566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 199232fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { 19939d54b7f4SStefano Zampini PetscScalar *array; 19949d54b7f4SStefano Zampini PetscInt cum; 19959d54b7f4SStefano Zampini 19969566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 19979d54b7f4SStefano Zampini cum = 0; 19989d54b7f4SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 19999566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 20009566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(subset_size, &B_N)); 20019566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 2002f4f7d9d6SStefano Zampini if (use_potr) { 2003792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr)); 2004835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 2005792fecdfSBarry Smith PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr)); 2006835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 2007f4f7d9d6SStefano Zampini } else if (use_sytr) { 2008792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 2009835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 2010792fecdfSBarry Smith PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr)); 2011835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 2012f4f7d9d6SStefano Zampini } else { 2013792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr)); 2014835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 2015792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 2016835f2295SStefano Zampini PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 2017f4f7d9d6SStefano Zampini } 20189566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size)); 20199566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 20209d54b7f4SStefano Zampini cum += subset_size * subset_size; 20219d54b7f4SStefano Zampini } 20229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 20239566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 20249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 20259d54b7f4SStefano Zampini sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 20269d54b7f4SStefano Zampini } 202708122e43SStefano Zampini } 20289566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lstash)); 20299566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gstash)); 20309566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sstash)); 203157a87bf3SStefano Zampini 20323b03f7bbSStefano Zampini if (matl_dbg_viewer) { 203311955456SStefano Zampini if (sub_schurs->S_Ej_all) { 20349566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE")); 20359566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer)); 203611955456SStefano Zampini } 203711955456SStefano Zampini if (sub_schurs->sum_S_Ej_all) { 20389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE")); 20399566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer)); 204011955456SStefano Zampini } 204111955456SStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) { 20429566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm")); 20439566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer)); 204411955456SStefano Zampini } 204511955456SStefano Zampini if (sub_schurs->sum_S_Ej_tilda_all) { 20469566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt")); 20479566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer)); 204811955456SStefano Zampini } 204911955456SStefano Zampini } 20503202ece2SStefano Zampini 205179329b78SStefano Zampini /* when not explicit, we need to set the factor type */ 205279329b78SStefano Zampini if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = sub_schurs->is_hermitian ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU; 205379329b78SStefano Zampini 20545a95e1ceSStefano Zampini /* free workspace */ 205551ab8ad6SStefano Zampini if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 205651ab8ad6SStefano Zampini if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 20579566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 20589566063dSJacob Faibussowitsch PetscCall(PetscFree2(Bwork, pivots)); 20599566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n)); 2060791bdc09SStefano Zampini PetscCall(PetscFree(all_local_subid_N)); 20613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2062b1b3d7a2SStefano Zampini } 2063b1b3d7a2SStefano Zampini 2064d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) 2065d71ae5a4SJacob Faibussowitsch { 20669bb4a8caSStefano Zampini IS *faces, *edges, *all_cc, vertices; 206732fe681dSStefano Zampini PetscInt s, i, n_faces, n_edges, n_all_cc; 2068365a3a41SStefano Zampini PetscBool is_sorted, ispardiso, ismumps; 2069b1b3d7a2SStefano Zampini 2070b1b3d7a2SStefano Zampini PetscFunctionBegin; 20719566063dSJacob Faibussowitsch PetscCall(ISSorted(is_I, &is_sorted)); 207228b400f6SJacob Faibussowitsch PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted"); 20739566063dSJacob Faibussowitsch PetscCall(ISSorted(is_B, &is_sorted)); 207428b400f6SJacob Faibussowitsch PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted"); 2075b1b3d7a2SStefano Zampini 2076b1b3d7a2SStefano Zampini /* reset any previous data */ 20779566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(sub_schurs)); 2078b1b3d7a2SStefano Zampini 207932fe681dSStefano Zampini sub_schurs->gdsw = gdsw; 2080791bdc09SStefano Zampini sub_schurs->graph = graph; 208132fe681dSStefano Zampini 20825a95e1ceSStefano Zampini /* get index sets for faces and edges (already sorted by global ordering) */ 20839566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 2084b1b3d7a2SStefano Zampini n_all_cc = n_faces + n_edges; 20859566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge)); 20869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_all_cc, &all_cc)); 208732fe681dSStefano Zampini n_all_cc = 0; 2088b1b3d7a2SStefano Zampini for (i = 0; i < n_faces; i++) { 208932fe681dSStefano Zampini PetscCall(ISGetSize(faces[i], &s)); 209032fe681dSStefano Zampini if (!s) continue; 20918b6046baSStefano Zampini if (copycc) { 209232fe681dSStefano Zampini PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc])); 20938b6046baSStefano Zampini } else { 20949566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)faces[i])); 209532fe681dSStefano Zampini all_cc[n_all_cc] = faces[i]; 2096b1b3d7a2SStefano Zampini } 209732fe681dSStefano Zampini n_all_cc++; 20988b6046baSStefano Zampini } 2099b1b3d7a2SStefano Zampini for (i = 0; i < n_edges; i++) { 210032fe681dSStefano Zampini PetscCall(ISGetSize(edges[i], &s)); 210132fe681dSStefano Zampini if (!s) continue; 21028b6046baSStefano Zampini if (copycc) { 210332fe681dSStefano Zampini PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc])); 21048b6046baSStefano Zampini } else { 21059566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)edges[i])); 210632fe681dSStefano Zampini all_cc[n_all_cc] = edges[i]; 21078b6046baSStefano Zampini } 210832fe681dSStefano Zampini PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc)); 210932fe681dSStefano Zampini n_all_cc++; 2110b1b3d7a2SStefano Zampini } 21119566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)vertices)); 2112c8272957SStefano Zampini sub_schurs->is_vertices = vertices; 21139566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 2114d62866d3SStefano Zampini sub_schurs->is_dir = NULL; 21159566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir)); 2116b1b3d7a2SStefano Zampini 2117df4d28bfSStefano Zampini /* Determine if MatFactor can be used */ 21189566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix)); 2119883469d8SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 21209566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type))); 212188113c35SStefano Zampini #elif defined(PETSC_HAVE_MKL_PARDISO) 21229566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type))); 212388113c35SStefano Zampini #else 21249566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type))); 2125df4d28bfSStefano Zampini #endif 212679329b78SStefano Zampini sub_schurs->mat_factor_type = MAT_FACTOR_NONE; 212788113c35SStefano Zampini #if defined(PETSC_USE_COMPLEX) 212888113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 212988113c35SStefano Zampini #else 213088113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_TRUE; 2131883469d8SStefano Zampini #endif 213288113c35SStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 213311955456SStefano Zampini sub_schurs->is_symmetric = PETSC_TRUE; 21347f9db97bSStefano Zampini sub_schurs->debug = PETSC_FALSE; 2135991c41b4SStefano Zampini sub_schurs->restrict_comm = PETSC_FALSE; 2136d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 21379566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL)); 213879329b78SStefano Zampini PetscCall(PetscOptionsEnum("-sub_schurs_mat_factor_type", "Factor type to use. Use MAT_FACTOR_NONE for automatic selection", NULL, MatFactorTypes, (PetscEnum)sub_schurs->mat_factor_type, (PetscEnum *)&sub_schurs->mat_factor_type, NULL)); 21399566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL)); 21409566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL)); 21419566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL)); 21429566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL)); 21439566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL)); 2144d0609cedSBarry Smith PetscOptionsEnd(); 21459566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps)); 21469566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso)); 2147365a3a41SStefano Zampini sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 2148b1b3d7a2SStefano Zampini 2149a678f235SPierre Jolivet /* for reals, symmetric and Hermitian are synonyms */ 215011955456SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 215111955456SStefano Zampini sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 215211955456SStefano Zampini sub_schurs->is_hermitian = sub_schurs->is_symmetric; 215311955456SStefano Zampini #endif 215411955456SStefano Zampini 21559566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_I)); 2156b1b3d7a2SStefano Zampini sub_schurs->is_I = is_I; 21579566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_B)); 2158b1b3d7a2SStefano Zampini sub_schurs->is_B = is_B; 21599566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 21605db18549SStefano Zampini sub_schurs->l2gmap = graph->l2gmap; 21619566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 21625db18549SStefano Zampini sub_schurs->BtoNmap = BtoNmap; 21635a95e1ceSStefano Zampini sub_schurs->n_subs = n_all_cc; 2164b1b3d7a2SStefano Zampini sub_schurs->is_subs = all_cc; 2165b96c3477SStefano Zampini sub_schurs->S_Ej_all = NULL; 2166b96c3477SStefano Zampini sub_schurs->sum_S_Ej_all = NULL; 216708122e43SStefano Zampini sub_schurs->sum_S_Ej_inv_all = NULL; 2168b96c3477SStefano Zampini sub_schurs->sum_S_Ej_tilda_all = NULL; 2169b96c3477SStefano Zampini sub_schurs->is_Ej_all = NULL; 21703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2171b1b3d7a2SStefano Zampini } 2172b1b3d7a2SStefano Zampini 2173d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 2174d71ae5a4SJacob Faibussowitsch { 217534a97f8cSStefano Zampini PCBDDCSubSchurs schurs_ctx; 217634a97f8cSStefano Zampini 217734a97f8cSStefano Zampini PetscFunctionBegin; 21789566063dSJacob Faibussowitsch PetscCall(PetscNew(&schurs_ctx)); 21795ff63025SStefano Zampini schurs_ctx->n_subs = 0; 218034a97f8cSStefano Zampini *sub_schurs = schurs_ctx; 21813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 218234a97f8cSStefano Zampini } 218334a97f8cSStefano Zampini 2184d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 2185d71ae5a4SJacob Faibussowitsch { 218634a97f8cSStefano Zampini PetscFunctionBegin; 21873ba16761SJacob Faibussowitsch if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS); 2188791bdc09SStefano Zampini sub_schurs->graph = NULL; 21899566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->prefix)); 21909566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A)); 21919566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S)); 21929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_I)); 21939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_B)); 21949566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 21959566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 21969566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 21979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 21989566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 21999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 22009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 22019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_vertices)); 22029566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_dir)); 22039566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 2204791bdc09SStefano Zampini for (PetscInt i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i])); 22051baa6e33SBarry Smith if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs)); 22061baa6e33SBarry Smith if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 22079566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver)); 220872b8c272SStefano Zampini if (sub_schurs->change) { 2209791bdc09SStefano Zampini for (PetscInt i = 0; i < sub_schurs->n_subs; i++) { 22109566063dSJacob Faibussowitsch PetscCall(KSPDestroy(&sub_schurs->change[i])); 22119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 221272b8c272SStefano Zampini } 221372b8c272SStefano Zampini } 22149566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change)); 22159566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change_primal_sub)); 221634a97f8cSStefano Zampini sub_schurs->n_subs = 0; 22173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 221834a97f8cSStefano Zampini } 221934a97f8cSStefano Zampini 2220d71ae5a4SJacob Faibussowitsch PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) 2221d71ae5a4SJacob Faibussowitsch { 2222aea80f77Sstefano_zampini PetscFunctionBegin; 22239566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 22249566063dSJacob Faibussowitsch PetscCall(PetscFree(*sub_schurs)); 22253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2226aea80f77Sstefano_zampini } 2227aea80f77Sstefano_zampini 2228d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added) 2229d71ae5a4SJacob Faibussowitsch { 223034a97f8cSStefano Zampini PetscInt i, j, n; 223134a97f8cSStefano Zampini 223234a97f8cSStefano Zampini PetscFunctionBegin; 223334a97f8cSStefano Zampini n = 0; 223434a97f8cSStefano Zampini for (i = -n_prev; i < 0; i++) { 223534a97f8cSStefano Zampini PetscInt start_dof = queue_tip[i]; 223634a97f8cSStefano Zampini for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) { 223734a97f8cSStefano Zampini PetscInt dof = adjncy[j]; 223834a97f8cSStefano Zampini if (!PetscBTLookup(touched, dof)) { 22399566063dSJacob Faibussowitsch PetscCall(PetscBTSet(touched, dof)); 224034a97f8cSStefano Zampini queue_tip[n] = dof; 224134a97f8cSStefano Zampini n++; 224234a97f8cSStefano Zampini } 224334a97f8cSStefano Zampini } 224434a97f8cSStefano Zampini } 224534a97f8cSStefano Zampini *n_added = n; 22463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 224734a97f8cSStefano Zampini } 2248