15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h> 25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h> 305709791SSatish Balay #include <../src/mat/impls/dense/seq/dense.h> 408122e43SStefano Zampini #include <petscblaslapack.h> 534a97f8cSStefano Zampini 69fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *); 75ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *); 8df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec); 9df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec); 10d62866d3SStefano Zampini 11ca92afb2SStefano Zampini /* if v2 is not present, correction is done in-place */ 129371c9d4SSatish Balay PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) { 13ca92afb2SStefano Zampini PetscScalar *array; 14ca92afb2SStefano Zampini PetscScalar *array2; 15ca92afb2SStefano Zampini 16ca92afb2SStefano Zampini PetscFunctionBegin; 17ca92afb2SStefano Zampini if (!ctx->benign_n) PetscFunctionReturn(0); 185cbda25cSStefano Zampini if (sol && full) { 195cbda25cSStefano Zampini PetscInt n_I, size_schur; 205cbda25cSStefano Zampini 215cbda25cSStefano Zampini /* get sizes */ 229566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 239566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n_I)); 245cbda25cSStefano Zampini n_I = n_I - size_schur; 255cbda25cSStefano Zampini /* get schur sol from array */ 269566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 279566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 295cbda25cSStefano Zampini /* apply interior sol correction */ 309566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work)); 319566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 329566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v)); 335cbda25cSStefano Zampini } 34ca92afb2SStefano Zampini if (v2) { 35ca92afb2SStefano Zampini PetscInt nl; 36ca92afb2SStefano Zampini 379566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 389566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v2, &nl)); 399566063dSJacob Faibussowitsch PetscCall(VecGetArray(v2, &array2)); 409566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array2, array, nl)); 41ca92afb2SStefano Zampini } else { 429566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 43ca92afb2SStefano Zampini array2 = array; 44ca92afb2SStefano Zampini } 45ca92afb2SStefano Zampini if (!sol) { /* change rhs */ 46ca92afb2SStefano Zampini PetscInt n; 47ca92afb2SStefano Zampini for (n = 0; n < ctx->benign_n; n++) { 48ca92afb2SStefano Zampini PetscScalar sum = 0.; 49ca92afb2SStefano Zampini const PetscInt *cols; 50ca92afb2SStefano Zampini PetscInt nz, i; 51ca92afb2SStefano Zampini 529566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 539566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 54ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 5522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 5622db5ddcSStefano Zampini sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 5722db5ddcSStefano Zampini #else 58ca92afb2SStefano Zampini sum = -sum / nz; 5922db5ddcSStefano Zampini #endif 60ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 61ca92afb2SStefano Zampini ctx->benign_save_vals[n] = array2[cols[nz - 1]]; 62ca92afb2SStefano Zampini array2[cols[nz - 1]] = sum; 639566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 64ca92afb2SStefano Zampini } 65ca92afb2SStefano Zampini } else { 66ca92afb2SStefano Zampini PetscInt n; 67ca92afb2SStefano Zampini for (n = 0; n < ctx->benign_n; n++) { 68ca92afb2SStefano Zampini PetscScalar sum = 0.; 69ca92afb2SStefano Zampini const PetscInt *cols; 70ca92afb2SStefano Zampini PetscInt nz, i; 719566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 73ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 7422db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 7522db5ddcSStefano Zampini sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 7622db5ddcSStefano Zampini #else 77ca92afb2SStefano Zampini sum = -sum / nz; 7822db5ddcSStefano Zampini #endif 79ca92afb2SStefano Zampini for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 80ca92afb2SStefano Zampini array2[cols[nz - 1]] = ctx->benign_save_vals[n]; 819566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 82ca92afb2SStefano Zampini } 83ca92afb2SStefano Zampini } 84ca92afb2SStefano Zampini if (v2) { 859566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v2, &array2)); 87ca92afb2SStefano Zampini } else { 889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 89ca92afb2SStefano Zampini } 905cbda25cSStefano Zampini if (!sol && full) { 915cbda25cSStefano Zampini Vec usedv; 925cbda25cSStefano Zampini PetscInt n_I, size_schur; 935cbda25cSStefano Zampini 945cbda25cSStefano Zampini /* get sizes */ 959566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 969566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &n_I)); 975cbda25cSStefano Zampini n_I = n_I - size_schur; 985cbda25cSStefano Zampini /* compute schur rhs correction */ 995cbda25cSStefano Zampini if (v2) { 1005cbda25cSStefano Zampini usedv = v2; 1015cbda25cSStefano Zampini } else { 1025cbda25cSStefano Zampini usedv = v; 1035cbda25cSStefano Zampini } 1045cbda25cSStefano Zampini /* apply schur rhs correction */ 1059566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work)); 1069566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array)); 1079566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 1089566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array)); 1099566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec)); 1109566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 1115cbda25cSStefano Zampini } 112ca92afb2SStefano Zampini PetscFunctionReturn(0); 113ca92afb2SStefano Zampini } 114ca92afb2SStefano Zampini 1159371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) { 116df4d28bfSStefano Zampini PCBDDCReuseSolvers ctx; 117683d3df6SStefano Zampini PetscBool copy = PETSC_FALSE; 118d62866d3SStefano Zampini 119d62866d3SStefano Zampini PetscFunctionBegin; 1209566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc, &ctx)); 121683d3df6SStefano Zampini if (full) { 122d62866d3SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1239566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 124d62866d3SStefano Zampini #endif 1255cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1269566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 1275cbda25cSStefano Zampini #endif 128683d3df6SStefano Zampini copy = ctx->has_vertices; 129d4933d67SStefano Zampini } else { /* interior solver */ 1306dba178dSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1319566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0)); 1326dba178dSStefano Zampini #endif 133d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1349566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1)); 135d4933d67SStefano Zampini #endif 136683d3df6SStefano Zampini copy = PETSC_TRUE; 137683d3df6SStefano Zampini } 138683d3df6SStefano Zampini /* copy rhs into factored matrix workspace */ 139683d3df6SStefano Zampini if (copy) { 140ca92afb2SStefano Zampini PetscInt n; 141df4d28bfSStefano Zampini PetscScalar *array, *array_solver; 142ca92afb2SStefano Zampini 1439566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rhs, &n)); 1449566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array)); 1459566063dSJacob Faibussowitsch PetscCall(VecGetArray(ctx->rhs, &array_solver)); 1469566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array_solver, array, n)); 1479566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(ctx->rhs, &array_solver)); 1489566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array)); 149683d3df6SStefano Zampini 1509566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full)); 151683d3df6SStefano Zampini if (transpose) { 1529566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol)); 153683d3df6SStefano Zampini } else { 1549566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol)); 155683d3df6SStefano Zampini } 1569566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full)); 157683d3df6SStefano Zampini 158683d3df6SStefano Zampini /* get back data to caller worskpace */ 1599566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 1609566063dSJacob Faibussowitsch PetscCall(VecGetArray(sol, &array)); 1619566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array, array_solver, n)); 1629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sol, &array)); 1639566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 164683d3df6SStefano Zampini } else { 165ca92afb2SStefano Zampini if (ctx->benign_n) { 1669566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full)); 167ca92afb2SStefano Zampini if (transpose) { 1689566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol)); 169ca92afb2SStefano Zampini } else { 1709566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, ctx->rhs, sol)); 171ca92afb2SStefano Zampini } 1729566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full)); 173ca92afb2SStefano Zampini } else { 174e28d306cSStefano Zampini if (transpose) { 1759566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F, rhs, sol)); 176e28d306cSStefano Zampini } else { 1779566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F, rhs, sol)); 178e28d306cSStefano Zampini } 179683d3df6SStefano Zampini } 180ca92afb2SStefano Zampini } 1815cbda25cSStefano Zampini /* restore defaults */ 1825cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1839566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 1845cbda25cSStefano Zampini #endif 185d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1869566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 187d4933d67SStefano Zampini #endif 188d62866d3SStefano Zampini PetscFunctionReturn(0); 189d62866d3SStefano Zampini } 190d62866d3SStefano Zampini 1919371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) { 192e28d306cSStefano Zampini PetscFunctionBegin; 1939566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE)); 194e28d306cSStefano Zampini PetscFunctionReturn(0); 195e28d306cSStefano Zampini } 196e28d306cSStefano Zampini 1979371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) { 198e28d306cSStefano Zampini PetscFunctionBegin; 1999566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE)); 200683d3df6SStefano Zampini PetscFunctionReturn(0); 201683d3df6SStefano Zampini } 202683d3df6SStefano Zampini 2039371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) { 204683d3df6SStefano Zampini PetscFunctionBegin; 2059566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE)); 206683d3df6SStefano Zampini PetscFunctionReturn(0); 207683d3df6SStefano Zampini } 208683d3df6SStefano Zampini 2099371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) { 210683d3df6SStefano Zampini PetscFunctionBegin; 2119566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE)); 212e28d306cSStefano Zampini PetscFunctionReturn(0); 213e28d306cSStefano Zampini } 214e28d306cSStefano Zampini 2159371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) { 21615579a77SStefano Zampini PCBDDCReuseSolvers ctx; 21715579a77SStefano Zampini PetscBool iascii; 21815579a77SStefano Zampini 21915579a77SStefano Zampini PetscFunctionBegin; 2209566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc, &ctx)); 2219566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2221baa6e33SBarry Smith if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 2239566063dSJacob Faibussowitsch PetscCall(MatView(ctx->F, viewer)); 2241baa6e33SBarry Smith if (iascii) PetscCall(PetscViewerPopFormat(viewer)); 22515579a77SStefano Zampini PetscFunctionReturn(0); 22615579a77SStefano Zampini } 22715579a77SStefano Zampini 2289371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) { 229ca92afb2SStefano Zampini PetscInt i; 230d62866d3SStefano Zampini 231d62866d3SStefano Zampini PetscFunctionBegin; 2329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->F)); 2339566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol)); 2349566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs)); 2359566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->interior_solver)); 2369566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->correction_solver)); 2379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_R)); 2389566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_B)); 2399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 2409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol_B)); 2419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs_B)); 242*48a46eb9SPierre Jolivet for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); 2439566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 2449566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_save_vals)); 2459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_csAIB)); 2469566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 2479566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_corr_work)); 2489566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 249d62866d3SStefano Zampini PetscFunctionReturn(0); 250d62866d3SStefano Zampini } 251d5574798SStefano Zampini 2529371c9d4SSatish Balay static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) { 25332fe681dSStefano Zampini PCBDDCReuseSolvers ctx; 25432fe681dSStefano Zampini 25532fe681dSStefano Zampini PetscFunctionBegin; 25632fe681dSStefano Zampini PetscCall(PCShellGetContext(pc, &ctx)); 25732fe681dSStefano Zampini PetscCall(PCBDDCReuseSolversReset(ctx)); 25832fe681dSStefano Zampini PetscCall(PetscFree(ctx)); 25932fe681dSStefano Zampini PetscCall(PCShellSetContext(pc, NULL)); 26032fe681dSStefano Zampini PetscFunctionReturn(0); 26132fe681dSStefano Zampini } 26232fe681dSStefano Zampini 2639371c9d4SSatish Balay static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) { 2643202ece2SStefano Zampini Mat B, C, D, Bd, Cd, AinvBd; 2653202ece2SStefano Zampini KSP ksp; 2663202ece2SStefano Zampini PC pc; 2673202ece2SStefano Zampini PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 2683202ece2SStefano Zampini PetscReal fill = 2.0; 269f11841e3SStefano Zampini PetscInt n_I; 2703202ece2SStefano Zampini PetscMPIInt size; 2713202ece2SStefano Zampini 2723202ece2SStefano Zampini PetscFunctionBegin; 2739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size)); 2747827d75bSBarry Smith PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices"); 275f11841e3SStefano Zampini if (reuse == MAT_REUSE_MATRIX) { 276f11841e3SStefano Zampini PetscBool Sdense; 277f11841e3SStefano Zampini 2789566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 27928b400f6SJacob Faibussowitsch PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense"); 280f11841e3SStefano Zampini } 2819566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 2829566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(M, &ksp)); 2839566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 2849566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU)); 2859566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU)); 2869566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL)); 2879566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense)); 2889566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense)); 2899566063dSJacob Faibussowitsch PetscCall(MatGetSize(B, &n_I, NULL)); 290f11841e3SStefano Zampini if (n_I) { 2913202ece2SStefano Zampini if (!Bdense) { 2929566063dSJacob Faibussowitsch PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 2933202ece2SStefano Zampini } else { 2943202ece2SStefano Zampini Bd = B; 2953202ece2SStefano Zampini } 2963202ece2SStefano Zampini 2973202ece2SStefano Zampini if (isLU || isILU || isCHOL) { 2983202ece2SStefano Zampini Mat fact; 2999566063dSJacob Faibussowitsch PetscCall(KSPSetUp(ksp)); 3009566063dSJacob Faibussowitsch PetscCall(PCFactorGetMatrix(pc, &fact)); 3019566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 3029566063dSJacob Faibussowitsch PetscCall(MatMatSolve(fact, Bd, AinvBd)); 3033202ece2SStefano Zampini } else { 30407b1e237SStefano Zampini PetscBool ex = PETSC_TRUE; 30507b1e237SStefano Zampini 30607b1e237SStefano Zampini if (ex) { 3073202ece2SStefano Zampini Mat Ainvd; 3083202ece2SStefano Zampini 3099566063dSJacob Faibussowitsch PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 3109566063dSJacob Faibussowitsch PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 3119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ainvd)); 31207b1e237SStefano Zampini } else { 31307b1e237SStefano Zampini Vec sol, rhs; 31407b1e237SStefano Zampini PetscScalar *arrayrhs, *arraysol; 31507b1e237SStefano Zampini PetscInt i, nrhs, n; 31607b1e237SStefano Zampini 3179566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 3189566063dSJacob Faibussowitsch PetscCall(MatGetSize(Bd, &n, &nrhs)); 3199566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bd, &arrayrhs)); 3209566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(AinvBd, &arraysol)); 3219566063dSJacob Faibussowitsch PetscCall(KSPGetSolution(ksp, &sol)); 3229566063dSJacob Faibussowitsch PetscCall(KSPGetRhs(ksp, &rhs)); 32307b1e237SStefano Zampini for (i = 0; i < nrhs; i++) { 3249566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(rhs, arrayrhs + i * n)); 3259566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(sol, arraysol + i * n)); 3269566063dSJacob Faibussowitsch PetscCall(KSPSolve(ksp, rhs, sol)); 3279566063dSJacob Faibussowitsch PetscCall(VecResetArray(rhs)); 3289566063dSJacob Faibussowitsch PetscCall(VecResetArray(sol)); 32907b1e237SStefano Zampini } 3309566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bd, &arrayrhs)); 3319566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs)); 33207b1e237SStefano Zampini } 3333202ece2SStefano Zampini } 334*48a46eb9SPierre Jolivet if (!Bdense & !issym) PetscCall(MatDestroy(&Bd)); 3355ec10c6aSStefano Zampini 3365ec10c6aSStefano Zampini if (!issym) { 3373202ece2SStefano Zampini if (!Cdense) { 3389566063dSJacob Faibussowitsch PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 3393202ece2SStefano Zampini } else { 3403202ece2SStefano Zampini Cd = C; 3413202ece2SStefano Zampini } 3429566063dSJacob Faibussowitsch PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 343*48a46eb9SPierre Jolivet if (!Cdense) PetscCall(MatDestroy(&Cd)); 3445ec10c6aSStefano Zampini } else { 3459566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 346*48a46eb9SPierre Jolivet if (!Bdense) PetscCall(MatDestroy(&Bd)); 3475ec10c6aSStefano Zampini } 3489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AinvBd)); 349f11841e3SStefano Zampini } 3503202ece2SStefano Zampini 3513202ece2SStefano Zampini if (D) { 3523202ece2SStefano Zampini Mat Dd; 3533202ece2SStefano Zampini PetscBool Ddense; 3543202ece2SStefano Zampini 3559566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense)); 3563202ece2SStefano Zampini if (!Ddense) { 3579566063dSJacob Faibussowitsch PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 3583202ece2SStefano Zampini } else { 3593202ece2SStefano Zampini Dd = D; 3603202ece2SStefano Zampini } 361f11841e3SStefano Zampini if (n_I) { 3629566063dSJacob Faibussowitsch PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN)); 363f11841e3SStefano Zampini } else { 364f11841e3SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 3659566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S)); 366f11841e3SStefano Zampini } else { 3679566063dSJacob Faibussowitsch PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN)); 368f11841e3SStefano Zampini } 369f11841e3SStefano Zampini } 370*48a46eb9SPierre Jolivet if (!Ddense) PetscCall(MatDestroy(&Dd)); 3713202ece2SStefano Zampini } else { 3729566063dSJacob Faibussowitsch PetscCall(MatScale(*S, -1.0)); 3733202ece2SStefano Zampini } 3743202ece2SStefano Zampini PetscFunctionReturn(0); 3753202ece2SStefano Zampini } 37634a97f8cSStefano Zampini 3779371c9d4SSatish Balay PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) { 378be83ff47SStefano Zampini Mat F, A_II, A_IB, A_BI, A_BB, AE_II; 379be83ff47SStefano Zampini Mat S_all; 38057a87bf3SStefano Zampini Vec gstash, lstash; 38157a87bf3SStefano Zampini VecScatter sstash; 382b7ab4a40SStefano Zampini IS is_I, is_I_layer; 383dc456d91SStefano Zampini IS all_subsets, all_subsets_mult, all_subsets_n; 38457a87bf3SStefano Zampini PetscScalar *stasharray, *Bwork; 385dc456d91SStefano Zampini PetscInt *nnz, *all_local_idx_N; 386dc456d91SStefano Zampini PetscInt *auxnum1, *auxnum2; 3875a95e1ceSStefano Zampini PetscInt i, subset_size, max_subset_size; 388683d3df6SStefano Zampini PetscInt n_B, extra, local_size, global_size; 38957a87bf3SStefano Zampini PetscInt local_stash_size; 39008122e43SStefano Zampini PetscBLASInt B_N, B_ierr, B_lwork, *pivots; 3915a95e1ceSStefano Zampini MPI_Comm comm_n; 392f4f7d9d6SStefano Zampini PetscBool deluxe = PETSC_TRUE; 393f4f7d9d6SStefano Zampini PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 3943b03f7bbSStefano Zampini PetscViewer matl_dbg_viewer = NULL; 39535d0533cSStefano Zampini PetscBool flg; 396b1b3d7a2SStefano Zampini 397b1b3d7a2SStefano Zampini PetscFunctionBegin; 3989566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A)); 3999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S)); 400e62b6521Sstefano_zampini if (Ain) { 4019566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Ain)); 402a64f4aa4SStefano Zampini sub_schurs->A = Ain; 403a64f4aa4SStefano Zampini } 4043301b35fSStefano Zampini 4059566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Sin)); 406a64f4aa4SStefano Zampini sub_schurs->S = Sin; 4079371c9d4SSatish Balay if (sub_schurs->schur_explicit) { sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); } 408a64f4aa4SStefano Zampini 4095a95e1ceSStefano Zampini /* preliminary checks */ 4107827d75bSBarry Smith PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 4115a95e1ceSStefano Zampini 41288113c35SStefano Zampini if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 41388113c35SStefano Zampini 4143b03f7bbSStefano Zampini /* debug (MATLAB) */ 4157f9db97bSStefano Zampini if (sub_schurs->debug) { 4167f9db97bSStefano Zampini PetscMPIInt size, rank; 4177ebab0bbSStefano Zampini PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE; 4187f9db97bSStefano Zampini 4199566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size)); 4209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 4217f9db97bSStefano Zampini nr = size; 4229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &print_schurs_ranks)); 423d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 4249566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg)); 4257f9db97bSStefano Zampini if (!flg) print_schurs = PETSC_TRUE; 4267f9db97bSStefano Zampini else { 4277ebab0bbSStefano Zampini print_schurs = PETSC_FALSE; 4289371c9d4SSatish Balay for (i = 0; i < nr; i++) 4299371c9d4SSatish Balay if (print_schurs_ranks[i] == (PetscInt)rank) { 4309371c9d4SSatish Balay print_schurs = PETSC_TRUE; 4319371c9d4SSatish Balay break; 4329371c9d4SSatish Balay } 4337f9db97bSStefano Zampini } 434d0609cedSBarry Smith PetscOptionsEnd(); 4359566063dSJacob Faibussowitsch PetscCall(PetscFree(print_schurs_ranks)); 4363b03f7bbSStefano Zampini if (print_schurs) { 4373b03f7bbSStefano Zampini char filename[256]; 4383b03f7bbSStefano Zampini 4399566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank)); 4409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer)); 4419566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB)); 4423b03f7bbSStefano Zampini } 4437f9db97bSStefano Zampini } 4447f9db97bSStefano Zampini 4455a95e1ceSStefano Zampini /* restrict work on active processes */ 446991c41b4SStefano Zampini if (sub_schurs->restrict_comm) { 447991c41b4SStefano Zampini PetscSubcomm subcomm; 448991c41b4SStefano Zampini PetscMPIInt color, rank; 449991c41b4SStefano Zampini 4505a95e1ceSStefano Zampini color = 0; 4515a95e1ceSStefano Zampini if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 4529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 4539566063dSJacob Faibussowitsch PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm)); 4549566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetNumber(subcomm, 2)); 4559566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 4569566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL)); 4579566063dSJacob Faibussowitsch PetscCall(PetscSubcommDestroy(&subcomm)); 4585a95e1ceSStefano Zampini if (!sub_schurs->n_subs) { 4599566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n)); 4605a95e1ceSStefano Zampini PetscFunctionReturn(0); 4615a95e1ceSStefano Zampini } 462991c41b4SStefano Zampini } else { 4639566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL)); 464991c41b4SStefano Zampini } 4655a95e1ceSStefano Zampini 466b1b3d7a2SStefano Zampini /* get Schur complement matrices */ 467df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 468a64f4aa4SStefano Zampini Mat tA_IB, tA_BI, tA_BB; 4693301b35fSStefano Zampini PetscBool isseqsbaij; 4709566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB)); 4719566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij)); 4723301b35fSStefano Zampini if (isseqsbaij) { 4739566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB)); 4749566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB)); 4759566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI)); 476a64f4aa4SStefano Zampini } else { 4779566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BB)); 478a64f4aa4SStefano Zampini A_BB = tA_BB; 4799566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_IB)); 480a64f4aa4SStefano Zampini A_IB = tA_IB; 4819566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BI)); 482a64f4aa4SStefano Zampini A_BI = tA_BI; 483f11841e3SStefano Zampini } 484a58a30b4SStefano Zampini } else { 4855a95e1ceSStefano Zampini A_II = NULL; 4865a95e1ceSStefano Zampini A_IB = NULL; 4875a95e1ceSStefano Zampini A_BI = NULL; 4885a95e1ceSStefano Zampini A_BB = NULL; 489b1b3d7a2SStefano Zampini } 4905a95e1ceSStefano Zampini S_all = NULL; 491b1b3d7a2SStefano Zampini 492b1b3d7a2SStefano Zampini /* determine interior problems */ 4939566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &i)); 4943dc780c3SStefano Zampini if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 495b1b3d7a2SStefano Zampini PetscBT touched; 496b1b3d7a2SStefano Zampini const PetscInt *idx_B; 497b1b3d7a2SStefano Zampini PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering; 498b1b3d7a2SStefano Zampini 49928b400f6SJacob Faibussowitsch PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency"); 500b1b3d7a2SStefano Zampini /* get sizes */ 5019566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I)); 5029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 503b1b3d7a2SStefano Zampini 5049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_I + n_B, &local_numbering)); 5059566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_I + n_B, &touched)); 5069566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(n_I + n_B, touched)); 507b1b3d7a2SStefano Zampini 508b1b3d7a2SStefano Zampini /* all boundary dofs must be skipped when adding layers */ 5099566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B)); 510*48a46eb9SPierre Jolivet for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j])); 5119566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(local_numbering, idx_B, n_B)); 5129566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B)); 513b1b3d7a2SStefano Zampini 514b1b3d7a2SStefano Zampini /* add prescribed number of layers of dofs */ 515b1b3d7a2SStefano Zampini n_local_dofs = n_B; 516b1b3d7a2SStefano Zampini n_prev_added = n_B; 517b1b3d7a2SStefano Zampini for (layer = 0; layer < nlayers; layer++) { 518b6bace71SJacob Faibussowitsch PetscInt n_added = 0; 519b1b3d7a2SStefano Zampini if (n_local_dofs == n_I + n_B) break; 52063a3b9bcSJacob Faibussowitsch PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B); 5219566063dSJacob Faibussowitsch PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added)); 522b1b3d7a2SStefano Zampini n_prev_added = n_added; 523b1b3d7a2SStefano Zampini n_local_dofs += n_added; 524b1b3d7a2SStefano Zampini if (!n_added) break; 525b1b3d7a2SStefano Zampini } 5269566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&touched)); 527b1b3d7a2SStefano Zampini 528883469d8SStefano Zampini /* IS for I layer dofs in original numbering */ 5299566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer)); 5309566063dSJacob Faibussowitsch PetscCall(PetscFree(local_numbering)); 5319566063dSJacob Faibussowitsch PetscCall(ISSort(is_I_layer)); 532883469d8SStefano Zampini /* IS for I layer dofs in I numbering */ 533df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 534b1b3d7a2SStefano Zampini ISLocalToGlobalMapping ItoNmap; 5359566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap)); 5369566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I)); 5379566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 538b1b3d7a2SStefano Zampini 539b1b3d7a2SStefano Zampini /* II block */ 5409566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II)); 541b1b3d7a2SStefano Zampini } 542b1b3d7a2SStefano Zampini } else { 543b1b3d7a2SStefano Zampini PetscInt n_I; 544b1b3d7a2SStefano Zampini 545b1b3d7a2SStefano Zampini /* IS for I dofs in original numbering */ 5469566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 547a9b99552SStefano Zampini is_I_layer = sub_schurs->is_I; 548b1b3d7a2SStefano Zampini 549b1b3d7a2SStefano Zampini /* IS for I dofs in I numbering (strided 1) */ 550df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 5519566063dSJacob Faibussowitsch PetscCall(ISGetSize(sub_schurs->is_I, &n_I)); 5529566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I)); 553b1b3d7a2SStefano Zampini 554b1b3d7a2SStefano Zampini /* II block is the same */ 5559566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A_II)); 556b1b3d7a2SStefano Zampini AE_II = A_II; 557b1b3d7a2SStefano Zampini } 558b1b3d7a2SStefano Zampini } 5595a95e1ceSStefano Zampini 560883469d8SStefano Zampini /* Get info on subset sizes and sum of all subsets sizes */ 5615a95e1ceSStefano Zampini max_subset_size = 0; 562883469d8SStefano Zampini local_size = 0; 5635a95e1ceSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 5649566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 5655a95e1ceSStefano Zampini max_subset_size = PetscMax(subset_size, max_subset_size); 566883469d8SStefano Zampini local_size += subset_size; 567883469d8SStefano Zampini } 568883469d8SStefano Zampini 569883469d8SStefano Zampini /* Work arrays for local indices */ 570883469d8SStefano Zampini extra = 0; 5719566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 572*48a46eb9SPierre Jolivet if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra)); 5739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N)); 574883469d8SStefano Zampini if (extra) { 575883469d8SStefano Zampini const PetscInt *idxs; 5769566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_I_layer, &idxs)); 5779566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra)); 5789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_I_layer, &idxs)); 579883469d8SStefano Zampini } 5809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1)); 5819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2)); 582883469d8SStefano Zampini 583883469d8SStefano Zampini /* Get local indices in local numbering */ 584883469d8SStefano Zampini local_size = 0; 58557a87bf3SStefano Zampini local_stash_size = 0; 5865a95e1ceSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 587883469d8SStefano Zampini const PetscInt *idxs; 588883469d8SStefano Zampini 5899566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 5909566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 591eb595f79SStefano Zampini /* start (smallest in global ordering) and multiplicity */ 592eb595f79SStefano Zampini auxnum1[i] = idxs[0]; 59357a87bf3SStefano Zampini auxnum2[i] = subset_size * subset_size; 594883469d8SStefano Zampini /* subset indices in local numbering */ 5959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size)); 5969566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 597883469d8SStefano Zampini local_size += subset_size; 59857a87bf3SStefano Zampini local_stash_size += subset_size * subset_size; 599883469d8SStefano Zampini } 600883469d8SStefano Zampini 601f4f7d9d6SStefano Zampini /* allocate extra workspace needed only for GETRI or SYTRF */ 60211955456SStefano Zampini use_potr = use_sytr = PETSC_FALSE; 60311955456SStefano Zampini if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 604f4f7d9d6SStefano Zampini use_potr = PETSC_TRUE; 60511955456SStefano Zampini } else if (sub_schurs->is_symmetric) { 60611955456SStefano Zampini use_sytr = PETSC_TRUE; 60711955456SStefano Zampini } 60811955456SStefano Zampini if (local_size && !use_potr) { 60959ac4de7SStefano Zampini PetscScalar lwork, dummyscalar = 0.; 61059ac4de7SStefano Zampini PetscBLASInt dummyint = 0; 611d2627357SStefano Zampini 612d2627357SStefano Zampini B_lwork = -1; 6139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(local_size, &B_N)); 6149566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 615f4f7d9d6SStefano Zampini if (use_sytr) { 616792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 61728b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %d", (int)B_ierr); 618f4f7d9d6SStefano Zampini } else { 619792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 62028b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %d", (int)B_ierr); 621f4f7d9d6SStefano Zampini } 6229566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 6239566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 6249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots)); 625056290a2SStefano Zampini } else { 626056290a2SStefano Zampini Bwork = NULL; 627056290a2SStefano Zampini pivots = NULL; 628d2627357SStefano Zampini } 629d2627357SStefano Zampini 63057a87bf3SStefano Zampini /* prepare data for summing up properly schurs on subsets */ 6319566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n)); 6329566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets)); 6339566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n)); 6349566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult)); 6359566063dSJacob Faibussowitsch PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n)); 6369566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets)); 6379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_mult)); 6389566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(all_subsets_n, &i)); 63963a3b9bcSJacob Faibussowitsch PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size); 6409566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash)); 6419566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash)); 6429566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash)); 6439566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n)); 6442972d61bSStefano Zampini 6455a95e1ceSStefano Zampini /* subset indices in local boundary numbering */ 6465a95e1ceSStefano Zampini if (!sub_schurs->is_Ej_all) { 6475a95e1ceSStefano Zampini PetscInt *all_local_idx_B; 6485a95e1ceSStefano Zampini 6499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(local_size, &all_local_idx_B)); 6509566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B)); 65163a3b9bcSJacob Faibussowitsch PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size); 6529566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all)); 653b1b3d7a2SStefano Zampini } 654b1b3d7a2SStefano Zampini 65572b8c272SStefano Zampini if (change) { 65672b8c272SStefano Zampini ISLocalToGlobalMapping BtoS; 65772b8c272SStefano Zampini IS change_primal_B; 65872b8c272SStefano Zampini IS change_primal_all; 65972b8c272SStefano Zampini 66028b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 66128b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 6629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub)); 66372b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 66472b8c272SStefano Zampini ISLocalToGlobalMapping NtoS; 6659566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS)); 6669566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i])); 6679566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 66872b8c272SStefano Zampini } 6699566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B)); 6709566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS)); 6719566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all)); 6729566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 6739566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_B)); 6749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change)); 67572b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 67672b8c272SStefano Zampini Mat change_sub; 67772b8c272SStefano Zampini 6789566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 6799566063dSJacob Faibussowitsch PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i])); 6809566063dSJacob Faibussowitsch PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY)); 68172b8c272SStefano Zampini if (!sub_schurs->change_with_qr) { 6829566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub)); 68372b8c272SStefano Zampini } else { 68472b8c272SStefano Zampini Mat change_subt; 6859566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt)); 6869566063dSJacob Faibussowitsch PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub)); 6879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_subt)); 68872b8c272SStefano Zampini } 6899566063dSJacob Faibussowitsch PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub)); 6909566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_sub)); 6919566063dSJacob Faibussowitsch PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix)); 6929566063dSJacob Faibussowitsch PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_")); 69372b8c272SStefano Zampini } 6949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_all)); 69572b8c272SStefano Zampini } 69672b8c272SStefano Zampini 6975a95e1ceSStefano Zampini /* Local matrix of all local Schur on subsets (transposed) */ 6985a95e1ceSStefano Zampini if (!sub_schurs->S_Ej_all) { 69904c5b2e6SStefano Zampini Mat T; 70004c5b2e6SStefano Zampini PetscScalar *v; 70104c5b2e6SStefano Zampini PetscInt *ii, *jj; 70204c5b2e6SStefano Zampini PetscInt cum, i, j, k; 70304c5b2e6SStefano Zampini 70404c5b2e6SStefano Zampini /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 70504c5b2e6SStefano Zampini Allocate properly a representative matrix and duplicate */ 7069566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v)); 70704c5b2e6SStefano Zampini ii[0] = 0; 70804c5b2e6SStefano Zampini cum = 0; 70904c5b2e6SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 7109566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 71104c5b2e6SStefano Zampini for (j = 0; j < subset_size; j++) { 71204c5b2e6SStefano Zampini const PetscInt row = cum + j; 71304c5b2e6SStefano Zampini PetscInt col = cum; 71404c5b2e6SStefano Zampini 71504c5b2e6SStefano Zampini ii[row + 1] = ii[row] + subset_size; 71604c5b2e6SStefano Zampini for (k = ii[row]; k < ii[row + 1]; k++) { 71704c5b2e6SStefano Zampini jj[k] = col; 71804c5b2e6SStefano Zampini col++; 71904c5b2e6SStefano Zampini } 72004c5b2e6SStefano Zampini } 72104c5b2e6SStefano Zampini cum += subset_size; 72204c5b2e6SStefano Zampini } 7239566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T)); 7249566063dSJacob Faibussowitsch PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all)); 7259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 7269566063dSJacob Faibussowitsch PetscCall(PetscFree3(ii, jj, v)); 72704c5b2e6SStefano Zampini } 72804c5b2e6SStefano Zampini /* matrices for deluxe scaling and adaptive selection */ 72904c5b2e6SStefano Zampini if (compute_Stilda) { 730*48a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all)); 731*48a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all)); 732aa83b6aeSStefano Zampini } 733b1b3d7a2SStefano Zampini 7345a95e1ceSStefano Zampini /* Compute Schur complements explicitly */ 735be83ff47SStefano Zampini F = NULL; 736d943a642SStefano Zampini if (!sub_schurs->schur_explicit) { 737d943a642SStefano Zampini /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 738d943a642SStefano Zampini it is not efficient, unless the economic version of the scaling is used */ 7395a95e1ceSStefano Zampini Mat S_Ej_expl; 7405a95e1ceSStefano Zampini PetscScalar *work; 7415a95e1ceSStefano Zampini PetscInt j, *dummy_idx; 7425a95e1ceSStefano Zampini PetscBool Sdense; 7435a95e1ceSStefano Zampini 7449566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work)); 7455a95e1ceSStefano Zampini local_size = 0; 746b1b3d7a2SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 7475a95e1ceSStefano Zampini IS is_subset_B; 7485a95e1ceSStefano Zampini Mat AE_EE, AE_IE, AE_EI, S_Ej; 7495a95e1ceSStefano Zampini 7505a95e1ceSStefano Zampini /* subsets in original and boundary numbering */ 7519566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B)); 7525a95e1ceSStefano Zampini /* EE block */ 7539566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE)); 7545a95e1ceSStefano Zampini /* IE block */ 7559566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE)); 7565a95e1ceSStefano Zampini /* EI block */ 757d943a642SStefano Zampini if (sub_schurs->is_symmetric) { 7589566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(AE_IE, &AE_EI)); 759d943a642SStefano Zampini } else if (sub_schurs->is_hermitian) { 7609566063dSJacob Faibussowitsch PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI)); 7615a95e1ceSStefano Zampini } else { 7629566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI)); 7635a95e1ceSStefano Zampini } 7649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_subset_B)); 7659566063dSJacob Faibussowitsch PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej)); 7669566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EE)); 7679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_IE)); 7689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EI)); 769b1b3d7a2SStefano Zampini if (AE_II == A_II) { /* we can reuse the same ksp */ 770b1b3d7a2SStefano Zampini KSP ksp; 7719566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp)); 7729566063dSJacob Faibussowitsch PetscCall(MatSchurComplementSetKSP(S_Ej, ksp)); 773b1b3d7a2SStefano Zampini } else { /* build new ksp object which inherits ksp and pc types from the original one */ 774b1b3d7a2SStefano Zampini KSP origksp, schurksp; 775b1b3d7a2SStefano Zampini PC origpc, schurpc; 776b1b3d7a2SStefano Zampini KSPType ksp_type; 777b1b3d7a2SStefano Zampini PetscInt n_internal; 7785a95e1ceSStefano Zampini PetscBool ispcnone; 779b1b3d7a2SStefano Zampini 7809566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp)); 7819566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp)); 7829566063dSJacob Faibussowitsch PetscCall(KSPGetType(origksp, &ksp_type)); 7839566063dSJacob Faibussowitsch PetscCall(KSPSetType(schurksp, ksp_type)); 7849566063dSJacob Faibussowitsch PetscCall(KSPGetPC(schurksp, &schurpc)); 7859566063dSJacob Faibussowitsch PetscCall(KSPGetPC(origksp, &origpc)); 7869566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone)); 7875a95e1ceSStefano Zampini if (!ispcnone) { 7885a95e1ceSStefano Zampini PCType pc_type; 7899566063dSJacob Faibussowitsch PetscCall(PCGetType(origpc, &pc_type)); 7909566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc, pc_type)); 7915a95e1ceSStefano Zampini } else { 7929566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc, PCLU)); 7935a95e1ceSStefano Zampini } 7949566063dSJacob Faibussowitsch PetscCall(ISGetSize(is_I, &n_internal)); 795365a3a41SStefano Zampini if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 7963ca39a21SBarry Smith MatSolverType solver = NULL; 7979566063dSJacob Faibussowitsch PetscCall(PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver)); 7981baa6e33SBarry Smith if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver)); 799b1b3d7a2SStefano Zampini } 8009566063dSJacob Faibussowitsch PetscCall(KSPSetUp(schurksp)); 801b1b3d7a2SStefano Zampini } 8029566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 8039566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl)); 8049566063dSJacob Faibussowitsch PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl)); 8059566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense)); 8065a95e1ceSStefano Zampini if (Sdense) { 8079371c9d4SSatish Balay for (j = 0; j < subset_size; j++) { dummy_idx[j] = local_size + j; } 8089566063dSJacob Faibussowitsch PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES)); 8096c4ed002SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices"); 8109566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej)); 8119566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej_expl)); 8125a95e1ceSStefano Zampini local_size += subset_size; 8135a95e1ceSStefano Zampini } 8149566063dSJacob Faibussowitsch PetscCall(PetscFree2(dummy_idx, work)); 815b1b3d7a2SStefano Zampini /* free */ 8169566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I)); 8179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_II)); 8189566063dSJacob Faibussowitsch PetscCall(PetscFree(all_local_idx_N)); 819883469d8SStefano Zampini } else { 8205cbda25cSStefano Zampini Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL; 82132fe681dSStefano Zampini Mat *gdswA; 8229d54b7f4SStefano Zampini Vec Dall = NULL; 823ca92afb2SStefano Zampini IS is_A_all, *is_p_r = NULL; 8247ebab0bbSStefano Zampini MatType Stype; 8255cbda25cSStefano Zampini PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL; 82604c5b2e6SStefano Zampini PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL; 8271683a169SBarry Smith const PetscScalar *rS_data; 82804c5b2e6SStefano Zampini PetscInt n, n_I, size_schur, size_active_schur, cum, cum2; 8293fc34f97SStefano Zampini PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE; 8303fc34f97SStefano Zampini PetscBool schur_has_vertices, factor_workaround; 83111955456SStefano Zampini PetscBool use_cholesky; 8327ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 8337ebab0bbSStefano Zampini PetscBool oldpin; 8347ebab0bbSStefano Zampini #endif 835883469d8SStefano Zampini 836683d3df6SStefano Zampini /* get sizes */ 83781ea8064SStefano Zampini n_I = 0; 838*48a46eb9SPierre Jolivet if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I)); 839683d3df6SStefano Zampini economic = PETSC_FALSE; 8409566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum)); 841683d3df6SStefano Zampini if (cum != n_I) economic = PETSC_TRUE; 8429566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL)); 8439d54b7f4SStefano Zampini size_active_schur = local_size; 8449d54b7f4SStefano Zampini 845f17d2ae1SStefano Zampini /* import scaling vector (wrong formulation if we have 3D edges) */ 8469d54b7f4SStefano Zampini if (scaling && compute_Stilda) { 8479d54b7f4SStefano Zampini const PetscScalar *array; 8489d54b7f4SStefano Zampini PetscScalar *array2; 8499d54b7f4SStefano Zampini const PetscInt *idxs; 8509d54b7f4SStefano Zampini PetscInt i; 8519d54b7f4SStefano Zampini 8529566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 8539566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall)); 8549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scaling, &array)); 8559566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall, &array2)); 8569d54b7f4SStefano Zampini for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]]; 8579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall, &array2)); 8589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scaling, &array)); 8599566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 8609d54b7f4SStefano Zampini deluxe = PETSC_FALSE; 8619d54b7f4SStefano Zampini } 862d62866d3SStefano Zampini 863683d3df6SStefano Zampini /* size active schurs does not count any dirichlet or vertex dof on the interface */ 8643fc34f97SStefano Zampini factor_workaround = PETSC_FALSE; 8653fc34f97SStefano Zampini schur_has_vertices = PETSC_FALSE; 866683d3df6SStefano Zampini cum = n_I + size_active_schur; 867683d3df6SStefano Zampini if (sub_schurs->is_dir) { 868683d3df6SStefano Zampini const PetscInt *idxs; 869683d3df6SStefano Zampini PetscInt n_dir; 870683d3df6SStefano Zampini 8719566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 8729566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 8739566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir)); 8749566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 875683d3df6SStefano Zampini cum += n_dir; 87632fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 877d62866d3SStefano Zampini } 878683d3df6SStefano Zampini /* include the primal vertices in the Schur complement */ 879367aa537SStefano Zampini if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 880683d3df6SStefano Zampini PetscInt n_v; 881683d3df6SStefano Zampini 8829566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 883683d3df6SStefano Zampini if (n_v) { 884683d3df6SStefano Zampini const PetscInt *idxs; 885683d3df6SStefano Zampini 8869566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 8879566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v)); 8889566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 889683d3df6SStefano Zampini cum += n_v; 89032fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 8913fc34f97SStefano Zampini schur_has_vertices = PETSC_TRUE; 892683d3df6SStefano Zampini } 893683d3df6SStefano Zampini } 894683d3df6SStefano Zampini size_schur = cum - n_I; 8959566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all)); 8967ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 897b470e4b4SRichard Tran Mills oldpin = sub_schurs->A->boundtocpu; 8989566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE)); 8997ebab0bbSStefano Zampini #endif 900683d3df6SStefano Zampini if (cum == n) { 9019566063dSJacob Faibussowitsch PetscCall(ISSetPermutation(is_A_all)); 9029566063dSJacob Faibussowitsch PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A)); 903683d3df6SStefano Zampini } else { 9049566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A)); 905683d3df6SStefano Zampini } 9067ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 9079566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A, oldpin)); 9087ebab0bbSStefano Zampini #endif 90926cc229bSBarry Smith PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix)); 91026cc229bSBarry Smith PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_")); 911ca92afb2SStefano Zampini 912ca92afb2SStefano Zampini /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 9137ebab0bbSStefano Zampini this is a workaround */ 914ca92afb2SStefano Zampini if (benign_n) { 9157ebab0bbSStefano Zampini Vec v, benign_AIIm1_ones; 916ca92afb2SStefano Zampini ISLocalToGlobalMapping N_to_reor; 917ca92afb2SStefano Zampini IS is_p0, is_p0_p; 9185cbda25cSStefano Zampini PetscScalar *cs_AIB, *AIIm1_data; 9195cbda25cSStefano Zampini PetscInt sizeA; 920ca92afb2SStefano Zampini 9219566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor)); 9229566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 9239566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p)); 9249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0)); 9259566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 9269566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &sizeA)); 9279566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat)); 9289566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat)); 9299566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB)); 9309566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 9319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n, &is_p_r)); 932ca92afb2SStefano Zampini /* compute colsum of A_IB restricted to pressures */ 933ca92afb2SStefano Zampini for (i = 0; i < benign_n; i++) { 9347ebab0bbSStefano Zampini const PetscScalar *array; 935ca92afb2SStefano Zampini const PetscInt *idxs; 936ca92afb2SStefano Zampini PetscInt j, nz; 937ca92afb2SStefano Zampini 9389566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i])); 9399566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 9409566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i], &idxs)); 9415cbda25cSStefano Zampini for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.; 9429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 9439566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 9449566063dSJacob Faibussowitsch PetscCall(MatMult(A, benign_AIIm1_ones, v)); 9459566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones)); 9469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, &array)); 94722db5ddcSStefano Zampini for (j = 0; j < size_schur; j++) { 94822db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 94922db5ddcSStefano Zampini cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz)); 95022db5ddcSStefano Zampini #else 95122db5ddcSStefano Zampini cs_AIB[i * size_schur + j] = array[j + n_I] / nz; 95222db5ddcSStefano Zampini #endif 95322db5ddcSStefano Zampini } 9549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, &array)); 955ca92afb2SStefano Zampini } 9569566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB)); 9579566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 9589566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v)); 9599566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones)); 9609566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 9619566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 9629566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 9639566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL)); 9649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0_p)); 9659566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 966ca92afb2SStefano Zampini } 9679566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric)); 9689566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian)); 9699566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef)); 970883469d8SStefano Zampini 97111955456SStefano Zampini /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 97211955456SStefano Zampini use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 97311955456SStefano Zampini 974683d3df6SStefano Zampini /* when using the benign subspace trick, the local Schur complements are SPD */ 97535d0533cSStefano Zampini /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 97635d0533cSStefano Zampini Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 97735d0533cSStefano Zampini if (benign_trick) { 97835d0533cSStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 9799566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg)); 98035d0533cSStefano Zampini if (flg) use_cholesky = PETSC_FALSE; 98135d0533cSStefano Zampini } 982d47842beSStefano Zampini 983f4f7d9d6SStefano Zampini if (n_I) { 9840aa714b2SStefano Zampini IS is_schur; 9857ebab0bbSStefano Zampini char stype[64]; 9864ba54290SStefano Zampini PetscBool gpu = PETSC_FALSE; 9875a05ddb0SStefano Zampini 98811955456SStefano Zampini if (use_cholesky) { 9899566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F)); 990883469d8SStefano Zampini } else { 9919566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F)); 992883469d8SStefano Zampini } 9939566063dSJacob Faibussowitsch PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE)); 99435d0533cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 9959566063dSJacob Faibussowitsch if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 99635d0533cSStefano Zampini #endif 997883469d8SStefano Zampini /* subsets ordered last */ 9989566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur)); 9999566063dSJacob Faibussowitsch PetscCall(MatFactorSetSchurIS(F, is_schur)); 10009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_schur)); 1001883469d8SStefano Zampini 1002883469d8SStefano Zampini /* factorization step */ 100311955456SStefano Zampini if (use_cholesky) { 10049566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 1005be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 10069566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 19, 2)); 1007be83ff47SStefano Zampini #endif 10089566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 1009a0b0af32SStefano Zampini S_lower_triangular = PETSC_TRUE; 1010883469d8SStefano Zampini } else { 10119566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 1012be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 10139566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 19, 3)); 1014be83ff47SStefano Zampini #endif 10159566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(F, A, NULL)); 1016883469d8SStefano Zampini } 10179566063dSJacob Faibussowitsch PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view")); 1018883469d8SStefano Zampini 10193b03f7bbSStefano Zampini if (matl_dbg_viewer) { 102011955456SStefano Zampini Mat S; 102111955456SStefano Zampini IS is; 102211955456SStefano Zampini 10239566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)A, "A")); 10249566063dSJacob Faibussowitsch PetscCall(MatView(A, matl_dbg_viewer)); 10259566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 10269566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S, "S")); 10279566063dSJacob Faibussowitsch PetscCall(MatView(S, matl_dbg_viewer)); 10289566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S)); 10299566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is)); 10309566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is, "I")); 10319566063dSJacob Faibussowitsch PetscCall(ISView(is, matl_dbg_viewer)); 10329566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is)); 10339566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is)); 10349566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is, "B")); 10359566063dSJacob Faibussowitsch PetscCall(ISView(is, matl_dbg_viewer)); 10369566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is)); 10379566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA")); 10389566063dSJacob Faibussowitsch PetscCall(ISView(is_A_all, matl_dbg_viewer)); 103932fe681dSStefano Zampini for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { 104032fe681dSStefano Zampini IS is; 104132fe681dSStefano Zampini char name[16]; 104232fe681dSStefano Zampini 104332fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i)); 104432fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 104532fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is)); 104632fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)is, name)); 104732fe681dSStefano Zampini PetscCall(ISView(is, matl_dbg_viewer)); 104832fe681dSStefano Zampini PetscCall(ISDestroy(&is)); 104932fe681dSStefano Zampini if (sub_schurs->change) { 105032fe681dSStefano Zampini Mat T; 105132fe681dSStefano Zampini 105232fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i)); 105332fe681dSStefano Zampini PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL)); 105432fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)T, name)); 105532fe681dSStefano Zampini PetscCall(MatView(T, matl_dbg_viewer)); 105632fe681dSStefano Zampini PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i)); 105732fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name)); 105832fe681dSStefano Zampini PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer)); 105932fe681dSStefano Zampini } 106032fe681dSStefano Zampini cum += subset_size; 106132fe681dSStefano Zampini } 106232fe681dSStefano Zampini PetscCall(PetscViewerFlush(matl_dbg_viewer)); 106311955456SStefano Zampini } 106411955456SStefano Zampini 1065883469d8SStefano Zampini /* get explicit Schur Complement computed during numeric factorization */ 10669566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 10679566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype))); 10684ba54290SStefano Zampini #if defined(PETSC_HAVE_CUDA) 10699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, "")); 10704ba54290SStefano Zampini #endif 10711baa6e33SBarry Smith if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype))); 10729566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL)); 10739566063dSJacob Faibussowitsch PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all)); 10749566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 10759566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 10769566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all, &Stype)); 1077b3cb21ddSStefano Zampini 1078d62866d3SStefano Zampini /* we can reuse the solvers if we are not using the economic version */ 1079683d3df6SStefano Zampini reuse_solvers = (PetscBool)(reuse_solvers && !economic); 108032fe681dSStefano Zampini if (!sub_schurs->gdsw) { 1081683d3df6SStefano Zampini factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 10829371c9d4SSatish Balay if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE; 108332fe681dSStefano Zampini } 1084df4d28bfSStefano Zampini solver_S = PETSC_TRUE; 1085ca92afb2SStefano Zampini 108672b8c272SStefano Zampini /* update the Schur complement with the change of basis on the pressures */ 1087ca92afb2SStefano Zampini if (benign_n) { 10887ebab0bbSStefano Zampini const PetscScalar *cs_AIB; 10897ebab0bbSStefano Zampini PetscScalar *S_data, *AIIm1_data; 10903b03f7bbSStefano Zampini Mat S2 = NULL, S3 = NULL; /* dbg */ 10913b03f7bbSStefano Zampini PetscScalar *S2_data, *S3_data; /* dbg */ 10927ebab0bbSStefano Zampini Vec v, benign_AIIm1_ones; 10935cbda25cSStefano Zampini PetscInt sizeA; 1094ca92afb2SStefano Zampini 10959566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all, &S_data)); 10969566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 10979566063dSJacob Faibussowitsch PetscCall(VecGetSize(v, &sizeA)); 1098ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 10999566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 26, 0)); 1100ca92afb2SStefano Zampini #endif 1101ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 11029566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F, 70, 1)); 1103ca92afb2SStefano Zampini #endif 11049566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB)); 11059566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 11063b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11079566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2)); 11089566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3)); 11099566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S2, &S2_data)); 11109566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S3, &S3_data)); 11113b03f7bbSStefano Zampini } 1112ca92afb2SStefano Zampini for (i = 0; i < benign_n; i++) { 11133b03f7bbSStefano Zampini PetscScalar *array, sum = 0., one = 1., *sums; 1114ca92afb2SStefano Zampini const PetscInt *idxs; 11153b03f7bbSStefano Zampini PetscInt k, j, nz; 111647484b83SStefano Zampini PetscBLASInt B_k, B_n; 1117ca92afb2SStefano Zampini 11189566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(benign_n, &sums)); 11199566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 11209566063dSJacob Faibussowitsch PetscCall(VecCopy(benign_AIIm1_ones, v)); 11219566063dSJacob Faibussowitsch PetscCall(MatSolve(F, v, benign_AIIm1_ones)); 11229566063dSJacob Faibussowitsch PetscCall(MatMult(A, benign_AIIm1_ones, v)); 11239566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones)); 11243b03f7bbSStefano Zampini /* p0 dofs (eliminated) are excluded from the sums */ 11253b03f7bbSStefano Zampini for (k = 0; k < benign_n; k++) { 11269566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[k], &nz)); 11279566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[k], &idxs)); 11283b03f7bbSStefano Zampini for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i]; 11299566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[k], &idxs)); 11303b03f7bbSStefano Zampini } 11319566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 11323b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11333b03f7bbSStefano Zampini Vec vv; 11343b03f7bbSStefano Zampini char name[16]; 11353b03f7bbSStefano Zampini 11369566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv)); 113763a3b9bcSJacob Faibussowitsch PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i)); 11389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)vv, name)); 11399566063dSJacob Faibussowitsch PetscCall(VecView(vv, matl_dbg_viewer)); 11403b03f7bbSStefano Zampini } 114147484b83SStefano Zampini /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 114247484b83SStefano Zampini /* cs_AIB already scaled by 1./nz */ 114347484b83SStefano Zampini B_k = 1; 11443b03f7bbSStefano Zampini for (k = 0; k < benign_n; k++) { 11453b03f7bbSStefano Zampini sum = sums[k]; 11469566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(size_schur, &B_n)); 11473b03f7bbSStefano Zampini 11483b03f7bbSStefano Zampini if (PetscAbsScalar(sum) == 0.0) continue; 11493b03f7bbSStefano Zampini if (k == i) { 1150792fecdfSBarry Smith PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1151*48a46eb9SPierre Jolivet if (matl_dbg_viewer) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 11523b03f7bbSStefano Zampini } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 11533b03f7bbSStefano Zampini sum /= 2.0; 1154792fecdfSBarry Smith PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1155*48a46eb9SPierre Jolivet if (matl_dbg_viewer) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 11563b03f7bbSStefano Zampini } 11573b03f7bbSStefano Zampini } 11585cbda25cSStefano Zampini sum = 1.; 1159792fecdfSBarry Smith PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1160*48a46eb9SPierre Jolivet if (matl_dbg_viewer) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n)); 11619566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 11625cbda25cSStefano Zampini /* set p0 entry of AIIm1_ones to zero */ 11639566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 11649566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i], &idxs)); 1165282d6408SStefano Zampini for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.; 11669566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 11679566063dSJacob Faibussowitsch PetscCall(PetscFree(sums)); 11683b03f7bbSStefano Zampini } 11699566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones)); 11703b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11719566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S2, &S2_data)); 11729566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S3, &S3_data)); 1173ca92afb2SStefano Zampini } 1174a7414863SStefano Zampini if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1175a7414863SStefano Zampini PetscInt k, j; 1176a7414863SStefano Zampini for (k = 0; k < size_schur; k++) { 11779371c9d4SSatish Balay for (j = k; j < size_schur; j++) { S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]); } 1178a7414863SStefano Zampini } 1179a7414863SStefano Zampini } 1180a7414863SStefano Zampini 11815cbda25cSStefano Zampini /* restore defaults */ 11825cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 11839566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F, 26, -1)); 11845cbda25cSStefano Zampini #endif 11855cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 11869566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F, 70, 0)); 11875cbda25cSStefano Zampini #endif 11889566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB)); 11899566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 11909566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v)); 11919566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all, &S_data)); 11923b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11933b03f7bbSStefano Zampini Mat S; 11943b03f7bbSStefano Zampini 11959566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 11969566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 11979566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S, "Sb")); 11989566063dSJacob Faibussowitsch PetscCall(MatView(S, matl_dbg_viewer)); 11999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S)); 12009566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S2, "S2P")); 12019566063dSJacob Faibussowitsch PetscCall(MatView(S2, matl_dbg_viewer)); 12029566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S3, "S3P")); 12039566063dSJacob Faibussowitsch PetscCall(MatView(S3, matl_dbg_viewer)); 12049566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs")); 12059566063dSJacob Faibussowitsch PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer)); 12069566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 12073b03f7bbSStefano Zampini } 12089566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S2)); 12099566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S3)); 1210ca92afb2SStefano Zampini } 1211a3df083aSStefano Zampini if (!reuse_solvers) { 1212*48a46eb9SPierre Jolivet for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i])); 12139566063dSJacob Faibussowitsch PetscCall(PetscFree(is_p_r)); 12149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&cs_AIB_mat)); 12159566063dSJacob Faibussowitsch PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1216a3df083aSStefano Zampini } 1217df4d28bfSStefano Zampini } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 12189566063dSJacob Faibussowitsch PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all)); 12199566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all, &Stype)); 1220683d3df6SStefano Zampini reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1221166598c1SStefano Zampini factor_workaround = PETSC_FALSE; 1222df4d28bfSStefano Zampini solver_S = PETSC_FALSE; 1223be83ff47SStefano Zampini } 1224be83ff47SStefano Zampini 1225be83ff47SStefano Zampini if (reuse_solvers) { 122632fe681dSStefano Zampini Mat A_II, pA_II, Afake; 122753892102SStefano Zampini Vec vec1_B; 1228df4d28bfSStefano Zampini PCBDDCReuseSolvers msolv_ctx; 12293462e049SStefano Zampini PetscInt n_R; 1230d5574798SStefano Zampini 1231df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) { 12329566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1233e28d306cSStefano Zampini } else { 12349566063dSJacob Faibussowitsch PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1235d62866d3SStefano Zampini } 1236df4d28bfSStefano Zampini msolv_ctx = sub_schurs->reuse_solver; 123732fe681dSStefano Zampini PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL)); 12389566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)F)); 1239d5574798SStefano Zampini msolv_ctx->F = F; 12409566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL)); 1241683d3df6SStefano Zampini /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1242683d3df6SStefano Zampini { 1243683d3df6SStefano Zampini PetscScalar *array; 1244683d3df6SStefano Zampini PetscInt n; 1245683d3df6SStefano Zampini 12469566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(msolv_ctx->sol, &n)); 12479566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->sol, &array)); 12489566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs)); 12499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->sol, &array)); 1250683d3df6SStefano Zampini } 12513fc34f97SStefano Zampini msolv_ctx->has_vertices = schur_has_vertices; 1252d62866d3SStefano Zampini 1253d62866d3SStefano Zampini /* interior solver */ 12549566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver)); 125532fe681dSStefano Zampini PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II)); 12569566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL)); 12579566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)")); 12589566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx)); 12599566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 12609566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior)); 12619566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose)); 126232fe681dSStefano Zampini if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy)); 1263d62866d3SStefano Zampini 1264d62866d3SStefano Zampini /* correction solver */ 126532fe681dSStefano Zampini if (!sub_schurs->gdsw) { 12669566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver)); 12679566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL)); 12689566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)")); 12699566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx)); 12709566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 12719566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction)); 12729566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose)); 127353892102SStefano Zampini 127453892102SStefano Zampini /* scatter and vecs for Schur complement solver */ 12759566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B)); 12769566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL)); 12773fc34f97SStefano Zampini if (!schur_has_vertices) { 12789566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B)); 12799566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B)); 12809566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_A_all)); 128153892102SStefano Zampini msolv_ctx->is_R = is_A_all; 1282683d3df6SStefano Zampini } else { 1283683d3df6SStefano Zampini IS is_B_all; 1284683d3df6SStefano Zampini const PetscInt *idxs; 1285683d3df6SStefano Zampini PetscInt dual, n_v, n; 1286683d3df6SStefano Zampini 12879566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 1288683d3df6SStefano Zampini dual = size_schur - n_v; 12899566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_A_all, &n)); 12909566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_A_all, &idxs)); 12919566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all)); 12929566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B)); 12939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all)); 12949566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all)); 12959566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B)); 12969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all)); 12979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R)); 12989566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_A_all, &idxs)); 1299683d3df6SStefano Zampini } 13009566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R)); 13019566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake)); 13029566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY)); 13039566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY)); 13049566063dSJacob Faibussowitsch PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake)); 13059566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Afake)); 13069566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec1_B)); 130732fe681dSStefano Zampini } 1308ca92afb2SStefano Zampini /* communicate benign info to solver context */ 1309ca92afb2SStefano Zampini if (benign_n) { 13105cbda25cSStefano Zampini PetscScalar *array; 13115cbda25cSStefano Zampini 1312ca92afb2SStefano Zampini msolv_ctx->benign_n = benign_n; 1313ca92afb2SStefano Zampini msolv_ctx->benign_zerodiag_subs = is_p_r; 13149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals)); 13155cbda25cSStefano Zampini msolv_ctx->benign_csAIB = cs_AIB_mat; 13169566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL)); 13179566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array)); 13189566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec)); 13199566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array)); 13205cbda25cSStefano Zampini msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1321ca92afb2SStefano Zampini } 1322ada6e2d7SStefano Zampini } else { 13231baa6e33SBarry Smith if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 13249566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver)); 1325d5574798SStefano Zampini } 13269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13279566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_A_all)); 13285db18549SStefano Zampini 1329be83ff47SStefano Zampini /* Work arrays */ 13309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work)); 1331d2627357SStefano Zampini 1332be83ff47SStefano Zampini /* S_Ej_all */ 1333be83ff47SStefano Zampini cum = cum2 = 0; 13349566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(S_all, &rS_data)); 13359566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr)); 1336*48a46eb9SPierre Jolivet if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 1337*48a46eb9SPierre Jolivet if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA)); 133865d8bf0aSStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 133965d8bf0aSStefano Zampini PetscInt j; 134065d8bf0aSStefano Zampini 134132fe681dSStefano Zampini /* get S_E (or K^i_EE for GDSW) */ 13429566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 134332fe681dSStefano Zampini if (sub_schurs->gdsw) { 134432fe681dSStefano Zampini Mat T; 134532fe681dSStefano Zampini 134632fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T)); 134732fe681dSStefano Zampini PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T)); 134832fe681dSStefano Zampini PetscCall(MatDestroy(&T)); 134932fe681dSStefano Zampini } else { 1350683d3df6SStefano Zampini if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1351be83ff47SStefano Zampini PetscInt k; 1352be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) { 1353be83ff47SStefano Zampini for (j = k; j < subset_size; j++) { 13541683a169SBarry Smith work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 13551683a169SBarry Smith work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]); 1356be83ff47SStefano Zampini } 1357be83ff47SStefano Zampini } 135806a4e24aSStefano Zampini } else { /* just copy to workspace */ 1359be83ff47SStefano Zampini PetscInt k; 1360be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) { 13619371c9d4SSatish Balay for (j = 0; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; } 1362be83ff47SStefano Zampini } 13639087bf02SStefano Zampini } 136432fe681dSStefano Zampini } 13655a95e1ceSStefano Zampini /* insert S_E values */ 1366b7ab4a40SStefano Zampini if (sub_schurs->change) { 13678760537fSStefano Zampini Mat change_sub, SEj, T; 136872b8c272SStefano Zampini 136972b8c272SStefano Zampini /* change basis */ 13709566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 13719566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 13728760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 13738760537fSStefano Zampini Mat T2; 13749566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 13759566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 13769566063dSJacob Faibussowitsch PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 13779566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2)); 13788760537fSStefano Zampini } else { 13799566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 138072b8c272SStefano Zampini } 13819566063dSJacob Faibussowitsch PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 13829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 13839566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL)); 13849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 138572b8c272SStefano Zampini } 13869566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1387862806e4SStefano Zampini if (compute_Stilda) { 138832fe681dSStefano Zampini if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 13897ebab0bbSStefano Zampini Mat M; 13907ebab0bbSStefano Zampini const PetscScalar *vals; 13917ebab0bbSStefano Zampini PetscBool isdense, isdensecuda; 1392f4f7d9d6SStefano Zampini 13939566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M)); 13949566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 13959566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1396*48a46eb9SPierre Jolivet if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype)); 13979566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense)); 13989566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda)); 13997ebab0bbSStefano Zampini if (use_cholesky) { 14009566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1401d6462365SStefano Zampini } else { 14029566063dSJacob Faibussowitsch PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 14032972d61bSStefano Zampini } 14047ebab0bbSStefano Zampini if (isdense) { 14059566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 14067ebab0bbSStefano Zampini #if defined(PETSC_HAVE_CUDA) 14077ebab0bbSStefano Zampini } else if (isdensecuda) { 14089566063dSJacob Faibussowitsch PetscCall(MatSeqDenseCUDAInvertFactors_Private(M)); 14097ebab0bbSStefano Zampini #endif 141098921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype); 14119566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(M, &vals)); 14129566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size)); 14139566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(M, &vals)); 14149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 141532fe681dSStefano Zampini } else if (scaling) { /* not using deluxe */ 14169d54b7f4SStefano Zampini Mat SEj; 14179d54b7f4SStefano Zampini Vec D; 14189d54b7f4SStefano Zampini PetscScalar *array; 14199d54b7f4SStefano Zampini 14209566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 14219566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall, &array)); 14229566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D)); 14239566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall, &array)); 14249566063dSJacob Faibussowitsch PetscCall(VecShift(D, -1.)); 14259566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(SEj, D, D)); 14269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 14279566063dSJacob Faibussowitsch PetscCall(VecDestroy(&D)); 14289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 14299d54b7f4SStefano Zampini } 143032fe681dSStefano Zampini } 1431be83ff47SStefano Zampini cum += subset_size; 1432be83ff47SStefano Zampini cum2 += subset_size * (size_schur + 1); 143304c5b2e6SStefano Zampini SEj_arr += subset_size * subset_size; 143404c5b2e6SStefano Zampini if (SEjinv_arr) SEjinv_arr += subset_size * subset_size; 1435be83ff47SStefano Zampini } 1436*48a46eb9SPierre Jolivet if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA)); 14379566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data)); 14389566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr)); 1439*48a46eb9SPierre Jolivet if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 1440*48a46eb9SPierre Jolivet if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1441683d3df6SStefano Zampini 14427ebab0bbSStefano Zampini /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 14437ebab0bbSStefano Zampini however, preliminary tests indicate using GPUs is still faster in the solve phase */ 14447ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 14457ebab0bbSStefano Zampini if (reuse_solvers) { 14467ebab0bbSStefano Zampini Mat St; 14477ebab0bbSStefano Zampini MatFactorSchurStatus st; 14487ebab0bbSStefano Zampini 144935d0533cSStefano Zampini flg = PETSC_FALSE; 14509566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL)); 14519566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &St, &st)); 14529566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(St, flg)); 14539566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &St, st)); 14547ebab0bbSStefano Zampini } 14557ebab0bbSStefano Zampini #endif 14567ebab0bbSStefano Zampini 1457683d3df6SStefano Zampini schur_factor = NULL; 145845951f25SStefano Zampini if (compute_Stilda && size_active_schur) { 14599d54b7f4SStefano Zampini if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 146032fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 14619566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur)); 146232fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 14634a6c6b0dSStefano Zampini } else { 1464683d3df6SStefano Zampini Mat S_all_inv = NULL; 14657ebab0bbSStefano Zampini 146632fe681dSStefano Zampini if (solver_S && !sub_schurs->gdsw) { 1467683d3df6SStefano Zampini /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1468683d3df6SStefano Zampini The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 14693fc34f97SStefano Zampini if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1470683d3df6SStefano Zampini PetscScalar *data; 1471683d3df6SStefano Zampini PetscInt nd = 0; 14726dba178dSStefano Zampini 14737a46b595SBarry Smith PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 14749566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 14759566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv, &data)); 1476683d3df6SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 14779566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1478683d3df6SStefano Zampini } 14793fc34f97SStefano Zampini 14803fc34f97SStefano Zampini /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 14813fc34f97SStefano Zampini if (schur_has_vertices) { 14823fc34f97SStefano Zampini Mat M; 14833fc34f97SStefano Zampini PetscScalar *tdata; 14843fc34f97SStefano Zampini PetscInt nv = 0, news; 14853fc34f97SStefano Zampini 14869566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 14873fc34f97SStefano Zampini news = size_active_schur + nv; 14889566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(news * news, &tdata)); 1489683d3df6SStefano Zampini for (i = 0; i < size_active_schur; i++) { 14909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i)); 14919566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv)); 14923fc34f97SStefano Zampini } 14933fc34f97SStefano Zampini for (i = 0; i < nv; i++) { 14943fc34f97SStefano Zampini PetscInt k = i + size_active_schur; 14959566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i)); 14963fc34f97SStefano Zampini } 14973fc34f97SStefano Zampini 14989566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M)); 14999566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 15009566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL)); 15013fc34f97SStefano Zampini /* save the factors */ 15023fc34f97SStefano Zampini cum = 0; 15039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor)); 15043fc34f97SStefano Zampini for (i = 0; i < size_active_schur; i++) { 15059566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i)); 1506683d3df6SStefano Zampini cum += size_active_schur - i; 1507683d3df6SStefano Zampini } 15083fc34f97SStefano Zampini for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)])); 15099566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 15103fc34f97SStefano Zampini /* move back just the active dofs to the Schur complement */ 1511*48a46eb9SPierre Jolivet for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur)); 15129566063dSJacob Faibussowitsch PetscCall(PetscFree(tdata)); 15139566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 15143fc34f97SStefano Zampini } else { /* we can factorize and invert just the activedofs */ 15153fc34f97SStefano Zampini Mat M; 15165002105bSStefano Zampini PetscScalar *aux; 15173fc34f97SStefano Zampini 15189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nd, &aux)); 15195002105bSStefano Zampini for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)]; 15209566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M)); 15219566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M, size_schur)); 15229566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 15239566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M, NULL, NULL)); 15249566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 15259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 15269566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M)); 15279566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 15289566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 15299566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M)); 15309566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M, size_schur)); 15319566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 15329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 15335002105bSStefano Zampini for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i]; 15349566063dSJacob Faibussowitsch PetscCall(PetscFree(aux)); 15353fc34f97SStefano Zampini } 15369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv, &data)); 15373fc34f97SStefano Zampini } else { /* use MatFactor calls to invert S */ 15389566063dSJacob Faibussowitsch PetscCall(MatFactorInvertSchurComplement(F)); 15399566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1540683d3df6SStefano Zampini } 154132fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 15429566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)S_all)); 1543683d3df6SStefano Zampini S_all_inv = S_all; 15449566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv, &S_data)); 15459566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(size_schur, &B_N)); 15469566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1547f4f7d9d6SStefano Zampini if (use_potr) { 1548792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr)); 154928b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr); 1550792fecdfSBarry Smith PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr)); 155128b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr); 1552f4f7d9d6SStefano Zampini } else if (use_sytr) { 1553792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 155428b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr); 1555792fecdfSBarry Smith PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr)); 155628b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr); 1557d6462365SStefano Zampini } else { 1558792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr)); 155928b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr); 1560792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 156128b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr); 1562be83ff47SStefano Zampini } 15639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur)); 15649566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 15659566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv, &S_data)); 156632fe681dSStefano Zampini } else if (sub_schurs->gdsw) { 156732fe681dSStefano Zampini Mat tS, tX, SEj, S_II, S_IE, S_EE; 156832fe681dSStefano Zampini KSP pS_II; 156932fe681dSStefano Zampini PC pS_II_pc; 157032fe681dSStefano Zampini IS EE, II; 157132fe681dSStefano Zampini PetscInt nS; 157232fe681dSStefano Zampini 157332fe681dSStefano Zampini PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL)); 157432fe681dSStefano Zampini PetscCall(MatGetSize(tS, &nS, NULL)); 157532fe681dSStefano Zampini PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 157632fe681dSStefano Zampini for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */ 157732fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 157832fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj)); 157932fe681dSStefano Zampini 158032fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE)); 158132fe681dSStefano Zampini PetscCall(ISComplement(EE, 0, nS, &II)); 158232fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II)); 158332fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE)); 158432fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE)); 158532fe681dSStefano Zampini PetscCall(ISDestroy(&II)); 158632fe681dSStefano Zampini PetscCall(ISDestroy(&EE)); 158732fe681dSStefano Zampini 158832fe681dSStefano Zampini PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II)); 158932fe681dSStefano Zampini PetscCall(KSPSetType(pS_II, KSPPREONLY)); 159032fe681dSStefano Zampini PetscCall(KSPGetPC(pS_II, &pS_II_pc)); 159132fe681dSStefano Zampini PetscCall(PCSetType(pS_II_pc, PCSVD)); 159232fe681dSStefano Zampini PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix)); 159332fe681dSStefano Zampini PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_")); 159432fe681dSStefano Zampini PetscCall(KSPSetOperators(pS_II, S_II, S_II)); 159532fe681dSStefano Zampini PetscCall(MatDestroy(&S_II)); 159632fe681dSStefano Zampini PetscCall(KSPSetFromOptions(pS_II)); 159732fe681dSStefano Zampini PetscCall(KSPSetUp(pS_II)); 159832fe681dSStefano Zampini PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX)); 159932fe681dSStefano Zampini PetscCall(KSPMatSolve(pS_II, S_IE, tX)); 160032fe681dSStefano Zampini PetscCall(KSPDestroy(&pS_II)); 160132fe681dSStefano Zampini 160232fe681dSStefano Zampini PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj)); 160332fe681dSStefano Zampini PetscCall(MatDestroy(&S_IE)); 160432fe681dSStefano Zampini PetscCall(MatDestroy(&tX)); 160532fe681dSStefano Zampini PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN)); 160632fe681dSStefano Zampini PetscCall(MatDestroy(&S_EE)); 160732fe681dSStefano Zampini 160832fe681dSStefano Zampini PetscCall(MatDestroy(&SEj)); 160932fe681dSStefano Zampini cum += subset_size; 161032fe681dSStefano Zampini SEjinv_arr += subset_size * subset_size; 161132fe681dSStefano Zampini } 161232fe681dSStefano Zampini PetscCall(MatDestroy(&tS)); 161332fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1614be83ff47SStefano Zampini } 1615be83ff47SStefano Zampini /* S_Ej_tilda_all */ 1616be83ff47SStefano Zampini cum = cum2 = 0; 161732fe681dSStefano Zampini rS_data = NULL; 161832fe681dSStefano Zampini if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data)); 161932fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1620be83ff47SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 1621be83ff47SStefano Zampini PetscInt j; 1622862806e4SStefano Zampini 16239566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1624be83ff47SStefano Zampini /* get (St^-1)_E */ 162572b8c272SStefano Zampini /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 162606a4e24aSStefano Zampini will be properly accessed later during adaptive selection */ 162732fe681dSStefano Zampini if (rS_data) { 1628a0b0af32SStefano Zampini if (S_lower_triangular) { 1629be83ff47SStefano Zampini PetscInt k; 1630b7ab4a40SStefano Zampini if (sub_schurs->change) { 1631be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) { 1632be83ff47SStefano Zampini for (j = k; j < subset_size; j++) { 16331683a169SBarry Smith work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 16346c3e6151SStefano Zampini work[j * subset_size + k] = work[k * subset_size + j]; 1635be83ff47SStefano Zampini } 1636be83ff47SStefano Zampini } 163772b8c272SStefano Zampini } else { 163872b8c272SStefano Zampini for (k = 0; k < subset_size; k++) { 16399371c9d4SSatish Balay for (j = k; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; } 164072b8c272SStefano Zampini } 164172b8c272SStefano Zampini } 164272b8c272SStefano Zampini } else { 1643be83ff47SStefano Zampini PetscInt k; 1644be83ff47SStefano Zampini for (k = 0; k < subset_size; k++) { 16459371c9d4SSatish Balay for (j = 0; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; } 1646be83ff47SStefano Zampini } 1647be83ff47SStefano Zampini } 164832fe681dSStefano Zampini } 1649b7ab4a40SStefano Zampini if (sub_schurs->change) { 16508760537fSStefano Zampini Mat change_sub, SEj, T; 165132fe681dSStefano Zampini PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL; 165272b8c272SStefano Zampini 165372b8c272SStefano Zampini /* change basis */ 16549566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 165532fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj)); 16568760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 16578760537fSStefano Zampini Mat T2; 16589566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 16599566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 16609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2)); 16619566063dSJacob Faibussowitsch PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 16628760537fSStefano Zampini } else { 16639566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 166472b8c272SStefano Zampini } 16659566063dSJacob Faibussowitsch PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 16669566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 166732fe681dSStefano Zampini PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL)); 16689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 166972b8c272SStefano Zampini } 167032fe681dSStefano Zampini if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size)); 1671be83ff47SStefano Zampini cum += subset_size; 1672be83ff47SStefano Zampini cum2 += subset_size * (size_schur + 1); 167304c5b2e6SStefano Zampini SEjinv_arr += subset_size * subset_size; 1674883469d8SStefano Zampini } 167532fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 167632fe681dSStefano Zampini if (S_all_inv) { 16779566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data)); 1678df4d28bfSStefano Zampini if (solver_S) { 16793fc34f97SStefano Zampini if (schur_has_vertices) { 16809566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED)); 16813fc34f97SStefano Zampini } else { 16829566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED)); 16835db18549SStefano Zampini } 16843fc34f97SStefano Zampini } 168532fe681dSStefano Zampini } 16869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all_inv)); 1687683d3df6SStefano Zampini } 1688683d3df6SStefano Zampini 16893fc34f97SStefano Zampini /* move back factors if needed */ 169032fe681dSStefano Zampini if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1691683d3df6SStefano Zampini Mat S_tmp; 16923fc34f97SStefano Zampini PetscInt nd = 0; 1693683d3df6SStefano Zampini 169428b400f6SJacob Faibussowitsch PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 16959566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL)); 1696f4f7d9d6SStefano Zampini if (use_potr) { 1697683d3df6SStefano Zampini PetscScalar *data; 1698683d3df6SStefano Zampini 16999566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_tmp, &data)); 17009566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data, size_schur * size_schur)); 1701683d3df6SStefano Zampini 1702683d3df6SStefano Zampini if (S_lower_triangular) { 1703683d3df6SStefano Zampini cum = 0; 1704683d3df6SStefano Zampini for (i = 0; i < size_active_schur; i++) { 17059566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i)); 1706683d3df6SStefano Zampini cum += size_active_schur - i; 1707683d3df6SStefano Zampini } 1708683d3df6SStefano Zampini } else { 17099566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur)); 1710683d3df6SStefano Zampini } 1711683d3df6SStefano Zampini if (sub_schurs->is_dir) { 17129566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 17139371c9d4SSatish Balay for (i = 0; i < nd; i++) { data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i]; } 1714683d3df6SStefano Zampini } 17156dba178dSStefano Zampini /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1716683d3df6SStefano Zampini set the diagonal entry of the Schur factor to a very large value */ 17179371c9d4SSatish Balay for (i = size_active_schur + nd; i < size_schur; i++) { data[i * (size_schur + 1)] = infty; } 17189566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_tmp, &data)); 17196542c05fSStefano Zampini } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 17209566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED)); 17219087bf02SStefano Zampini } 172232fe681dSStefano Zampini } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1723367aa537SStefano Zampini PetscScalar *data; 1724367aa537SStefano Zampini PetscInt nd = 0; 1725367aa537SStefano Zampini 1726367aa537SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 17279566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1728367aa537SStefano Zampini } 17299566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 17309566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all, &data)); 1731*48a46eb9SPierre Jolivet for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1732367aa537SStefano Zampini for (i = size_active_schur + nd; i < size_schur; i++) { 17339566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 17346c3e6151SStefano Zampini data[i * (size_schur + 1)] = infty; 1735367aa537SStefano Zampini } 17369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all, &data)); 17379566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 17384a6c6b0dSStefano Zampini } 17399566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 17409566063dSJacob Faibussowitsch PetscCall(PetscFree(schur_factor)); 17419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&Dall)); 17424a6c6b0dSStefano Zampini } 17439566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I_layer)); 17449566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all)); 17459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BB)); 17469566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_IB)); 17479566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BI)); 17489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F)); 17496afe12f5SStefano Zampini 17509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz)); 1751*48a46eb9SPierre Jolivet for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i])); 17529566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer)); 17539566063dSJacob Faibussowitsch PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz)); 17549566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 17559566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 17565a95e1ceSStefano Zampini if (compute_Stilda) { 17579566063dSJacob Faibussowitsch PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz)); 17589566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 17599566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 17609d54b7f4SStefano Zampini if (deluxe) { 17619566063dSJacob Faibussowitsch PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz)); 17629566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 17639566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 176408122e43SStefano Zampini } 17659d54b7f4SStefano Zampini } 17669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I_layer)); 17676afe12f5SStefano Zampini 17685db18549SStefano Zampini /* Get local part of (\sum_j S_Ej) */ 1769*48a46eb9SPierre Jolivet if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all)); 17709566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0)); 17719566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray)); 17729566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 17739566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 17749566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 17759566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray)); 17769566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 17779566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray)); 17789566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 17799566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 17809566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 17819566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray)); 17829566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 178308122e43SStefano Zampini 1784f6f667cfSStefano Zampini /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 17855a95e1ceSStefano Zampini if (compute_Stilda) { 17869566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0)); 17879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 17889566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 17899566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 17909566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 17919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 17929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 17939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 17949566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 17959d54b7f4SStefano Zampini if (deluxe) { 17969566063dSJacob Faibussowitsch PetscCall(VecSet(gstash, 0.0)); 17979566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 17989566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash, stasharray)); 17999566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 18009566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 18019566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 18029566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 18039566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 18049566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 180532fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { 18069d54b7f4SStefano Zampini PetscScalar *array; 18079d54b7f4SStefano Zampini PetscInt cum; 18089d54b7f4SStefano Zampini 18099566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 18109d54b7f4SStefano Zampini cum = 0; 18119d54b7f4SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 18129566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 18139566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(subset_size, &B_N)); 18149566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1815f4f7d9d6SStefano Zampini if (use_potr) { 1816792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr)); 181728b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr); 1818792fecdfSBarry Smith PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr)); 181928b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr); 1820f4f7d9d6SStefano Zampini } else if (use_sytr) { 1821792fecdfSBarry Smith PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 182228b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr); 1823792fecdfSBarry Smith PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr)); 182428b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr); 1825f4f7d9d6SStefano Zampini } else { 1826792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr)); 182728b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr); 1828792fecdfSBarry Smith PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 182928b400f6SJacob Faibussowitsch PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr); 1830f4f7d9d6SStefano Zampini } 18319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size)); 18329566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 18339d54b7f4SStefano Zampini cum += subset_size * subset_size; 18349d54b7f4SStefano Zampini } 18359566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 18369566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 18379566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 18389d54b7f4SStefano Zampini sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 18399d54b7f4SStefano Zampini } 184008122e43SStefano Zampini } 18419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lstash)); 18429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gstash)); 18439566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sstash)); 184457a87bf3SStefano Zampini 18453b03f7bbSStefano Zampini if (matl_dbg_viewer) { 184611955456SStefano Zampini if (sub_schurs->S_Ej_all) { 18479566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE")); 18489566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer)); 184911955456SStefano Zampini } 185011955456SStefano Zampini if (sub_schurs->sum_S_Ej_all) { 18519566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE")); 18529566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer)); 185311955456SStefano Zampini } 185411955456SStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) { 18559566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm")); 18569566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer)); 185711955456SStefano Zampini } 185811955456SStefano Zampini if (sub_schurs->sum_S_Ej_tilda_all) { 18599566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt")); 18609566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer)); 186111955456SStefano Zampini } 186211955456SStefano Zampini } 18633202ece2SStefano Zampini 18645a95e1ceSStefano Zampini /* free workspace */ 186551ab8ad6SStefano Zampini if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 186651ab8ad6SStefano Zampini if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 18679566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 18689566063dSJacob Faibussowitsch PetscCall(PetscFree2(Bwork, pivots)); 18699566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n)); 1870b1b3d7a2SStefano Zampini PetscFunctionReturn(0); 1871b1b3d7a2SStefano Zampini } 1872b1b3d7a2SStefano Zampini 18739371c9d4SSatish Balay PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) { 18749bb4a8caSStefano Zampini IS *faces, *edges, *all_cc, vertices; 187532fe681dSStefano Zampini PetscInt s, i, n_faces, n_edges, n_all_cc; 1876365a3a41SStefano Zampini PetscBool is_sorted, ispardiso, ismumps; 1877b1b3d7a2SStefano Zampini 1878b1b3d7a2SStefano Zampini PetscFunctionBegin; 18799566063dSJacob Faibussowitsch PetscCall(ISSorted(is_I, &is_sorted)); 188028b400f6SJacob Faibussowitsch PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted"); 18819566063dSJacob Faibussowitsch PetscCall(ISSorted(is_B, &is_sorted)); 188228b400f6SJacob Faibussowitsch PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted"); 1883b1b3d7a2SStefano Zampini 1884b1b3d7a2SStefano Zampini /* reset any previous data */ 18859566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(sub_schurs)); 1886b1b3d7a2SStefano Zampini 188732fe681dSStefano Zampini sub_schurs->gdsw = gdsw; 188832fe681dSStefano Zampini 18895a95e1ceSStefano Zampini /* get index sets for faces and edges (already sorted by global ordering) */ 18909566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 1891b1b3d7a2SStefano Zampini n_all_cc = n_faces + n_edges; 18929566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge)); 18939566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_all_cc, &all_cc)); 189432fe681dSStefano Zampini n_all_cc = 0; 1895b1b3d7a2SStefano Zampini for (i = 0; i < n_faces; i++) { 189632fe681dSStefano Zampini PetscCall(ISGetSize(faces[i], &s)); 189732fe681dSStefano Zampini if (!s) continue; 18988b6046baSStefano Zampini if (copycc) { 189932fe681dSStefano Zampini PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc])); 19008b6046baSStefano Zampini } else { 19019566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)faces[i])); 190232fe681dSStefano Zampini all_cc[n_all_cc] = faces[i]; 1903b1b3d7a2SStefano Zampini } 190432fe681dSStefano Zampini n_all_cc++; 19058b6046baSStefano Zampini } 1906b1b3d7a2SStefano Zampini for (i = 0; i < n_edges; i++) { 190732fe681dSStefano Zampini PetscCall(ISGetSize(edges[i], &s)); 190832fe681dSStefano Zampini if (!s) continue; 19098b6046baSStefano Zampini if (copycc) { 191032fe681dSStefano Zampini PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc])); 19118b6046baSStefano Zampini } else { 19129566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)edges[i])); 191332fe681dSStefano Zampini all_cc[n_all_cc] = edges[i]; 19148b6046baSStefano Zampini } 191532fe681dSStefano Zampini PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc)); 191632fe681dSStefano Zampini n_all_cc++; 1917b1b3d7a2SStefano Zampini } 19189566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)vertices)); 1919c8272957SStefano Zampini sub_schurs->is_vertices = vertices; 19209566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 1921d62866d3SStefano Zampini sub_schurs->is_dir = NULL; 19229566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir)); 1923b1b3d7a2SStefano Zampini 1924df4d28bfSStefano Zampini /* Determine if MatFactor can be used */ 19259566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix)); 1926883469d8SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 19279566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type))); 192888113c35SStefano Zampini #elif defined(PETSC_HAVE_MKL_PARDISO) 19299566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type))); 193088113c35SStefano Zampini #else 19319566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type))); 1932df4d28bfSStefano Zampini #endif 193388113c35SStefano Zampini #if defined(PETSC_USE_COMPLEX) 193488113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 193588113c35SStefano Zampini #else 193688113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_TRUE; 1937883469d8SStefano Zampini #endif 193888113c35SStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 193911955456SStefano Zampini sub_schurs->is_symmetric = PETSC_TRUE; 19407f9db97bSStefano Zampini sub_schurs->debug = PETSC_FALSE; 1941991c41b4SStefano Zampini sub_schurs->restrict_comm = PETSC_FALSE; 1942d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 19439566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL)); 19449566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL)); 19459566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL)); 19469566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL)); 19479566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL)); 19489566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL)); 1949d0609cedSBarry Smith PetscOptionsEnd(); 19509566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps)); 19519566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso)); 1952365a3a41SStefano Zampini sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 1953b1b3d7a2SStefano Zampini 195411955456SStefano Zampini /* for reals, symmetric and hermitian are synonims */ 195511955456SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 195611955456SStefano Zampini sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 195711955456SStefano Zampini sub_schurs->is_hermitian = sub_schurs->is_symmetric; 195811955456SStefano Zampini #endif 195911955456SStefano Zampini 19609566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_I)); 1961b1b3d7a2SStefano Zampini sub_schurs->is_I = is_I; 19629566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_B)); 1963b1b3d7a2SStefano Zampini sub_schurs->is_B = is_B; 19649566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 19655db18549SStefano Zampini sub_schurs->l2gmap = graph->l2gmap; 19669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 19675db18549SStefano Zampini sub_schurs->BtoNmap = BtoNmap; 19685a95e1ceSStefano Zampini sub_schurs->n_subs = n_all_cc; 1969b1b3d7a2SStefano Zampini sub_schurs->is_subs = all_cc; 1970b96c3477SStefano Zampini sub_schurs->S_Ej_all = NULL; 1971b96c3477SStefano Zampini sub_schurs->sum_S_Ej_all = NULL; 197208122e43SStefano Zampini sub_schurs->sum_S_Ej_inv_all = NULL; 1973b96c3477SStefano Zampini sub_schurs->sum_S_Ej_tilda_all = NULL; 1974b96c3477SStefano Zampini sub_schurs->is_Ej_all = NULL; 1975b1b3d7a2SStefano Zampini PetscFunctionReturn(0); 1976b1b3d7a2SStefano Zampini } 1977b1b3d7a2SStefano Zampini 19789371c9d4SSatish Balay PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) { 197934a97f8cSStefano Zampini PCBDDCSubSchurs schurs_ctx; 198034a97f8cSStefano Zampini 198134a97f8cSStefano Zampini PetscFunctionBegin; 19829566063dSJacob Faibussowitsch PetscCall(PetscNew(&schurs_ctx)); 19835ff63025SStefano Zampini schurs_ctx->n_subs = 0; 198434a97f8cSStefano Zampini *sub_schurs = schurs_ctx; 198534a97f8cSStefano Zampini PetscFunctionReturn(0); 198634a97f8cSStefano Zampini } 198734a97f8cSStefano Zampini 19889371c9d4SSatish Balay PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) { 198934a97f8cSStefano Zampini PetscInt i; 199034a97f8cSStefano Zampini 199134a97f8cSStefano Zampini PetscFunctionBegin; 1992aea80f77Sstefano_zampini if (!sub_schurs) PetscFunctionReturn(0); 19939566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->prefix)); 19949566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A)); 19959566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S)); 19969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_I)); 19979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_B)); 19989566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 19999566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 20009566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 20019566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 20029566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 20039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 20049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 20059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_vertices)); 20069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_dir)); 20079566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 2008*48a46eb9SPierre Jolivet for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i])); 20091baa6e33SBarry Smith if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs)); 20101baa6e33SBarry Smith if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 20119566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver)); 201272b8c272SStefano Zampini if (sub_schurs->change) { 201372b8c272SStefano Zampini for (i = 0; i < sub_schurs->n_subs; i++) { 20149566063dSJacob Faibussowitsch PetscCall(KSPDestroy(&sub_schurs->change[i])); 20159566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 201672b8c272SStefano Zampini } 201772b8c272SStefano Zampini } 20189566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change)); 20199566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change_primal_sub)); 202034a97f8cSStefano Zampini sub_schurs->n_subs = 0; 202134a97f8cSStefano Zampini PetscFunctionReturn(0); 202234a97f8cSStefano Zampini } 202334a97f8cSStefano Zampini 20249371c9d4SSatish Balay PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) { 2025aea80f77Sstefano_zampini PetscFunctionBegin; 20269566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 20279566063dSJacob Faibussowitsch PetscCall(PetscFree(*sub_schurs)); 2028aea80f77Sstefano_zampini PetscFunctionReturn(0); 2029aea80f77Sstefano_zampini } 2030aea80f77Sstefano_zampini 20319371c9d4SSatish Balay static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added) { 203234a97f8cSStefano Zampini PetscInt i, j, n; 203334a97f8cSStefano Zampini 203434a97f8cSStefano Zampini PetscFunctionBegin; 203534a97f8cSStefano Zampini n = 0; 203634a97f8cSStefano Zampini for (i = -n_prev; i < 0; i++) { 203734a97f8cSStefano Zampini PetscInt start_dof = queue_tip[i]; 203834a97f8cSStefano Zampini for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) { 203934a97f8cSStefano Zampini PetscInt dof = adjncy[j]; 204034a97f8cSStefano Zampini if (!PetscBTLookup(touched, dof)) { 20419566063dSJacob Faibussowitsch PetscCall(PetscBTSet(touched, dof)); 204234a97f8cSStefano Zampini queue_tip[n] = dof; 204334a97f8cSStefano Zampini n++; 204434a97f8cSStefano Zampini } 204534a97f8cSStefano Zampini } 204634a97f8cSStefano Zampini } 204734a97f8cSStefano Zampini *n_added = n; 204834a97f8cSStefano Zampini PetscFunctionReturn(0); 204934a97f8cSStefano Zampini } 2050