15e5bbd0aSStefano Zampini #include <petsc/private/pcbddcimpl.h> 25e5bbd0aSStefano Zampini #include <petsc/private/pcbddcprivateimpl.h> 305709791SSatish Balay #include <../src/mat/impls/dense/seq/dense.h> 408122e43SStefano Zampini #include <petscblaslapack.h> 534a97f8cSStefano Zampini 69fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 75ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*); 8df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec); 9df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec); 10d62866d3SStefano Zampini 11ca92afb2SStefano Zampini /* if v2 is not present, correction is done in-place */ 125cbda25cSStefano Zampini PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13ca92afb2SStefano Zampini { 14ca92afb2SStefano Zampini PetscScalar *array; 15ca92afb2SStefano Zampini PetscScalar *array2; 16ca92afb2SStefano Zampini 17ca92afb2SStefano Zampini PetscFunctionBegin; 18ca92afb2SStefano Zampini if (!ctx->benign_n) PetscFunctionReturn(0); 195cbda25cSStefano Zampini if (sol && full) { 205cbda25cSStefano Zampini PetscInt n_I,size_schur; 215cbda25cSStefano Zampini 225cbda25cSStefano Zampini /* get sizes */ 239566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 249566063dSJacob Faibussowitsch PetscCall(VecGetSize(v,&n_I)); 255cbda25cSStefano Zampini n_I = n_I - size_schur; 265cbda25cSStefano Zampini /* get schur sol from array */ 279566063dSJacob Faibussowitsch PetscCall(VecGetArray(v,&array)); 289566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v,&array)); 305cbda25cSStefano Zampini /* apply interior sol correction */ 319566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work)); 329566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 339566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v)); 345cbda25cSStefano Zampini } 35ca92afb2SStefano Zampini if (v2) { 36ca92afb2SStefano Zampini PetscInt nl; 37ca92afb2SStefano Zampini 389566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 399566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(v2,&nl)); 409566063dSJacob Faibussowitsch PetscCall(VecGetArray(v2,&array2)); 419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array2,array,nl)); 42ca92afb2SStefano Zampini } else { 439566063dSJacob Faibussowitsch PetscCall(VecGetArray(v,&array)); 44ca92afb2SStefano Zampini array2 = array; 45ca92afb2SStefano Zampini } 46ca92afb2SStefano Zampini if (!sol) { /* change rhs */ 47ca92afb2SStefano Zampini PetscInt n; 48ca92afb2SStefano Zampini for (n=0;n<ctx->benign_n;n++) { 49ca92afb2SStefano Zampini PetscScalar sum = 0.; 50ca92afb2SStefano Zampini const PetscInt *cols; 51ca92afb2SStefano Zampini PetscInt nz,i; 52ca92afb2SStefano Zampini 539566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 549566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 55ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) sum += array[cols[i]]; 5622db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 5722db5ddcSStefano Zampini sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 5822db5ddcSStefano Zampini #else 59ca92afb2SStefano Zampini sum = -sum/nz; 6022db5ddcSStefano Zampini #endif 61ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 62ca92afb2SStefano Zampini ctx->benign_save_vals[n] = array2[cols[nz-1]]; 63ca92afb2SStefano Zampini array2[cols[nz-1]] = sum; 649566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 65ca92afb2SStefano Zampini } 66ca92afb2SStefano Zampini } else { 67ca92afb2SStefano Zampini PetscInt n; 68ca92afb2SStefano Zampini for (n=0;n<ctx->benign_n;n++) { 69ca92afb2SStefano Zampini PetscScalar sum = 0.; 70ca92afb2SStefano Zampini const PetscInt *cols; 71ca92afb2SStefano Zampini PetscInt nz,i; 729566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 739566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 74ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) sum += array[cols[i]]; 7522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 7622db5ddcSStefano Zampini sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 7722db5ddcSStefano Zampini #else 78ca92afb2SStefano Zampini sum = -sum/nz; 7922db5ddcSStefano Zampini #endif 80ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 81ca92afb2SStefano Zampini array2[cols[nz-1]] = ctx->benign_save_vals[n]; 829566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 83ca92afb2SStefano Zampini } 84ca92afb2SStefano Zampini } 85ca92afb2SStefano Zampini if (v2) { 869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v2,&array2)); 88ca92afb2SStefano Zampini } else { 899566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v,&array)); 90ca92afb2SStefano Zampini } 915cbda25cSStefano Zampini if (!sol && full) { 925cbda25cSStefano Zampini Vec usedv; 935cbda25cSStefano Zampini PetscInt n_I,size_schur; 945cbda25cSStefano Zampini 955cbda25cSStefano Zampini /* get sizes */ 969566063dSJacob Faibussowitsch PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 979566063dSJacob Faibussowitsch PetscCall(VecGetSize(v,&n_I)); 985cbda25cSStefano Zampini n_I = n_I - size_schur; 995cbda25cSStefano Zampini /* compute schur rhs correction */ 1005cbda25cSStefano Zampini if (v2) { 1015cbda25cSStefano Zampini usedv = v2; 1025cbda25cSStefano Zampini } else { 1035cbda25cSStefano Zampini usedv = v; 1045cbda25cSStefano Zampini } 1055cbda25cSStefano Zampini /* apply schur rhs correction */ 1069566063dSJacob Faibussowitsch PetscCall(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work)); 1079566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(usedv,(const PetscScalar**)&array)); 1089566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 1099566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(usedv,(const PetscScalar**)&array)); 1109566063dSJacob Faibussowitsch PetscCall(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec)); 1119566063dSJacob Faibussowitsch PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 1125cbda25cSStefano Zampini } 113ca92afb2SStefano Zampini PetscFunctionReturn(0); 114ca92afb2SStefano Zampini } 115ca92afb2SStefano Zampini 116df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117d62866d3SStefano Zampini { 118df4d28bfSStefano Zampini PCBDDCReuseSolvers ctx; 119683d3df6SStefano Zampini PetscBool copy = PETSC_FALSE; 120d62866d3SStefano Zampini 121d62866d3SStefano Zampini PetscFunctionBegin; 1229566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc,&ctx)); 123683d3df6SStefano Zampini if (full) { 124d62866d3SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1259566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F,26,-1)); 126d62866d3SStefano Zampini #endif 1275cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1289566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0)); 1295cbda25cSStefano Zampini #endif 130683d3df6SStefano Zampini copy = ctx->has_vertices; 131d4933d67SStefano Zampini } else { /* interior solver */ 1326dba178dSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1339566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F,26,0)); 1346dba178dSStefano Zampini #endif 135d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1369566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,1)); 137d4933d67SStefano Zampini #endif 138683d3df6SStefano Zampini copy = PETSC_TRUE; 139683d3df6SStefano Zampini } 140683d3df6SStefano Zampini /* copy rhs into factored matrix workspace */ 141683d3df6SStefano Zampini if (copy) { 142ca92afb2SStefano Zampini PetscInt n; 143df4d28bfSStefano Zampini PetscScalar *array,*array_solver; 144ca92afb2SStefano Zampini 1459566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rhs,&n)); 1469566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(rhs,(const PetscScalar**)&array)); 1479566063dSJacob Faibussowitsch PetscCall(VecGetArray(ctx->rhs,&array_solver)); 1489566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array_solver,array,n)); 1499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(ctx->rhs,&array_solver)); 1509566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(rhs,(const PetscScalar**)&array)); 151683d3df6SStefano Zampini 1529566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full)); 153683d3df6SStefano Zampini if (transpose) { 1549566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol)); 155683d3df6SStefano Zampini } else { 1569566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F,ctx->rhs,ctx->sol)); 157683d3df6SStefano Zampini } 1589566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full)); 159683d3df6SStefano Zampini 160683d3df6SStefano Zampini /* get back data to caller worskpace */ 1619566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 1629566063dSJacob Faibussowitsch PetscCall(VecGetArray(sol,&array)); 1639566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(array,array_solver,n)); 1649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sol,&array)); 1659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 166683d3df6SStefano Zampini } else { 167ca92afb2SStefano Zampini if (ctx->benign_n) { 1689566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full)); 169ca92afb2SStefano Zampini if (transpose) { 1709566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,sol)); 171ca92afb2SStefano Zampini } else { 1729566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F,ctx->rhs,sol)); 173ca92afb2SStefano Zampini } 1749566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full)); 175ca92afb2SStefano Zampini } else { 176e28d306cSStefano Zampini if (transpose) { 1779566063dSJacob Faibussowitsch PetscCall(MatSolveTranspose(ctx->F,rhs,sol)); 178e28d306cSStefano Zampini } else { 1799566063dSJacob Faibussowitsch PetscCall(MatSolve(ctx->F,rhs,sol)); 180e28d306cSStefano Zampini } 181683d3df6SStefano Zampini } 182ca92afb2SStefano Zampini } 1835cbda25cSStefano Zampini /* restore defaults */ 1845cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1859566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(ctx->F,26,-1)); 1865cbda25cSStefano Zampini #endif 187d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1889566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0)); 189d4933d67SStefano Zampini #endif 190d62866d3SStefano Zampini PetscFunctionReturn(0); 191d62866d3SStefano Zampini } 192d62866d3SStefano Zampini 193df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 194e28d306cSStefano Zampini { 195e28d306cSStefano Zampini PetscFunctionBegin; 1969566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE)); 197e28d306cSStefano Zampini PetscFunctionReturn(0); 198e28d306cSStefano Zampini } 199e28d306cSStefano Zampini 200df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 201e28d306cSStefano Zampini { 202e28d306cSStefano Zampini PetscFunctionBegin; 2039566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE)); 204683d3df6SStefano Zampini PetscFunctionReturn(0); 205683d3df6SStefano Zampini } 206683d3df6SStefano Zampini 207df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 208683d3df6SStefano Zampini { 209683d3df6SStefano Zampini PetscFunctionBegin; 2109566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE)); 211683d3df6SStefano Zampini PetscFunctionReturn(0); 212683d3df6SStefano Zampini } 213683d3df6SStefano Zampini 214df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 215683d3df6SStefano Zampini { 216683d3df6SStefano Zampini PetscFunctionBegin; 2179566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE)); 218e28d306cSStefano Zampini PetscFunctionReturn(0); 219e28d306cSStefano Zampini } 220e28d306cSStefano Zampini 22115579a77SStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 22215579a77SStefano Zampini { 22315579a77SStefano Zampini PCBDDCReuseSolvers ctx; 22415579a77SStefano Zampini PetscBool iascii; 22515579a77SStefano Zampini 22615579a77SStefano Zampini PetscFunctionBegin; 2279566063dSJacob Faibussowitsch PetscCall(PCShellGetContext(pc,&ctx)); 2289566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 22915579a77SStefano Zampini if (iascii) { 2309566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO)); 23115579a77SStefano Zampini } 2329566063dSJacob Faibussowitsch PetscCall(MatView(ctx->F,viewer)); 23315579a77SStefano Zampini if (iascii) { 2349566063dSJacob Faibussowitsch PetscCall(PetscViewerPopFormat(viewer)); 23515579a77SStefano Zampini } 23615579a77SStefano Zampini PetscFunctionReturn(0); 23715579a77SStefano Zampini } 23815579a77SStefano Zampini 239df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 240d62866d3SStefano Zampini { 241ca92afb2SStefano Zampini PetscInt i; 242d62866d3SStefano Zampini 243d62866d3SStefano Zampini PetscFunctionBegin; 2449566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->F)); 2459566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol)); 2469566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs)); 2479566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->interior_solver)); 2489566063dSJacob Faibussowitsch PetscCall(PCDestroy(&reuse->correction_solver)); 2499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_R)); 2509566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->is_B)); 2519566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 2529566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->sol_B)); 2539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->rhs_B)); 254ca92afb2SStefano Zampini for (i=0;i<reuse->benign_n;i++) { 2559566063dSJacob Faibussowitsch PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); 256ca92afb2SStefano Zampini } 2579566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 2589566063dSJacob Faibussowitsch PetscCall(PetscFree(reuse->benign_save_vals)); 2599566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_csAIB)); 2609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 2619566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_corr_work)); 2629566063dSJacob Faibussowitsch PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 263d62866d3SStefano Zampini PetscFunctionReturn(0); 264d62866d3SStefano Zampini } 265d5574798SStefano Zampini 26632fe681dSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) 26732fe681dSStefano Zampini { 26832fe681dSStefano Zampini PCBDDCReuseSolvers ctx; 26932fe681dSStefano Zampini 27032fe681dSStefano Zampini PetscFunctionBegin; 27132fe681dSStefano Zampini PetscCall(PCShellGetContext(pc,&ctx)); 27232fe681dSStefano Zampini PetscCall(PCBDDCReuseSolversReset(ctx)); 27332fe681dSStefano Zampini PetscCall(PetscFree(ctx)); 27432fe681dSStefano Zampini PetscCall(PCShellSetContext(pc,NULL)); 27532fe681dSStefano Zampini PetscFunctionReturn(0); 27632fe681dSStefano Zampini } 27732fe681dSStefano Zampini 2785ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 2793202ece2SStefano Zampini { 2803202ece2SStefano Zampini Mat B, C, D, Bd, Cd, AinvBd; 2813202ece2SStefano Zampini KSP ksp; 2823202ece2SStefano Zampini PC pc; 2833202ece2SStefano Zampini PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 2843202ece2SStefano Zampini PetscReal fill = 2.0; 285f11841e3SStefano Zampini PetscInt n_I; 2863202ece2SStefano Zampini PetscMPIInt size; 2873202ece2SStefano Zampini 2883202ece2SStefano Zampini PetscFunctionBegin; 2899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size)); 2907827d75bSBarry Smith PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices"); 291f11841e3SStefano Zampini if (reuse == MAT_REUSE_MATRIX) { 292f11841e3SStefano Zampini PetscBool Sdense; 293f11841e3SStefano Zampini 2949566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 29528b400f6SJacob Faibussowitsch PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense"); 296f11841e3SStefano Zampini } 2979566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 2989566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(M, &ksp)); 2999566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 3009566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU)); 3019566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU)); 3029566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL)); 3039566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense)); 3049566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense)); 3059566063dSJacob Faibussowitsch PetscCall(MatGetSize(B,&n_I,NULL)); 306f11841e3SStefano Zampini if (n_I) { 3073202ece2SStefano Zampini if (!Bdense) { 3089566063dSJacob Faibussowitsch PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 3093202ece2SStefano Zampini } else { 3103202ece2SStefano Zampini Bd = B; 3113202ece2SStefano Zampini } 3123202ece2SStefano Zampini 3133202ece2SStefano Zampini if (isLU || isILU || isCHOL) { 3143202ece2SStefano Zampini Mat fact; 3159566063dSJacob Faibussowitsch PetscCall(KSPSetUp(ksp)); 3169566063dSJacob Faibussowitsch PetscCall(PCFactorGetMatrix(pc, &fact)); 3179566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 3189566063dSJacob Faibussowitsch PetscCall(MatMatSolve(fact, Bd, AinvBd)); 3193202ece2SStefano Zampini } else { 32007b1e237SStefano Zampini PetscBool ex = PETSC_TRUE; 32107b1e237SStefano Zampini 32207b1e237SStefano Zampini if (ex) { 3233202ece2SStefano Zampini Mat Ainvd; 3243202ece2SStefano Zampini 3259566063dSJacob Faibussowitsch PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 3269566063dSJacob Faibussowitsch PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 3279566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Ainvd)); 32807b1e237SStefano Zampini } else { 32907b1e237SStefano Zampini Vec sol,rhs; 33007b1e237SStefano Zampini PetscScalar *arrayrhs,*arraysol; 33107b1e237SStefano Zampini PetscInt i,nrhs,n; 33207b1e237SStefano Zampini 3339566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 3349566063dSJacob Faibussowitsch PetscCall(MatGetSize(Bd,&n,&nrhs)); 3359566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(Bd,&arrayrhs)); 3369566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(AinvBd,&arraysol)); 3379566063dSJacob Faibussowitsch PetscCall(KSPGetSolution(ksp,&sol)); 3389566063dSJacob Faibussowitsch PetscCall(KSPGetRhs(ksp,&rhs)); 33907b1e237SStefano Zampini for (i=0;i<nrhs;i++) { 3409566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(rhs,arrayrhs+i*n)); 3419566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(sol,arraysol+i*n)); 3429566063dSJacob Faibussowitsch PetscCall(KSPSolve(ksp,rhs,sol)); 3439566063dSJacob Faibussowitsch PetscCall(VecResetArray(rhs)); 3449566063dSJacob Faibussowitsch PetscCall(VecResetArray(sol)); 34507b1e237SStefano Zampini } 3469566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(Bd,&arrayrhs)); 3479566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(AinvBd,&arrayrhs)); 34807b1e237SStefano Zampini } 3493202ece2SStefano Zampini } 3505ec10c6aSStefano Zampini if (!Bdense & !issym) { 3519566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bd)); 3523202ece2SStefano Zampini } 3535ec10c6aSStefano Zampini 3545ec10c6aSStefano Zampini if (!issym) { 3553202ece2SStefano Zampini if (!Cdense) { 3569566063dSJacob Faibussowitsch PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 3573202ece2SStefano Zampini } else { 3583202ece2SStefano Zampini Cd = C; 3593202ece2SStefano Zampini } 3609566063dSJacob Faibussowitsch PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 3613202ece2SStefano Zampini if (!Cdense) { 3629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Cd)); 3633202ece2SStefano Zampini } 3645ec10c6aSStefano Zampini } else { 3659566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 3665ec10c6aSStefano Zampini if (!Bdense) { 3679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bd)); 3685ec10c6aSStefano Zampini } 3695ec10c6aSStefano Zampini } 3709566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AinvBd)); 371f11841e3SStefano Zampini } 3723202ece2SStefano Zampini 3733202ece2SStefano Zampini if (D) { 3743202ece2SStefano Zampini Mat Dd; 3753202ece2SStefano Zampini PetscBool Ddense; 3763202ece2SStefano Zampini 3779566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense)); 3783202ece2SStefano Zampini if (!Ddense) { 3799566063dSJacob Faibussowitsch PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 3803202ece2SStefano Zampini } else { 3813202ece2SStefano Zampini Dd = D; 3823202ece2SStefano Zampini } 383f11841e3SStefano Zampini if (n_I) { 3849566063dSJacob Faibussowitsch PetscCall(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN)); 385f11841e3SStefano Zampini } else { 386f11841e3SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 3879566063dSJacob Faibussowitsch PetscCall(MatDuplicate(Dd,MAT_COPY_VALUES,S)); 388f11841e3SStefano Zampini } else { 3899566063dSJacob Faibussowitsch PetscCall(MatCopy(Dd,*S,SAME_NONZERO_PATTERN)); 390f11841e3SStefano Zampini } 391f11841e3SStefano Zampini } 3923202ece2SStefano Zampini if (!Ddense) { 3939566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Dd)); 3943202ece2SStefano Zampini } 3953202ece2SStefano Zampini } else { 3969566063dSJacob Faibussowitsch PetscCall(MatScale(*S,-1.0)); 3973202ece2SStefano Zampini } 3983202ece2SStefano Zampini PetscFunctionReturn(0); 3993202ece2SStefano Zampini } 40034a97f8cSStefano Zampini 40191af6908SStefano Zampini PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 402b1b3d7a2SStefano Zampini { 403be83ff47SStefano Zampini Mat F,A_II,A_IB,A_BI,A_BB,AE_II; 404be83ff47SStefano Zampini Mat S_all; 40557a87bf3SStefano Zampini Vec gstash,lstash; 40657a87bf3SStefano Zampini VecScatter sstash; 407b7ab4a40SStefano Zampini IS is_I,is_I_layer; 408dc456d91SStefano Zampini IS all_subsets,all_subsets_mult,all_subsets_n; 40957a87bf3SStefano Zampini PetscScalar *stasharray,*Bwork; 410dc456d91SStefano Zampini PetscInt *nnz,*all_local_idx_N; 411dc456d91SStefano Zampini PetscInt *auxnum1,*auxnum2; 4125a95e1ceSStefano Zampini PetscInt i,subset_size,max_subset_size; 413683d3df6SStefano Zampini PetscInt n_B,extra,local_size,global_size; 41457a87bf3SStefano Zampini PetscInt local_stash_size; 41508122e43SStefano Zampini PetscBLASInt B_N,B_ierr,B_lwork,*pivots; 4165a95e1ceSStefano Zampini MPI_Comm comm_n; 417f4f7d9d6SStefano Zampini PetscBool deluxe = PETSC_TRUE; 418f4f7d9d6SStefano Zampini PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 4193b03f7bbSStefano Zampini PetscViewer matl_dbg_viewer = NULL; 42035d0533cSStefano Zampini PetscBool flg; 421b1b3d7a2SStefano Zampini 422b1b3d7a2SStefano Zampini PetscFunctionBegin; 4239566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A)); 4249566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S)); 425e62b6521Sstefano_zampini if (Ain) { 4269566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Ain)); 427a64f4aa4SStefano Zampini sub_schurs->A = Ain; 428a64f4aa4SStefano Zampini } 4293301b35fSStefano Zampini 4309566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)Sin)); 431a64f4aa4SStefano Zampini sub_schurs->S = Sin; 432df4d28bfSStefano Zampini if (sub_schurs->schur_explicit) { 433df4d28bfSStefano Zampini sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 434a64f4aa4SStefano Zampini } 435a64f4aa4SStefano Zampini 4365a95e1ceSStefano Zampini /* preliminary checks */ 4377827d75bSBarry Smith PetscCheck(sub_schurs->schur_explicit || !compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 4385a95e1ceSStefano Zampini 43988113c35SStefano Zampini if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 44088113c35SStefano Zampini 4413b03f7bbSStefano Zampini /* debug (MATLAB) */ 4427f9db97bSStefano Zampini if (sub_schurs->debug) { 4437f9db97bSStefano Zampini PetscMPIInt size,rank; 4447ebab0bbSStefano Zampini PetscInt nr,*print_schurs_ranks,print_schurs = PETSC_FALSE; 4457f9db97bSStefano Zampini 4469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size)); 4479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 4487f9db97bSStefano Zampini nr = size; 4499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr,&print_schurs_ranks)); 450d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 4519566063dSJacob Faibussowitsch PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg)); 4527f9db97bSStefano Zampini if (!flg) print_schurs = PETSC_TRUE; 4537f9db97bSStefano Zampini else { 4547ebab0bbSStefano Zampini print_schurs = PETSC_FALSE; 4557f9db97bSStefano Zampini for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; } 4567f9db97bSStefano Zampini } 457d0609cedSBarry Smith PetscOptionsEnd(); 4589566063dSJacob Faibussowitsch PetscCall(PetscFree(print_schurs_ranks)); 4593b03f7bbSStefano Zampini if (print_schurs) { 4603b03f7bbSStefano Zampini char filename[256]; 4613b03f7bbSStefano Zampini 4629566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank)); 4639566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer)); 4649566063dSJacob Faibussowitsch PetscCall(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB)); 4653b03f7bbSStefano Zampini } 4667f9db97bSStefano Zampini } 4677f9db97bSStefano Zampini 4685a95e1ceSStefano Zampini /* restrict work on active processes */ 469991c41b4SStefano Zampini if (sub_schurs->restrict_comm) { 470991c41b4SStefano Zampini PetscSubcomm subcomm; 471991c41b4SStefano Zampini PetscMPIInt color,rank; 472991c41b4SStefano Zampini 4735a95e1ceSStefano Zampini color = 0; 4745a95e1ceSStefano Zampini if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 4759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 4769566063dSJacob Faibussowitsch PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm)); 4779566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetNumber(subcomm,2)); 4789566063dSJacob Faibussowitsch PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 4799566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL)); 4809566063dSJacob Faibussowitsch PetscCall(PetscSubcommDestroy(&subcomm)); 4815a95e1ceSStefano Zampini if (!sub_schurs->n_subs) { 4829566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n)); 4835a95e1ceSStefano Zampini PetscFunctionReturn(0); 4845a95e1ceSStefano Zampini } 485991c41b4SStefano Zampini } else { 4869566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL)); 487991c41b4SStefano Zampini } 4885a95e1ceSStefano Zampini 489b1b3d7a2SStefano Zampini /* get Schur complement matrices */ 490df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 491a64f4aa4SStefano Zampini Mat tA_IB,tA_BI,tA_BB; 4923301b35fSStefano Zampini PetscBool isseqsbaij; 4939566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB)); 4949566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij)); 4953301b35fSStefano Zampini if (isseqsbaij) { 4969566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB)); 4979566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB)); 4989566063dSJacob Faibussowitsch PetscCall(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI)); 499a64f4aa4SStefano Zampini } else { 5009566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BB)); 501a64f4aa4SStefano Zampini A_BB = tA_BB; 5029566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_IB)); 503a64f4aa4SStefano Zampini A_IB = tA_IB; 5049566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)tA_BI)); 505a64f4aa4SStefano Zampini A_BI = tA_BI; 506f11841e3SStefano Zampini } 507a58a30b4SStefano Zampini } else { 5085a95e1ceSStefano Zampini A_II = NULL; 5095a95e1ceSStefano Zampini A_IB = NULL; 5105a95e1ceSStefano Zampini A_BI = NULL; 5115a95e1ceSStefano Zampini A_BB = NULL; 512b1b3d7a2SStefano Zampini } 5135a95e1ceSStefano Zampini S_all = NULL; 514b1b3d7a2SStefano Zampini 515b1b3d7a2SStefano Zampini /* determine interior problems */ 5169566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I,&i)); 5173dc780c3SStefano Zampini if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 518b1b3d7a2SStefano Zampini PetscBT touched; 519b1b3d7a2SStefano Zampini const PetscInt* idx_B; 520b1b3d7a2SStefano Zampini PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 521b1b3d7a2SStefano Zampini 52228b400f6SJacob Faibussowitsch PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency"); 523b1b3d7a2SStefano Zampini /* get sizes */ 5249566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I,&n_I)); 5259566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B)); 526b1b3d7a2SStefano Zampini 5279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_I+n_B,&local_numbering)); 5289566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_I+n_B,&touched)); 5299566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(n_I+n_B,touched)); 530b1b3d7a2SStefano Zampini 531b1b3d7a2SStefano Zampini /* all boundary dofs must be skipped when adding layers */ 5329566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_B,&idx_B)); 533b1b3d7a2SStefano Zampini for (j=0;j<n_B;j++) { 5349566063dSJacob Faibussowitsch PetscCall(PetscBTSet(touched,idx_B[j])); 535b1b3d7a2SStefano Zampini } 5369566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(local_numbering,idx_B,n_B)); 5379566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_B,&idx_B)); 538b1b3d7a2SStefano Zampini 539b1b3d7a2SStefano Zampini /* add prescribed number of layers of dofs */ 540b1b3d7a2SStefano Zampini n_local_dofs = n_B; 541b1b3d7a2SStefano Zampini n_prev_added = n_B; 542b1b3d7a2SStefano Zampini for (layer=0;layer<nlayers;layer++) { 543b6bace71SJacob Faibussowitsch PetscInt n_added = 0; 544b1b3d7a2SStefano Zampini if (n_local_dofs == n_I+n_B) break; 54563a3b9bcSJacob Faibussowitsch PetscCheck(n_local_dofs <= n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")",layer,n_local_dofs,n_I+n_B); 5469566063dSJacob Faibussowitsch PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added)); 547b1b3d7a2SStefano Zampini n_prev_added = n_added; 548b1b3d7a2SStefano Zampini n_local_dofs += n_added; 549b1b3d7a2SStefano Zampini if (!n_added) break; 550b1b3d7a2SStefano Zampini } 5519566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&touched)); 552b1b3d7a2SStefano Zampini 553883469d8SStefano Zampini /* IS for I layer dofs in original numbering */ 5549566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer)); 5559566063dSJacob Faibussowitsch PetscCall(PetscFree(local_numbering)); 5569566063dSJacob Faibussowitsch PetscCall(ISSort(is_I_layer)); 557883469d8SStefano Zampini /* IS for I layer dofs in I numbering */ 558df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 559b1b3d7a2SStefano Zampini ISLocalToGlobalMapping ItoNmap; 5609566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap)); 5619566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I)); 5629566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 563b1b3d7a2SStefano Zampini 564b1b3d7a2SStefano Zampini /* II block */ 5659566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II)); 566b1b3d7a2SStefano Zampini } 567b1b3d7a2SStefano Zampini } else { 568b1b3d7a2SStefano Zampini PetscInt n_I; 569b1b3d7a2SStefano Zampini 570b1b3d7a2SStefano Zampini /* IS for I dofs in original numbering */ 5719566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 572a9b99552SStefano Zampini is_I_layer = sub_schurs->is_I; 573b1b3d7a2SStefano Zampini 574b1b3d7a2SStefano Zampini /* IS for I dofs in I numbering (strided 1) */ 575df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 5769566063dSJacob Faibussowitsch PetscCall(ISGetSize(sub_schurs->is_I,&n_I)); 5779566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I)); 578b1b3d7a2SStefano Zampini 579b1b3d7a2SStefano Zampini /* II block is the same */ 5809566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A_II)); 581b1b3d7a2SStefano Zampini AE_II = A_II; 582b1b3d7a2SStefano Zampini } 583b1b3d7a2SStefano Zampini } 5845a95e1ceSStefano Zampini 585883469d8SStefano Zampini /* Get info on subset sizes and sum of all subsets sizes */ 5865a95e1ceSStefano Zampini max_subset_size = 0; 587883469d8SStefano Zampini local_size = 0; 5885a95e1ceSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 5899566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 5905a95e1ceSStefano Zampini max_subset_size = PetscMax(subset_size,max_subset_size); 591883469d8SStefano Zampini local_size += subset_size; 592883469d8SStefano Zampini } 593883469d8SStefano Zampini 594883469d8SStefano Zampini /* Work arrays for local indices */ 595883469d8SStefano Zampini extra = 0; 5969566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B)); 597df4d28bfSStefano Zampini if (sub_schurs->schur_explicit && is_I_layer) { 5989566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_I_layer,&extra)); 599883469d8SStefano Zampini } 6009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_B+extra,&all_local_idx_N)); 601883469d8SStefano Zampini if (extra) { 602883469d8SStefano Zampini const PetscInt *idxs; 6039566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_I_layer,&idxs)); 6049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N,idxs,extra)); 6059566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_I_layer,&idxs)); 606883469d8SStefano Zampini } 6079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum1)); 6089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum2)); 609883469d8SStefano Zampini 610883469d8SStefano Zampini /* Get local indices in local numbering */ 611883469d8SStefano Zampini local_size = 0; 61257a87bf3SStefano Zampini local_stash_size = 0; 6135a95e1ceSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 614883469d8SStefano Zampini const PetscInt *idxs; 615883469d8SStefano Zampini 6169566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 6179566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 618eb595f79SStefano Zampini /* start (smallest in global ordering) and multiplicity */ 619eb595f79SStefano Zampini auxnum1[i] = idxs[0]; 62057a87bf3SStefano Zampini auxnum2[i] = subset_size*subset_size; 621883469d8SStefano Zampini /* subset indices in local numbering */ 6229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size)); 6239566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 624883469d8SStefano Zampini local_size += subset_size; 62557a87bf3SStefano Zampini local_stash_size += subset_size*subset_size; 626883469d8SStefano Zampini } 627883469d8SStefano Zampini 628f4f7d9d6SStefano Zampini /* allocate extra workspace needed only for GETRI or SYTRF */ 62911955456SStefano Zampini use_potr = use_sytr = PETSC_FALSE; 63011955456SStefano Zampini if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 631f4f7d9d6SStefano Zampini use_potr = PETSC_TRUE; 63211955456SStefano Zampini } else if (sub_schurs->is_symmetric) { 63311955456SStefano Zampini use_sytr = PETSC_TRUE; 63411955456SStefano Zampini } 63511955456SStefano Zampini if (local_size && !use_potr) { 63659ac4de7SStefano Zampini PetscScalar lwork,dummyscalar = 0.; 63759ac4de7SStefano Zampini PetscBLASInt dummyint = 0; 638d2627357SStefano Zampini 639d2627357SStefano Zampini B_lwork = -1; 6409566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(local_size,&B_N)); 6419566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 642f4f7d9d6SStefano Zampini if (use_sytr) { 643f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 64428b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr); 645f4f7d9d6SStefano Zampini } else { 64659ac4de7SStefano Zampini PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 64728b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr); 648f4f7d9d6SStefano Zampini } 6499566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 6509566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 6519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots)); 652056290a2SStefano Zampini } else { 653056290a2SStefano Zampini Bwork = NULL; 654056290a2SStefano Zampini pivots = NULL; 655d2627357SStefano Zampini } 656d2627357SStefano Zampini 65757a87bf3SStefano Zampini /* prepare data for summing up properly schurs on subsets */ 6589566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n)); 6599566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets)); 6609566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n)); 6619566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult)); 6629566063dSJacob Faibussowitsch PetscCall(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n)); 6639566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets)); 6649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_mult)); 6659566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(all_subsets_n,&i)); 66663a3b9bcSJacob Faibussowitsch PetscCheck(i == local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT,i,local_stash_size); 6679566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash)); 6689566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash)); 6699566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash)); 6709566063dSJacob Faibussowitsch PetscCall(ISDestroy(&all_subsets_n)); 6712972d61bSStefano Zampini 6725a95e1ceSStefano Zampini /* subset indices in local boundary numbering */ 6735a95e1ceSStefano Zampini if (!sub_schurs->is_Ej_all) { 6745a95e1ceSStefano Zampini PetscInt *all_local_idx_B; 6755a95e1ceSStefano Zampini 6769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(local_size,&all_local_idx_B)); 6779566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B)); 67863a3b9bcSJacob Faibussowitsch PetscCheck(subset_size == local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT,subset_size,local_size); 6799566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all)); 680b1b3d7a2SStefano Zampini } 681b1b3d7a2SStefano Zampini 68272b8c272SStefano Zampini if (change) { 68372b8c272SStefano Zampini ISLocalToGlobalMapping BtoS; 68472b8c272SStefano Zampini IS change_primal_B; 68572b8c272SStefano Zampini IS change_primal_all; 68672b8c272SStefano Zampini 68728b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 68828b400f6SJacob Faibussowitsch PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub)); 69072b8c272SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 69172b8c272SStefano Zampini ISLocalToGlobalMapping NtoS; 6929566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS)); 6939566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i])); 6949566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 69572b8c272SStefano Zampini } 6969566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B)); 6979566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS)); 6989566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all)); 6999566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 7009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_B)); 7019566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change)); 70272b8c272SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 70372b8c272SStefano Zampini Mat change_sub; 70472b8c272SStefano Zampini 7059566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 7069566063dSJacob Faibussowitsch PetscCall(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i])); 7079566063dSJacob Faibussowitsch PetscCall(KSPSetType(sub_schurs->change[i],KSPPREONLY)); 70872b8c272SStefano Zampini if (!sub_schurs->change_with_qr) { 7099566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub)); 71072b8c272SStefano Zampini } else { 71172b8c272SStefano Zampini Mat change_subt; 7129566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt)); 7139566063dSJacob Faibussowitsch PetscCall(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub)); 7149566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_subt)); 71572b8c272SStefano Zampini } 7169566063dSJacob Faibussowitsch PetscCall(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub)); 7179566063dSJacob Faibussowitsch PetscCall(MatDestroy(&change_sub)); 7189566063dSJacob Faibussowitsch PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix)); 7199566063dSJacob Faibussowitsch PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_")); 72072b8c272SStefano Zampini } 7219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&change_primal_all)); 72272b8c272SStefano Zampini } 72372b8c272SStefano Zampini 7245a95e1ceSStefano Zampini /* Local matrix of all local Schur on subsets (transposed) */ 7255a95e1ceSStefano Zampini if (!sub_schurs->S_Ej_all) { 72604c5b2e6SStefano Zampini Mat T; 72704c5b2e6SStefano Zampini PetscScalar *v; 72804c5b2e6SStefano Zampini PetscInt *ii,*jj; 72904c5b2e6SStefano Zampini PetscInt cum,i,j,k; 73004c5b2e6SStefano Zampini 73104c5b2e6SStefano Zampini /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 73204c5b2e6SStefano Zampini Allocate properly a representative matrix and duplicate */ 7339566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v)); 73404c5b2e6SStefano Zampini ii[0] = 0; 73504c5b2e6SStefano Zampini cum = 0; 73604c5b2e6SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 7379566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 73804c5b2e6SStefano Zampini for (j=0;j<subset_size;j++) { 73904c5b2e6SStefano Zampini const PetscInt row = cum+j; 74004c5b2e6SStefano Zampini PetscInt col = cum; 74104c5b2e6SStefano Zampini 74204c5b2e6SStefano Zampini ii[row+1] = ii[row] + subset_size; 74304c5b2e6SStefano Zampini for (k=ii[row];k<ii[row+1];k++) { 74404c5b2e6SStefano Zampini jj[k] = col; 74504c5b2e6SStefano Zampini col++; 74604c5b2e6SStefano Zampini } 74704c5b2e6SStefano Zampini } 74804c5b2e6SStefano Zampini cum += subset_size; 74904c5b2e6SStefano Zampini } 7509566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T)); 7519566063dSJacob Faibussowitsch PetscCall(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all)); 7529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 7539566063dSJacob Faibussowitsch PetscCall(PetscFree3(ii,jj,v)); 75404c5b2e6SStefano Zampini } 75504c5b2e6SStefano Zampini /* matrices for deluxe scaling and adaptive selection */ 75604c5b2e6SStefano Zampini if (compute_Stilda) { 75704c5b2e6SStefano Zampini if (!sub_schurs->sum_S_Ej_tilda_all) { 7589566063dSJacob Faibussowitsch PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all)); 75904c5b2e6SStefano Zampini } 76004c5b2e6SStefano Zampini if (!sub_schurs->sum_S_Ej_inv_all && deluxe) { 7619566063dSJacob Faibussowitsch PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all)); 76204c5b2e6SStefano Zampini } 763aa83b6aeSStefano Zampini } 764b1b3d7a2SStefano Zampini 7655a95e1ceSStefano Zampini /* Compute Schur complements explicitly */ 766be83ff47SStefano Zampini F = NULL; 767d943a642SStefano Zampini if (!sub_schurs->schur_explicit) { 768d943a642SStefano Zampini /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 769d943a642SStefano Zampini it is not efficient, unless the economic version of the scaling is used */ 7705a95e1ceSStefano Zampini Mat S_Ej_expl; 7715a95e1ceSStefano Zampini PetscScalar *work; 7725a95e1ceSStefano Zampini PetscInt j,*dummy_idx; 7735a95e1ceSStefano Zampini PetscBool Sdense; 7745a95e1ceSStefano Zampini 7759566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work)); 7765a95e1ceSStefano Zampini local_size = 0; 777b1b3d7a2SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 7785a95e1ceSStefano Zampini IS is_subset_B; 7795a95e1ceSStefano Zampini Mat AE_EE,AE_IE,AE_EI,S_Ej; 7805a95e1ceSStefano Zampini 7815a95e1ceSStefano Zampini /* subsets in original and boundary numbering */ 7829566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B)); 7835a95e1ceSStefano Zampini /* EE block */ 7849566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE)); 7855a95e1ceSStefano Zampini /* IE block */ 7869566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE)); 7875a95e1ceSStefano Zampini /* EI block */ 788d943a642SStefano Zampini if (sub_schurs->is_symmetric) { 7899566063dSJacob Faibussowitsch PetscCall(MatCreateTranspose(AE_IE,&AE_EI)); 790d943a642SStefano Zampini } else if (sub_schurs->is_hermitian) { 7919566063dSJacob Faibussowitsch PetscCall(MatCreateHermitianTranspose(AE_IE,&AE_EI)); 7925a95e1ceSStefano Zampini } else { 7939566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI)); 7945a95e1ceSStefano Zampini } 7959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_subset_B)); 7969566063dSJacob Faibussowitsch PetscCall(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej)); 7979566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EE)); 7989566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_IE)); 7999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_EI)); 800b1b3d7a2SStefano Zampini if (AE_II == A_II) { /* we can reuse the same ksp */ 801b1b3d7a2SStefano Zampini KSP ksp; 8029566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&ksp)); 8039566063dSJacob Faibussowitsch PetscCall(MatSchurComplementSetKSP(S_Ej,ksp)); 804b1b3d7a2SStefano Zampini } else { /* build new ksp object which inherits ksp and pc types from the original one */ 805b1b3d7a2SStefano Zampini KSP origksp,schurksp; 806b1b3d7a2SStefano Zampini PC origpc,schurpc; 807b1b3d7a2SStefano Zampini KSPType ksp_type; 808b1b3d7a2SStefano Zampini PetscInt n_internal; 8095a95e1ceSStefano Zampini PetscBool ispcnone; 810b1b3d7a2SStefano Zampini 8119566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&origksp)); 8129566063dSJacob Faibussowitsch PetscCall(MatSchurComplementGetKSP(S_Ej,&schurksp)); 8139566063dSJacob Faibussowitsch PetscCall(KSPGetType(origksp,&ksp_type)); 8149566063dSJacob Faibussowitsch PetscCall(KSPSetType(schurksp,ksp_type)); 8159566063dSJacob Faibussowitsch PetscCall(KSPGetPC(schurksp,&schurpc)); 8169566063dSJacob Faibussowitsch PetscCall(KSPGetPC(origksp,&origpc)); 8179566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone)); 8185a95e1ceSStefano Zampini if (!ispcnone) { 8195a95e1ceSStefano Zampini PCType pc_type; 8209566063dSJacob Faibussowitsch PetscCall(PCGetType(origpc,&pc_type)); 8219566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc,pc_type)); 8225a95e1ceSStefano Zampini } else { 8239566063dSJacob Faibussowitsch PetscCall(PCSetType(schurpc,PCLU)); 8245a95e1ceSStefano Zampini } 8259566063dSJacob Faibussowitsch PetscCall(ISGetSize(is_I,&n_internal)); 826365a3a41SStefano Zampini if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 8273ca39a21SBarry Smith MatSolverType solver = NULL; 8289566063dSJacob Faibussowitsch PetscCall(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver)); 829b1b3d7a2SStefano Zampini if (solver) { 8309566063dSJacob Faibussowitsch PetscCall(PCFactorSetMatSolverType(schurpc,solver)); 831b1b3d7a2SStefano Zampini } 832b1b3d7a2SStefano Zampini } 8339566063dSJacob Faibussowitsch PetscCall(KSPSetUp(schurksp)); 834b1b3d7a2SStefano Zampini } 8359566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 8369566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl)); 8379566063dSJacob Faibussowitsch PetscCall(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl)); 8389566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense)); 8395a95e1ceSStefano Zampini if (Sdense) { 8405a95e1ceSStefano Zampini for (j=0;j<subset_size;j++) { 8415a95e1ceSStefano Zampini dummy_idx[j]=local_size+j; 842b1b3d7a2SStefano Zampini } 8439566063dSJacob Faibussowitsch PetscCall(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES)); 8446c4ed002SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices"); 8459566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej)); 8469566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_Ej_expl)); 8475a95e1ceSStefano Zampini local_size += subset_size; 8485a95e1ceSStefano Zampini } 8499566063dSJacob Faibussowitsch PetscCall(PetscFree2(dummy_idx,work)); 850b1b3d7a2SStefano Zampini /* free */ 8519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I)); 8529566063dSJacob Faibussowitsch PetscCall(MatDestroy(&AE_II)); 8539566063dSJacob Faibussowitsch PetscCall(PetscFree(all_local_idx_N)); 854883469d8SStefano Zampini } else { 8555cbda25cSStefano Zampini Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL; 85632fe681dSStefano Zampini Mat *gdswA; 8579d54b7f4SStefano Zampini Vec Dall = NULL; 858ca92afb2SStefano Zampini IS is_A_all,*is_p_r = NULL; 8597ebab0bbSStefano Zampini MatType Stype; 8605cbda25cSStefano Zampini PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL; 86104c5b2e6SStefano Zampini PetscScalar *SEj_arr = NULL,*SEjinv_arr = NULL; 8621683a169SBarry Smith const PetscScalar *rS_data; 86304c5b2e6SStefano Zampini PetscInt n,n_I,size_schur,size_active_schur,cum,cum2; 8643fc34f97SStefano Zampini PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE; 8653fc34f97SStefano Zampini PetscBool schur_has_vertices,factor_workaround; 86611955456SStefano Zampini PetscBool use_cholesky; 8677ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 8687ebab0bbSStefano Zampini PetscBool oldpin; 8697ebab0bbSStefano Zampini #endif 870883469d8SStefano Zampini 871683d3df6SStefano Zampini /* get sizes */ 87281ea8064SStefano Zampini n_I = 0; 87381ea8064SStefano Zampini if (is_I_layer) { 8749566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_I_layer,&n_I)); 87581ea8064SStefano Zampini } 876683d3df6SStefano Zampini economic = PETSC_FALSE; 8779566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_I,&cum)); 878683d3df6SStefano Zampini if (cum != n_I) economic = PETSC_TRUE; 8799566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(sub_schurs->A,&n,NULL)); 8809d54b7f4SStefano Zampini size_active_schur = local_size; 8819d54b7f4SStefano Zampini 882f17d2ae1SStefano Zampini /* import scaling vector (wrong formulation if we have 3D edges) */ 8839d54b7f4SStefano Zampini if (scaling && compute_Stilda) { 8849d54b7f4SStefano Zampini const PetscScalar *array; 8859d54b7f4SStefano Zampini PetscScalar *array2; 8869d54b7f4SStefano Zampini const PetscInt *idxs; 8879d54b7f4SStefano Zampini PetscInt i; 8889d54b7f4SStefano Zampini 8899566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs)); 8909566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall)); 8919566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scaling,&array)); 8929566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall,&array2)); 8939d54b7f4SStefano Zampini for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]]; 8949566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall,&array2)); 8959566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scaling,&array)); 8969566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs)); 8979d54b7f4SStefano Zampini deluxe = PETSC_FALSE; 8989d54b7f4SStefano Zampini } 899d62866d3SStefano Zampini 900683d3df6SStefano Zampini /* size active schurs does not count any dirichlet or vertex dof on the interface */ 9013fc34f97SStefano Zampini factor_workaround = PETSC_FALSE; 9023fc34f97SStefano Zampini schur_has_vertices = PETSC_FALSE; 903683d3df6SStefano Zampini cum = n_I+size_active_schur; 904683d3df6SStefano Zampini if (sub_schurs->is_dir) { 905683d3df6SStefano Zampini const PetscInt* idxs; 906683d3df6SStefano Zampini PetscInt n_dir; 907683d3df6SStefano Zampini 9089566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir)); 9099566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs)); 9109566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir)); 9119566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs)); 912683d3df6SStefano Zampini cum += n_dir; 91332fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 914d62866d3SStefano Zampini } 915683d3df6SStefano Zampini /* include the primal vertices in the Schur complement */ 916367aa537SStefano Zampini if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 917683d3df6SStefano Zampini PetscInt n_v; 918683d3df6SStefano Zampini 9199566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 920683d3df6SStefano Zampini if (n_v) { 921683d3df6SStefano Zampini const PetscInt* idxs; 922683d3df6SStefano Zampini 9239566063dSJacob Faibussowitsch PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 9249566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_v)); 9259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 926683d3df6SStefano Zampini cum += n_v; 92732fe681dSStefano Zampini if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 9283fc34f97SStefano Zampini schur_has_vertices = PETSC_TRUE; 929683d3df6SStefano Zampini } 930683d3df6SStefano Zampini } 931683d3df6SStefano Zampini size_schur = cum - n_I; 9329566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all)); 9337ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 934b470e4b4SRichard Tran Mills oldpin = sub_schurs->A->boundtocpu; 9359566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A,PETSC_TRUE)); 9367ebab0bbSStefano Zampini #endif 937683d3df6SStefano Zampini if (cum == n) { 9389566063dSJacob Faibussowitsch PetscCall(ISSetPermutation(is_A_all)); 9399566063dSJacob Faibussowitsch PetscCall(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A)); 940683d3df6SStefano Zampini } else { 9419566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A)); 942683d3df6SStefano Zampini } 9437ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 9449566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(sub_schurs->A,oldpin)); 9457ebab0bbSStefano Zampini #endif 946*26cc229bSBarry Smith PetscCall(MatSetOptionsPrefixFactor(A,sub_schurs->prefix)); 947*26cc229bSBarry Smith PetscCall(MatAppendOptionsPrefixFactor(A,"sub_schurs_")); 948ca92afb2SStefano Zampini 949ca92afb2SStefano Zampini /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 9507ebab0bbSStefano Zampini this is a workaround */ 951ca92afb2SStefano Zampini if (benign_n) { 9527ebab0bbSStefano Zampini Vec v,benign_AIIm1_ones; 953ca92afb2SStefano Zampini ISLocalToGlobalMapping N_to_reor; 954ca92afb2SStefano Zampini IS is_p0,is_p0_p; 9555cbda25cSStefano Zampini PetscScalar *cs_AIB,*AIIm1_data; 9565cbda25cSStefano Zampini PetscInt sizeA; 957ca92afb2SStefano Zampini 9589566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor)); 9599566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 9609566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p)); 9619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0)); 9629566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 9639566063dSJacob Faibussowitsch PetscCall(VecGetSize(v,&sizeA)); 9649566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat)); 9659566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat)); 9669566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(cs_AIB_mat,&cs_AIB)); 9679566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 9689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n,&is_p_r)); 969ca92afb2SStefano Zampini /* compute colsum of A_IB restricted to pressures */ 970ca92afb2SStefano Zampini for (i=0;i<benign_n;i++) { 9717ebab0bbSStefano Zampini const PetscScalar *array; 972ca92afb2SStefano Zampini const PetscInt *idxs; 973ca92afb2SStefano Zampini PetscInt j,nz; 974ca92afb2SStefano Zampini 9759566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i])); 9769566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i],&nz)); 9779566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i],&idxs)); 9785cbda25cSStefano Zampini for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.; 9799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i],&idxs)); 9809566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 9819566063dSJacob Faibussowitsch PetscCall(MatMult(A,benign_AIIm1_ones,v)); 9829566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones)); 9839566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v,&array)); 98422db5ddcSStefano Zampini for (j=0;j<size_schur;j++) { 98522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 98622db5ddcSStefano Zampini cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz)); 98722db5ddcSStefano Zampini #else 98822db5ddcSStefano Zampini cs_AIB[i*size_schur + j] = array[j+n_I]/nz; 98922db5ddcSStefano Zampini #endif 99022db5ddcSStefano Zampini } 9919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v,&array)); 992ca92afb2SStefano Zampini } 9939566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB)); 9949566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 9959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v)); 9969566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones)); 9979566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 9989566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 9999566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 10009566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL)); 10019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p0_p)); 10029566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 1003ca92afb2SStefano Zampini } 10049566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric)); 10059566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian)); 10069566063dSJacob Faibussowitsch PetscCall(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef)); 1007883469d8SStefano Zampini 100811955456SStefano Zampini /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 100911955456SStefano Zampini use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 101011955456SStefano Zampini 1011683d3df6SStefano Zampini /* when using the benign subspace trick, the local Schur complements are SPD */ 101235d0533cSStefano Zampini /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 101335d0533cSStefano Zampini Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 101435d0533cSStefano Zampini if (benign_trick) { 101535d0533cSStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 10169566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg)); 101735d0533cSStefano Zampini if (flg) use_cholesky = PETSC_FALSE; 101835d0533cSStefano Zampini } 1019d47842beSStefano Zampini 1020f4f7d9d6SStefano Zampini if (n_I) { 10210aa714b2SStefano Zampini IS is_schur; 10227ebab0bbSStefano Zampini char stype[64]; 10234ba54290SStefano Zampini PetscBool gpu = PETSC_FALSE; 10245a05ddb0SStefano Zampini 102511955456SStefano Zampini if (use_cholesky) { 10269566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F)); 1027883469d8SStefano Zampini } else { 10289566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F)); 1029883469d8SStefano Zampini } 10309566063dSJacob Faibussowitsch PetscCall(MatSetErrorIfFailure(A,PETSC_TRUE)); 103135d0533cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 10329566063dSJacob Faibussowitsch if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F,10,10)); 103335d0533cSStefano Zampini #endif 1034883469d8SStefano Zampini /* subsets ordered last */ 10359566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur)); 10369566063dSJacob Faibussowitsch PetscCall(MatFactorSetSchurIS(F,is_schur)); 10379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_schur)); 1038883469d8SStefano Zampini 1039883469d8SStefano Zampini /* factorization step */ 104011955456SStefano Zampini if (use_cholesky) { 10419566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,NULL)); 1042be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 10439566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F,19,2)); 1044be83ff47SStefano Zampini #endif 10459566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorNumeric(F,A,NULL)); 1046a0b0af32SStefano Zampini S_lower_triangular = PETSC_TRUE; 1047883469d8SStefano Zampini } else { 10489566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL)); 1049be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 10509566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F,19,3)); 1051be83ff47SStefano Zampini #endif 10529566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(F,A,NULL)); 1053883469d8SStefano Zampini } 10549566063dSJacob Faibussowitsch PetscCall(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view")); 1055883469d8SStefano Zampini 10563b03f7bbSStefano Zampini if (matl_dbg_viewer) { 105711955456SStefano Zampini Mat S; 105811955456SStefano Zampini IS is; 105911955456SStefano Zampini 10609566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)A,"A")); 10619566063dSJacob Faibussowitsch PetscCall(MatView(A,matl_dbg_viewer)); 10629566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F,&S,NULL)); 10639566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S,"S")); 10649566063dSJacob Faibussowitsch PetscCall(MatView(S,matl_dbg_viewer)); 10659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S)); 10669566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is)); 10679566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is,"I")); 10689566063dSJacob Faibussowitsch PetscCall(ISView(is,matl_dbg_viewer)); 10699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is)); 10709566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is)); 10719566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is,"B")); 10729566063dSJacob Faibussowitsch PetscCall(ISView(is,matl_dbg_viewer)); 10739566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is)); 10749566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)is_A_all,"IA")); 10759566063dSJacob Faibussowitsch PetscCall(ISView(is_A_all,matl_dbg_viewer)); 107632fe681dSStefano Zampini for (i=0,cum=0;i<sub_schurs->n_subs;i++) { 107732fe681dSStefano Zampini IS is; 107832fe681dSStefano Zampini char name[16]; 107932fe681dSStefano Zampini 108032fe681dSStefano Zampini PetscCall(PetscSNPrintf(name,sizeof(name),"IE%" PetscInt_FMT,i)); 108132fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 108232fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is)); 108332fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)is,name)); 108432fe681dSStefano Zampini PetscCall(ISView(is,matl_dbg_viewer)); 108532fe681dSStefano Zampini PetscCall(ISDestroy(&is)); 108632fe681dSStefano Zampini if (sub_schurs->change) { 108732fe681dSStefano Zampini Mat T; 108832fe681dSStefano Zampini 108932fe681dSStefano Zampini PetscCall(PetscSNPrintf(name,sizeof(name),"TE%" PetscInt_FMT,i)); 109032fe681dSStefano Zampini PetscCall(KSPGetOperators(sub_schurs->change[i],&T,NULL)); 109132fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)T,name)); 109232fe681dSStefano Zampini PetscCall(MatView(T,matl_dbg_viewer)); 109332fe681dSStefano Zampini PetscCall(PetscSNPrintf(name,sizeof(name),"ITE%" PetscInt_FMT,i)); 109432fe681dSStefano Zampini PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i],name)); 109532fe681dSStefano Zampini PetscCall(ISView(sub_schurs->change_primal_sub[i],matl_dbg_viewer)); 109632fe681dSStefano Zampini } 109732fe681dSStefano Zampini cum += subset_size; 109832fe681dSStefano Zampini } 109932fe681dSStefano Zampini PetscCall(PetscViewerFlush(matl_dbg_viewer)); 110011955456SStefano Zampini } 110111955456SStefano Zampini 1102883469d8SStefano Zampini /* get explicit Schur Complement computed during numeric factorization */ 11039566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL)); 11049566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype))); 11054ba54290SStefano Zampini #if defined(PETSC_HAVE_CUDA) 11069566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"")); 11074ba54290SStefano Zampini #endif 11087ebab0bbSStefano Zampini if (gpu) { 11099566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype))); 11107ebab0bbSStefano Zampini } 11119566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL)); 11129566063dSJacob Faibussowitsch PetscCall(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all)); 11139566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef)); 11149566063dSJacob Faibussowitsch PetscCall(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian)); 11159566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all,&Stype)); 1116b3cb21ddSStefano Zampini 1117d62866d3SStefano Zampini /* we can reuse the solvers if we are not using the economic version */ 1118683d3df6SStefano Zampini reuse_solvers = (PetscBool)(reuse_solvers && !economic); 111932fe681dSStefano Zampini if (!sub_schurs->gdsw) { 1120683d3df6SStefano Zampini factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 112103dfb2d7SStefano Zampini if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) 112203dfb2d7SStefano Zampini reuse_solvers = factor_workaround = PETSC_FALSE; 112332fe681dSStefano Zampini } 1124df4d28bfSStefano Zampini solver_S = PETSC_TRUE; 1125ca92afb2SStefano Zampini 112672b8c272SStefano Zampini /* update the Schur complement with the change of basis on the pressures */ 1127ca92afb2SStefano Zampini if (benign_n) { 11287ebab0bbSStefano Zampini const PetscScalar *cs_AIB; 11297ebab0bbSStefano Zampini PetscScalar *S_data,*AIIm1_data; 11303b03f7bbSStefano Zampini Mat S2 = NULL,S3 = NULL; /* dbg */ 11313b03f7bbSStefano Zampini PetscScalar *S2_data,*S3_data; /* dbg */ 11327ebab0bbSStefano Zampini Vec v,benign_AIIm1_ones; 11335cbda25cSStefano Zampini PetscInt sizeA; 1134ca92afb2SStefano Zampini 11359566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all,&S_data)); 11369566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 11379566063dSJacob Faibussowitsch PetscCall(VecGetSize(v,&sizeA)); 1138ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 11399566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F,26,0)); 1140ca92afb2SStefano Zampini #endif 1141ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 11429566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F,70,1)); 1143ca92afb2SStefano Zampini #endif 11449566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB)); 11459566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 11463b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11479566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2)); 11489566063dSJacob Faibussowitsch PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3)); 11499566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S2,&S2_data)); 11509566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S3,&S3_data)); 11513b03f7bbSStefano Zampini } 1152ca92afb2SStefano Zampini for (i=0;i<benign_n;i++) { 11533b03f7bbSStefano Zampini PetscScalar *array,sum = 0.,one = 1.,*sums; 1154ca92afb2SStefano Zampini const PetscInt *idxs; 11553b03f7bbSStefano Zampini PetscInt k,j,nz; 115647484b83SStefano Zampini PetscBLASInt B_k,B_n; 1157ca92afb2SStefano Zampini 11589566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(benign_n,&sums)); 11599566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 11609566063dSJacob Faibussowitsch PetscCall(VecCopy(benign_AIIm1_ones,v)); 11619566063dSJacob Faibussowitsch PetscCall(MatSolve(F,v,benign_AIIm1_ones)); 11629566063dSJacob Faibussowitsch PetscCall(MatMult(A,benign_AIIm1_ones,v)); 11639566063dSJacob Faibussowitsch PetscCall(VecResetArray(benign_AIIm1_ones)); 11643b03f7bbSStefano Zampini /* p0 dofs (eliminated) are excluded from the sums */ 11653b03f7bbSStefano Zampini for (k=0;k<benign_n;k++) { 11669566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[k],&nz)); 11679566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[k],&idxs)); 11683b03f7bbSStefano Zampini for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i]; 11699566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[k],&idxs)); 11703b03f7bbSStefano Zampini } 11719566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 11723b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11733b03f7bbSStefano Zampini Vec vv; 11743b03f7bbSStefano Zampini char name[16]; 11753b03f7bbSStefano Zampini 11769566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv)); 117763a3b9bcSJacob Faibussowitsch PetscCall(PetscSNPrintf(name,sizeof(name),"Pvs%" PetscInt_FMT,i)); 11789566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)vv,name)); 11799566063dSJacob Faibussowitsch PetscCall(VecView(vv,matl_dbg_viewer)); 11803b03f7bbSStefano Zampini } 118147484b83SStefano Zampini /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 118247484b83SStefano Zampini /* cs_AIB already scaled by 1./nz */ 118347484b83SStefano Zampini B_k = 1; 11843b03f7bbSStefano Zampini for (k=0;k<benign_n;k++) { 11853b03f7bbSStefano Zampini sum = sums[k]; 11869566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(size_schur,&B_n)); 11873b03f7bbSStefano Zampini 11883b03f7bbSStefano Zampini if (PetscAbsScalar(sum) == 0.0) continue; 11893b03f7bbSStefano Zampini if (k == i) { 119047484b83SStefano Zampini PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 11913b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11923b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 11933b03f7bbSStefano Zampini } 11943b03f7bbSStefano Zampini } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 11953b03f7bbSStefano Zampini sum /= 2.0; 11963b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 11973b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11983b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 11993b03f7bbSStefano Zampini } 12003b03f7bbSStefano Zampini } 12013b03f7bbSStefano Zampini } 12025cbda25cSStefano Zampini sum = 1.; 120347484b83SStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 12043b03f7bbSStefano Zampini if (matl_dbg_viewer) { 12053b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n)); 12063b03f7bbSStefano Zampini } 12079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 12085cbda25cSStefano Zampini /* set p0 entry of AIIm1_ones to zero */ 12099566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_p_r[i],&nz)); 12109566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_p_r[i],&idxs)); 1211282d6408SStefano Zampini for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.; 12129566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_p_r[i],&idxs)); 12139566063dSJacob Faibussowitsch PetscCall(PetscFree(sums)); 12143b03f7bbSStefano Zampini } 12159566063dSJacob Faibussowitsch PetscCall(VecDestroy(&benign_AIIm1_ones)); 12163b03f7bbSStefano Zampini if (matl_dbg_viewer) { 12179566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S2,&S2_data)); 12189566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S3,&S3_data)); 1219ca92afb2SStefano Zampini } 1220a7414863SStefano Zampini if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1221a7414863SStefano Zampini PetscInt k,j; 1222a7414863SStefano Zampini for (k=0;k<size_schur;k++) { 1223a7414863SStefano Zampini for (j=k;j<size_schur;j++) { 1224a7414863SStefano Zampini S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]); 1225a7414863SStefano Zampini } 1226a7414863SStefano Zampini } 1227a7414863SStefano Zampini } 1228a7414863SStefano Zampini 12295cbda25cSStefano Zampini /* restore defaults */ 12305cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 12319566063dSJacob Faibussowitsch PetscCall(MatMumpsSetIcntl(F,26,-1)); 12325cbda25cSStefano Zampini #endif 12335cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 12349566063dSJacob Faibussowitsch PetscCall(MatMkl_PardisoSetCntl(F,70,0)); 12355cbda25cSStefano Zampini #endif 12369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB)); 12379566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 12389566063dSJacob Faibussowitsch PetscCall(VecDestroy(&v)); 12399566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all,&S_data)); 12403b03f7bbSStefano Zampini if (matl_dbg_viewer) { 12413b03f7bbSStefano Zampini Mat S; 12423b03f7bbSStefano Zampini 12439566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 12449566063dSJacob Faibussowitsch PetscCall(MatFactorCreateSchurComplement(F,&S,NULL)); 12459566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S,"Sb")); 12469566063dSJacob Faibussowitsch PetscCall(MatView(S,matl_dbg_viewer)); 12479566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S)); 12489566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S2,"S2P")); 12499566063dSJacob Faibussowitsch PetscCall(MatView(S2,matl_dbg_viewer)); 12509566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)S3,"S3P")); 12519566063dSJacob Faibussowitsch PetscCall(MatView(S3,matl_dbg_viewer)); 12529566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs")); 12539566063dSJacob Faibussowitsch PetscCall(MatView(cs_AIB_mat,matl_dbg_viewer)); 12549566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL)); 12553b03f7bbSStefano Zampini } 12569566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S2)); 12579566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S3)); 1258ca92afb2SStefano Zampini } 1259a3df083aSStefano Zampini if (!reuse_solvers) { 1260a3df083aSStefano Zampini for (i=0;i<benign_n;i++) { 12619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_p_r[i])); 1262a3df083aSStefano Zampini } 12639566063dSJacob Faibussowitsch PetscCall(PetscFree(is_p_r)); 12649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&cs_AIB_mat)); 12659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1266a3df083aSStefano Zampini } 1267df4d28bfSStefano Zampini } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 12689566063dSJacob Faibussowitsch PetscCall(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all)); 12699566063dSJacob Faibussowitsch PetscCall(MatGetType(S_all,&Stype)); 1270683d3df6SStefano Zampini reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1271166598c1SStefano Zampini factor_workaround = PETSC_FALSE; 1272df4d28bfSStefano Zampini solver_S = PETSC_FALSE; 1273be83ff47SStefano Zampini } 1274be83ff47SStefano Zampini 1275be83ff47SStefano Zampini if (reuse_solvers) { 127632fe681dSStefano Zampini Mat A_II,pA_II,Afake; 127753892102SStefano Zampini Vec vec1_B; 1278df4d28bfSStefano Zampini PCBDDCReuseSolvers msolv_ctx; 12793462e049SStefano Zampini PetscInt n_R; 1280d5574798SStefano Zampini 1281df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) { 12829566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1283e28d306cSStefano Zampini } else { 12849566063dSJacob Faibussowitsch PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1285d62866d3SStefano Zampini } 1286df4d28bfSStefano Zampini msolv_ctx = sub_schurs->reuse_solver; 128732fe681dSStefano Zampini PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,&pA_II,NULL,NULL,NULL)); 12889566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)F)); 1289d5574798SStefano Zampini msolv_ctx->F = F; 12909566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(F,&msolv_ctx->sol,NULL)); 1291683d3df6SStefano Zampini /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1292683d3df6SStefano Zampini { 1293683d3df6SStefano Zampini PetscScalar *array; 1294683d3df6SStefano Zampini PetscInt n; 1295683d3df6SStefano Zampini 12969566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(msolv_ctx->sol,&n)); 12979566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->sol,&array)); 12989566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs)); 12999566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->sol,&array)); 1300683d3df6SStefano Zampini } 13013fc34f97SStefano Zampini msolv_ctx->has_vertices = schur_has_vertices; 1302d62866d3SStefano Zampini 1303d62866d3SStefano Zampini /* interior solver */ 13049566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver)); 130532fe681dSStefano Zampini PetscCall(PCSetOperators(msolv_ctx->interior_solver,A_II,pA_II)); 13069566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->interior_solver,PCSHELL)); 13079566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)")); 13089566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx)); 13099566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 13109566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior)); 13119566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose)); 131232fe681dSStefano Zampini if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Destroy)); 1313d62866d3SStefano Zampini 1314d62866d3SStefano Zampini /* correction solver */ 131532fe681dSStefano Zampini if (!sub_schurs->gdsw) { 13169566063dSJacob Faibussowitsch PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver)); 13179566063dSJacob Faibussowitsch PetscCall(PCSetType(msolv_ctx->correction_solver,PCSHELL)); 13189566063dSJacob Faibussowitsch PetscCall(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)")); 13199566063dSJacob Faibussowitsch PetscCall(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx)); 13209566063dSJacob Faibussowitsch PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 13219566063dSJacob Faibussowitsch PetscCall(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction)); 13229566063dSJacob Faibussowitsch PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose)); 132353892102SStefano Zampini 132453892102SStefano Zampini /* scatter and vecs for Schur complement solver */ 13259566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B)); 13269566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(sub_schurs->S,&vec1_B,NULL)); 13273fc34f97SStefano Zampini if (!schur_has_vertices) { 13289566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B)); 13299566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B)); 13309566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_A_all)); 133153892102SStefano Zampini msolv_ctx->is_R = is_A_all; 1332683d3df6SStefano Zampini } else { 1333683d3df6SStefano Zampini IS is_B_all; 1334683d3df6SStefano Zampini const PetscInt* idxs; 1335683d3df6SStefano Zampini PetscInt dual,n_v,n; 1336683d3df6SStefano Zampini 13379566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 1338683d3df6SStefano Zampini dual = size_schur - n_v; 13399566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(is_A_all,&n)); 13409566063dSJacob Faibussowitsch PetscCall(ISGetIndices(is_A_all,&idxs)); 13419566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all)); 13429566063dSJacob Faibussowitsch PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B)); 13439566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all)); 13449566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all)); 13459566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B)); 13469566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_B_all)); 13479566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R)); 13489566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(is_A_all,&idxs)); 1349683d3df6SStefano Zampini } 13509566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(msolv_ctx->is_R,&n_R)); 13519566063dSJacob Faibussowitsch PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake)); 13529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY)); 13539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY)); 13549566063dSJacob Faibussowitsch PetscCall(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake)); 13559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Afake)); 13569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vec1_B)); 135732fe681dSStefano Zampini } 1358ca92afb2SStefano Zampini /* communicate benign info to solver context */ 1359ca92afb2SStefano Zampini if (benign_n) { 13605cbda25cSStefano Zampini PetscScalar *array; 13615cbda25cSStefano Zampini 1362ca92afb2SStefano Zampini msolv_ctx->benign_n = benign_n; 1363ca92afb2SStefano Zampini msolv_ctx->benign_zerodiag_subs = is_p_r; 13649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals)); 13655cbda25cSStefano Zampini msolv_ctx->benign_csAIB = cs_AIB_mat; 13669566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL)); 13679566063dSJacob Faibussowitsch PetscCall(VecGetArray(msolv_ctx->benign_corr_work,&array)); 13689566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec)); 13699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work,&array)); 13705cbda25cSStefano Zampini msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1371ca92afb2SStefano Zampini } 1372ada6e2d7SStefano Zampini } else { 1373ada6e2d7SStefano Zampini if (sub_schurs->reuse_solver) { 13749566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1375ada6e2d7SStefano Zampini } 13769566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver)); 1377d5574798SStefano Zampini } 13789566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 13799566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_A_all)); 13805db18549SStefano Zampini 1381be83ff47SStefano Zampini /* Work arrays */ 13829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(max_subset_size*max_subset_size,&work)); 1383d2627357SStefano Zampini 1384be83ff47SStefano Zampini /* S_Ej_all */ 1385be83ff47SStefano Zampini cum = cum2 = 0; 13869566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(S_all,&rS_data)); 13879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr)); 138832fe681dSStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) { 13899566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 139004c5b2e6SStefano Zampini } 139132fe681dSStefano Zampini if (sub_schurs->gdsw) { 139232fe681dSStefano Zampini PetscCall(MatCreateSubMatrices(sub_schurs->A,sub_schurs->n_subs,sub_schurs->is_subs,sub_schurs->is_subs,MAT_INITIAL_MATRIX,&gdswA)); 139332fe681dSStefano Zampini } 139465d8bf0aSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 139565d8bf0aSStefano Zampini PetscInt j; 139665d8bf0aSStefano Zampini 139732fe681dSStefano Zampini /* get S_E (or K^i_EE for GDSW) */ 13989566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 139932fe681dSStefano Zampini if (sub_schurs->gdsw) { 140032fe681dSStefano Zampini Mat T; 140132fe681dSStefano Zampini 140232fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&T)); 140332fe681dSStefano Zampini PetscCall(MatConvert(gdswA[i],MATDENSE,MAT_REUSE_MATRIX,&T)); 140432fe681dSStefano Zampini PetscCall(MatDestroy(&T)); 140532fe681dSStefano Zampini } else { 1406683d3df6SStefano Zampini if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1407be83ff47SStefano Zampini PetscInt k; 1408be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1409be83ff47SStefano Zampini for (j=k;j<subset_size;j++) { 14101683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 14111683a169SBarry Smith work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]); 1412be83ff47SStefano Zampini } 1413be83ff47SStefano Zampini } 141406a4e24aSStefano Zampini } else { /* just copy to workspace */ 1415be83ff47SStefano Zampini PetscInt k; 1416be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1417be83ff47SStefano Zampini for (j=0;j<subset_size;j++) { 14181683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1419be83ff47SStefano Zampini } 1420be83ff47SStefano Zampini } 14219087bf02SStefano Zampini } 142232fe681dSStefano Zampini } 14235a95e1ceSStefano Zampini /* insert S_E values */ 1424b7ab4a40SStefano Zampini if (sub_schurs->change) { 14258760537fSStefano Zampini Mat change_sub,SEj,T; 142672b8c272SStefano Zampini 142772b8c272SStefano Zampini /* change basis */ 14289566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 14299566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 14308760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 14318760537fSStefano Zampini Mat T2; 14329566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 14339566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 14349566063dSJacob Faibussowitsch PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 14359566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2)); 14368760537fSStefano Zampini } else { 14379566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 143872b8c272SStefano Zampini } 14399566063dSJacob Faibussowitsch PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 14409566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 14419566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL)); 14429566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 144372b8c272SStefano Zampini } 14449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 1445862806e4SStefano Zampini if (compute_Stilda) { 144632fe681dSStefano Zampini if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 14477ebab0bbSStefano Zampini Mat M; 14487ebab0bbSStefano Zampini const PetscScalar *vals; 14497ebab0bbSStefano Zampini PetscBool isdense,isdensecuda; 1450f4f7d9d6SStefano Zampini 14519566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M)); 14529566063dSJacob Faibussowitsch PetscCall(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef)); 14539566063dSJacob Faibussowitsch PetscCall(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian)); 14547ebab0bbSStefano Zampini if (!PetscBTLookup(sub_schurs->is_edge,i)) { 14559566063dSJacob Faibussowitsch PetscCall(MatSetType(M,Stype)); 14566c3e6151SStefano Zampini } 14579566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense)); 14589566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda)); 14597ebab0bbSStefano Zampini if (use_cholesky) { 14609566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M,NULL,NULL)); 1461d6462365SStefano Zampini } else { 14629566063dSJacob Faibussowitsch PetscCall(MatLUFactor(M,NULL,NULL,NULL)); 14632972d61bSStefano Zampini } 14647ebab0bbSStefano Zampini if (isdense) { 14659566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 14667ebab0bbSStefano Zampini #if defined(PETSC_HAVE_CUDA) 14677ebab0bbSStefano Zampini } else if (isdensecuda) { 14689566063dSJacob Faibussowitsch PetscCall(MatSeqDenseCUDAInvertFactors_Private(M)); 14697ebab0bbSStefano Zampini #endif 147098921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype); 14719566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(M,&vals)); 14729566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size)); 14739566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(M,&vals)); 14749566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 147532fe681dSStefano Zampini } else if (scaling) { /* not using deluxe */ 14769d54b7f4SStefano Zampini Mat SEj; 14779d54b7f4SStefano Zampini Vec D; 14789d54b7f4SStefano Zampini PetscScalar *array; 14799d54b7f4SStefano Zampini 14809566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 14819566063dSJacob Faibussowitsch PetscCall(VecGetArray(Dall,&array)); 14829566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D)); 14839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(Dall,&array)); 14849566063dSJacob Faibussowitsch PetscCall(VecShift(D,-1.)); 14859566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(SEj,D,D)); 14869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 14879566063dSJacob Faibussowitsch PetscCall(VecDestroy(&D)); 14889566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 14899d54b7f4SStefano Zampini } 149032fe681dSStefano Zampini } 1491be83ff47SStefano Zampini cum += subset_size; 1492be83ff47SStefano Zampini cum2 += subset_size*(size_schur + 1); 149304c5b2e6SStefano Zampini SEj_arr += subset_size*subset_size; 149404c5b2e6SStefano Zampini if (SEjinv_arr) SEjinv_arr += subset_size*subset_size; 1495be83ff47SStefano Zampini } 149632fe681dSStefano Zampini if (sub_schurs->gdsw) { 149732fe681dSStefano Zampini PetscCall(MatDestroySubMatrices(sub_schurs->n_subs,&gdswA)); 149832fe681dSStefano Zampini } 14999566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(S_all,&rS_data)); 15009566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr)); 150132fe681dSStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) { 15029566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 150304c5b2e6SStefano Zampini } 1504df4d28bfSStefano Zampini if (solver_S) { 15059566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 15064a6c6b0dSStefano Zampini } 1507683d3df6SStefano Zampini 15087ebab0bbSStefano Zampini /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 15097ebab0bbSStefano Zampini however, preliminary tests indicate using GPUs is still faster in the solve phase */ 15107ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 15117ebab0bbSStefano Zampini if (reuse_solvers) { 15127ebab0bbSStefano Zampini Mat St; 15137ebab0bbSStefano Zampini MatFactorSchurStatus st; 15147ebab0bbSStefano Zampini 151535d0533cSStefano Zampini flg = PETSC_FALSE; 15169566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL)); 15179566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&St,&st)); 15189566063dSJacob Faibussowitsch PetscCall(MatBindToCPU(St,flg)); 15199566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&St,st)); 15207ebab0bbSStefano Zampini } 15217ebab0bbSStefano Zampini #endif 15227ebab0bbSStefano Zampini 1523683d3df6SStefano Zampini schur_factor = NULL; 152445951f25SStefano Zampini if (compute_Stilda && size_active_schur) { 1525683d3df6SStefano Zampini 15269d54b7f4SStefano Zampini if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 152732fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 15289566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur)); 152932fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 15304a6c6b0dSStefano Zampini } else { 1531683d3df6SStefano Zampini Mat S_all_inv=NULL; 15327ebab0bbSStefano Zampini 153332fe681dSStefano Zampini if (solver_S && !sub_schurs->gdsw) { 1534683d3df6SStefano Zampini /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1535683d3df6SStefano Zampini The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 15363fc34f97SStefano Zampini if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1537683d3df6SStefano Zampini PetscScalar *data; 1538683d3df6SStefano Zampini PetscInt nd = 0; 15396dba178dSStefano Zampini 1540f4f7d9d6SStefano Zampini if (!use_potr) { 1541683d3df6SStefano Zampini SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1542683d3df6SStefano Zampini } 15439566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 15449566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv,&data)); 1545683d3df6SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 15469566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1547683d3df6SStefano Zampini } 15483fc34f97SStefano Zampini 15493fc34f97SStefano Zampini /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 15503fc34f97SStefano Zampini if (schur_has_vertices) { 15513fc34f97SStefano Zampini Mat M; 15523fc34f97SStefano Zampini PetscScalar *tdata; 15533fc34f97SStefano Zampini PetscInt nv = 0, news; 15543fc34f97SStefano Zampini 15559566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 15563fc34f97SStefano Zampini news = size_active_schur + nv; 15579566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(news*news,&tdata)); 1558683d3df6SStefano Zampini for (i=0;i<size_active_schur;i++) { 15599566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i)); 15609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv)); 15613fc34f97SStefano Zampini } 15623fc34f97SStefano Zampini for (i=0;i<nv;i++) { 15633fc34f97SStefano Zampini PetscInt k = i+size_active_schur; 15649566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i)); 15653fc34f97SStefano Zampini } 15663fc34f97SStefano Zampini 15679566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M)); 15689566063dSJacob Faibussowitsch PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 15699566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M,NULL,NULL)); 15703fc34f97SStefano Zampini /* save the factors */ 15713fc34f97SStefano Zampini cum = 0; 15729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor)); 15733fc34f97SStefano Zampini for (i=0;i<size_active_schur;i++) { 15749566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i)); 1575683d3df6SStefano Zampini cum += size_active_schur - i; 1576683d3df6SStefano Zampini } 15773fc34f97SStefano Zampini for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)])); 15789566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 15793fc34f97SStefano Zampini /* move back just the active dofs to the Schur complement */ 15803fc34f97SStefano Zampini for (i=0;i<size_active_schur;i++) { 15819566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur)); 15823fc34f97SStefano Zampini } 15839566063dSJacob Faibussowitsch PetscCall(PetscFree(tdata)); 15849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 15853fc34f97SStefano Zampini } else { /* we can factorize and invert just the activedofs */ 15863fc34f97SStefano Zampini Mat M; 15875002105bSStefano Zampini PetscScalar *aux; 15883fc34f97SStefano Zampini 15899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nd,&aux)); 15905002105bSStefano Zampini for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)]; 15919566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M)); 15929566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M,size_schur)); 15939566063dSJacob Faibussowitsch PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 15949566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactor(M,NULL,NULL)); 15959566063dSJacob Faibussowitsch PetscCall(MatSeqDenseInvertFactors_Private(M)); 15969566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 15979566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M)); 15989566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 15999566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 16009566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M)); 16019566063dSJacob Faibussowitsch PetscCall(MatDenseSetLDA(M,size_schur)); 16029566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 16039566063dSJacob Faibussowitsch PetscCall(MatDestroy(&M)); 16045002105bSStefano Zampini for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i]; 16059566063dSJacob Faibussowitsch PetscCall(PetscFree(aux)); 16063fc34f97SStefano Zampini } 16079566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv,&data)); 16083fc34f97SStefano Zampini } else { /* use MatFactor calls to invert S */ 16099566063dSJacob Faibussowitsch PetscCall(MatFactorInvertSchurComplement(F)); 16109566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1611683d3df6SStefano Zampini } 161232fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 16139566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)S_all)); 1614683d3df6SStefano Zampini S_all_inv = S_all; 16159566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all_inv,&S_data)); 16169566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(size_schur,&B_N)); 16179566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1618f4f7d9d6SStefano Zampini if (use_potr) { 1619be83ff47SStefano Zampini PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr)); 162028b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 1621be83ff47SStefano Zampini PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr)); 162228b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1623f4f7d9d6SStefano Zampini } else if (use_sytr) { 1624f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 162528b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1626f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr)); 162728b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1628d6462365SStefano Zampini } else { 1629d6462365SStefano Zampini PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr)); 163028b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1631d6462365SStefano Zampini PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 163228b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1633be83ff47SStefano Zampini } 16349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0*size_schur*size_schur*size_schur)); 16359566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 16369566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all_inv,&S_data)); 163732fe681dSStefano Zampini } else if (sub_schurs->gdsw) { 163832fe681dSStefano Zampini Mat tS, tX, SEj, S_II, S_IE, S_EE; 163932fe681dSStefano Zampini KSP pS_II; 164032fe681dSStefano Zampini PC pS_II_pc; 164132fe681dSStefano Zampini IS EE, II; 164232fe681dSStefano Zampini PetscInt nS; 164332fe681dSStefano Zampini 164432fe681dSStefano Zampini PetscCall(MatFactorCreateSchurComplement(F,&tS,NULL)); 164532fe681dSStefano Zampini PetscCall(MatGetSize(tS,&nS,NULL)); 164632fe681dSStefano Zampini PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 164732fe681dSStefano Zampini for (i=0,cum=0;i<sub_schurs->n_subs;i++) { /* naive implementation */ 164832fe681dSStefano Zampini PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 164932fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,SEjinv_arr,&SEj)); 165032fe681dSStefano Zampini 165132fe681dSStefano Zampini PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&EE)); 165232fe681dSStefano Zampini PetscCall(ISComplement(EE,0,nS,&II)); 165332fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS,II,II,MAT_INITIAL_MATRIX,&S_II)); 165432fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS,II,EE,MAT_INITIAL_MATRIX,&S_IE)); 165532fe681dSStefano Zampini PetscCall(MatCreateSubMatrix(tS,EE,EE,MAT_INITIAL_MATRIX,&S_EE)); 165632fe681dSStefano Zampini PetscCall(ISDestroy(&II)); 165732fe681dSStefano Zampini PetscCall(ISDestroy(&EE)); 165832fe681dSStefano Zampini 165932fe681dSStefano Zampini PetscCall(KSPCreate(PETSC_COMM_SELF,&pS_II)); 166032fe681dSStefano Zampini PetscCall(KSPSetType(pS_II,KSPPREONLY)); 166132fe681dSStefano Zampini PetscCall(KSPGetPC(pS_II,&pS_II_pc)); 166232fe681dSStefano Zampini PetscCall(PCSetType(pS_II_pc,PCSVD)); 166332fe681dSStefano Zampini PetscCall(KSPSetOptionsPrefix(pS_II,sub_schurs->prefix)); 166432fe681dSStefano Zampini PetscCall(KSPAppendOptionsPrefix(pS_II,"pseudo_")); 166532fe681dSStefano Zampini PetscCall(KSPSetOperators(pS_II,S_II,S_II)); 166632fe681dSStefano Zampini PetscCall(MatDestroy(&S_II)); 166732fe681dSStefano Zampini PetscCall(KSPSetFromOptions(pS_II)); 166832fe681dSStefano Zampini PetscCall(KSPSetUp(pS_II)); 166932fe681dSStefano Zampini PetscCall(MatDuplicate(S_IE,MAT_DO_NOT_COPY_VALUES,&tX)); 167032fe681dSStefano Zampini PetscCall(KSPMatSolve(pS_II,S_IE,tX)); 167132fe681dSStefano Zampini PetscCall(KSPDestroy(&pS_II)); 167232fe681dSStefano Zampini 167332fe681dSStefano Zampini PetscCall(MatTransposeMatMult(S_IE,tX,MAT_REUSE_MATRIX,PETSC_DEFAULT,&SEj)); 167432fe681dSStefano Zampini PetscCall(MatDestroy(&S_IE)); 167532fe681dSStefano Zampini PetscCall(MatDestroy(&tX)); 167632fe681dSStefano Zampini PetscCall(MatAYPX(SEj,-1,S_EE,SAME_NONZERO_PATTERN)); 167732fe681dSStefano Zampini PetscCall(MatDestroy(&S_EE)); 167832fe681dSStefano Zampini 167932fe681dSStefano Zampini PetscCall(MatDestroy(&SEj)); 168032fe681dSStefano Zampini cum += subset_size; 168132fe681dSStefano Zampini SEjinv_arr += subset_size*subset_size; 168232fe681dSStefano Zampini } 168332fe681dSStefano Zampini PetscCall(MatDestroy(&tS)); 168432fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1685be83ff47SStefano Zampini } 1686be83ff47SStefano Zampini /* S_Ej_tilda_all */ 1687be83ff47SStefano Zampini cum = cum2 = 0; 168832fe681dSStefano Zampini rS_data = NULL; 168932fe681dSStefano Zampini if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv,&rS_data)); 169032fe681dSStefano Zampini PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1691be83ff47SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 1692be83ff47SStefano Zampini PetscInt j; 1693862806e4SStefano Zampini 16949566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1695be83ff47SStefano Zampini /* get (St^-1)_E */ 169672b8c272SStefano Zampini /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 169706a4e24aSStefano Zampini will be properly accessed later during adaptive selection */ 169832fe681dSStefano Zampini if (rS_data) { 1699a0b0af32SStefano Zampini if (S_lower_triangular) { 1700be83ff47SStefano Zampini PetscInt k; 1701b7ab4a40SStefano Zampini if (sub_schurs->change) { 1702be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1703be83ff47SStefano Zampini for (j=k;j<subset_size;j++) { 17041683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 17056c3e6151SStefano Zampini work[j*subset_size+k] = work[k*subset_size+j]; 1706be83ff47SStefano Zampini } 1707be83ff47SStefano Zampini } 170872b8c272SStefano Zampini } else { 170972b8c272SStefano Zampini for (k=0;k<subset_size;k++) { 171072b8c272SStefano Zampini for (j=k;j<subset_size;j++) { 17111683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 171272b8c272SStefano Zampini } 171372b8c272SStefano Zampini } 171472b8c272SStefano Zampini } 171572b8c272SStefano Zampini } else { 1716be83ff47SStefano Zampini PetscInt k; 1717be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1718be83ff47SStefano Zampini for (j=0;j<subset_size;j++) { 17191683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1720be83ff47SStefano Zampini } 1721be83ff47SStefano Zampini } 1722be83ff47SStefano Zampini } 172332fe681dSStefano Zampini } 1724b7ab4a40SStefano Zampini if (sub_schurs->change) { 17258760537fSStefano Zampini Mat change_sub,SEj,T; 172632fe681dSStefano Zampini PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1./PETSC_SMALL; 172772b8c272SStefano Zampini 172872b8c272SStefano Zampini /* change basis */ 17299566063dSJacob Faibussowitsch PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 173032fe681dSStefano Zampini PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,rS_data ? work : SEjinv_arr,&SEj)); 17318760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 17328760537fSStefano Zampini Mat T2; 17339566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 17349566063dSJacob Faibussowitsch PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 17359566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T2)); 17369566063dSJacob Faibussowitsch PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 17378760537fSStefano Zampini } else { 17389566063dSJacob Faibussowitsch PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 173972b8c272SStefano Zampini } 17409566063dSJacob Faibussowitsch PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 17419566063dSJacob Faibussowitsch PetscCall(MatDestroy(&T)); 174232fe681dSStefano Zampini PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],val,NULL,NULL)); 17439566063dSJacob Faibussowitsch PetscCall(MatDestroy(&SEj)); 174472b8c272SStefano Zampini } 174532fe681dSStefano Zampini if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size)); 1746be83ff47SStefano Zampini cum += subset_size; 1747be83ff47SStefano Zampini cum2 += subset_size*(size_schur + 1); 174804c5b2e6SStefano Zampini SEjinv_arr += subset_size*subset_size; 1749883469d8SStefano Zampini } 175032fe681dSStefano Zampini PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 175132fe681dSStefano Zampini if (S_all_inv) { 17529566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(S_all_inv,&rS_data)); 1753df4d28bfSStefano Zampini if (solver_S) { 17543fc34f97SStefano Zampini if (schur_has_vertices) { 17559566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED)); 17563fc34f97SStefano Zampini } else { 17579566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED)); 17585db18549SStefano Zampini } 17593fc34f97SStefano Zampini } 176032fe681dSStefano Zampini } 17619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all_inv)); 1762683d3df6SStefano Zampini } 1763683d3df6SStefano Zampini 17643fc34f97SStefano Zampini /* move back factors if needed */ 176532fe681dSStefano Zampini if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1766683d3df6SStefano Zampini Mat S_tmp; 17673fc34f97SStefano Zampini PetscInt nd = 0; 1768683d3df6SStefano Zampini 176928b400f6SJacob Faibussowitsch PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 17709566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&S_tmp,NULL)); 1771f4f7d9d6SStefano Zampini if (use_potr) { 1772683d3df6SStefano Zampini PetscScalar *data; 1773683d3df6SStefano Zampini 17749566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_tmp,&data)); 17759566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data,size_schur*size_schur)); 1776683d3df6SStefano Zampini 1777683d3df6SStefano Zampini if (S_lower_triangular) { 1778683d3df6SStefano Zampini cum = 0; 1779683d3df6SStefano Zampini for (i=0;i<size_active_schur;i++) { 17809566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i)); 1781683d3df6SStefano Zampini cum += size_active_schur-i; 1782683d3df6SStefano Zampini } 1783683d3df6SStefano Zampini } else { 17849566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(data,schur_factor,size_schur*size_schur)); 1785683d3df6SStefano Zampini } 1786683d3df6SStefano Zampini if (sub_schurs->is_dir) { 17879566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1788683d3df6SStefano Zampini for (i=0;i<nd;i++) { 1789683d3df6SStefano Zampini data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i]; 1790683d3df6SStefano Zampini } 1791683d3df6SStefano Zampini } 17926dba178dSStefano Zampini /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1793683d3df6SStefano Zampini set the diagonal entry of the Schur factor to a very large value */ 1794683d3df6SStefano Zampini for (i=size_active_schur+nd;i<size_schur;i++) { 17956c3e6151SStefano Zampini data[i*(size_schur+1)] = infty; 1796683d3df6SStefano Zampini } 17979566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_tmp,&data)); 17986542c05fSStefano Zampini } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 17999566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED)); 18009087bf02SStefano Zampini } 180132fe681dSStefano Zampini } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1802367aa537SStefano Zampini PetscScalar *data; 1803367aa537SStefano Zampini PetscInt nd = 0; 1804367aa537SStefano Zampini 1805367aa537SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 18069566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1807367aa537SStefano Zampini } 18089566063dSJacob Faibussowitsch PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL)); 18099566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(S_all,&data)); 1810367aa537SStefano Zampini for (i=0;i<size_active_schur;i++) { 18119566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 1812367aa537SStefano Zampini } 1813367aa537SStefano Zampini for (i=size_active_schur+nd;i<size_schur;i++) { 18149566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 18156c3e6151SStefano Zampini data[i*(size_schur+1)] = infty; 1816367aa537SStefano Zampini } 18179566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(S_all,&data)); 18189566063dSJacob Faibussowitsch PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 18194a6c6b0dSStefano Zampini } 18209566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 18219566063dSJacob Faibussowitsch PetscCall(PetscFree(schur_factor)); 18229566063dSJacob Faibussowitsch PetscCall(VecDestroy(&Dall)); 18234a6c6b0dSStefano Zampini } 18249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I_layer)); 18259566063dSJacob Faibussowitsch PetscCall(MatDestroy(&S_all)); 18269566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BB)); 18279566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_IB)); 18289566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A_BI)); 18299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&F)); 18306afe12f5SStefano Zampini 18319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sub_schurs->n_subs,&nnz)); 18326afe12f5SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 18339566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i])); 18346afe12f5SStefano Zampini } 18359566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer)); 18369566063dSJacob Faibussowitsch PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz)); 18379566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 18389566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 18395a95e1ceSStefano Zampini if (compute_Stilda) { 18409566063dSJacob Faibussowitsch PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz)); 18419566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 18429566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 18439d54b7f4SStefano Zampini if (deluxe) { 18449566063dSJacob Faibussowitsch PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz)); 18459566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 18469566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 184708122e43SStefano Zampini } 18489d54b7f4SStefano Zampini } 18499566063dSJacob Faibussowitsch PetscCall(ISDestroy(&is_I_layer)); 18506afe12f5SStefano Zampini 18515db18549SStefano Zampini /* Get local part of (\sum_j S_Ej) */ 185241fd5443SStefano Zampini if (!sub_schurs->sum_S_Ej_all) { 18539566063dSJacob Faibussowitsch PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all)); 185441fd5443SStefano Zampini } 18559566063dSJacob Faibussowitsch PetscCall(VecSet(gstash,0.0)); 18569566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray)); 18579566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash,stasharray)); 18589566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 18599566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 18609566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray)); 18619566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 18629566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray)); 18639566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash,stasharray)); 18649566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 18659566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 18669566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray)); 18679566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 186808122e43SStefano Zampini 1869f6f667cfSStefano Zampini /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 18705a95e1ceSStefano Zampini if (compute_Stilda) { 18719566063dSJacob Faibussowitsch PetscCall(VecSet(gstash,0.0)); 18729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 18739566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash,stasharray)); 18749566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 18759566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 18769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 18779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 18789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 18799566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 18809d54b7f4SStefano Zampini if (deluxe) { 18819566063dSJacob Faibussowitsch PetscCall(VecSet(gstash,0.0)); 18829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 18839566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(lstash,stasharray)); 18849566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 18859566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 18869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 18879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 18889566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 18899566063dSJacob Faibussowitsch PetscCall(VecResetArray(lstash)); 189032fe681dSStefano Zampini } else if (!sub_schurs->gdsw) { 18919d54b7f4SStefano Zampini PetscScalar *array; 18929d54b7f4SStefano Zampini PetscInt cum; 18939d54b7f4SStefano Zampini 18949566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 18959d54b7f4SStefano Zampini cum = 0; 18969d54b7f4SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 18979566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 18989566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(subset_size,&B_N)); 18999566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1900f4f7d9d6SStefano Zampini if (use_potr) { 19019d54b7f4SStefano Zampini PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr)); 190228b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 19039d54b7f4SStefano Zampini PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr)); 190428b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1905f4f7d9d6SStefano Zampini } else if (use_sytr) { 1906f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 190728b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1908f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr)); 190928b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1910f4f7d9d6SStefano Zampini } else { 1911f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr)); 191228b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1913f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 191428b400f6SJacob Faibussowitsch PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1915f4f7d9d6SStefano Zampini } 19169566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0*subset_size*subset_size*subset_size)); 19179566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 19189d54b7f4SStefano Zampini cum += subset_size*subset_size; 19199d54b7f4SStefano Zampini } 19209566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 19219566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 19229566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 19239d54b7f4SStefano Zampini sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 19249d54b7f4SStefano Zampini } 192508122e43SStefano Zampini } 19269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lstash)); 19279566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gstash)); 19289566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sstash)); 192957a87bf3SStefano Zampini 19303b03f7bbSStefano Zampini if (matl_dbg_viewer) { 193111955456SStefano Zampini if (sub_schurs->S_Ej_all) { 19329566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE")); 19339566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer)); 193411955456SStefano Zampini } 193511955456SStefano Zampini if (sub_schurs->sum_S_Ej_all) { 19369566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE")); 19379566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer)); 193811955456SStefano Zampini } 193911955456SStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) { 19409566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm")); 19419566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer)); 194211955456SStefano Zampini } 194311955456SStefano Zampini if (sub_schurs->sum_S_Ej_tilda_all) { 19449566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt")); 19459566063dSJacob Faibussowitsch PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer)); 194611955456SStefano Zampini } 194711955456SStefano Zampini } 19483202ece2SStefano Zampini 19495a95e1ceSStefano Zampini /* free workspace */ 195051ab8ad6SStefano Zampini if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 195151ab8ad6SStefano Zampini if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 19529566063dSJacob Faibussowitsch PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 19539566063dSJacob Faibussowitsch PetscCall(PetscFree2(Bwork,pivots)); 19549566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm_n)); 1955b1b3d7a2SStefano Zampini PetscFunctionReturn(0); 1956b1b3d7a2SStefano Zampini } 1957b1b3d7a2SStefano Zampini 195832fe681dSStefano Zampini PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) 1959b1b3d7a2SStefano Zampini { 19609bb4a8caSStefano Zampini IS *faces,*edges,*all_cc,vertices; 196132fe681dSStefano Zampini PetscInt s,i,n_faces,n_edges,n_all_cc; 1962365a3a41SStefano Zampini PetscBool is_sorted,ispardiso,ismumps; 1963b1b3d7a2SStefano Zampini 1964b1b3d7a2SStefano Zampini PetscFunctionBegin; 19659566063dSJacob Faibussowitsch PetscCall(ISSorted(is_I,&is_sorted)); 196628b400f6SJacob Faibussowitsch PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 19679566063dSJacob Faibussowitsch PetscCall(ISSorted(is_B,&is_sorted)); 196828b400f6SJacob Faibussowitsch PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 1969b1b3d7a2SStefano Zampini 1970b1b3d7a2SStefano Zampini /* reset any previous data */ 19719566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(sub_schurs)); 1972b1b3d7a2SStefano Zampini 197332fe681dSStefano Zampini sub_schurs->gdsw = gdsw; 197432fe681dSStefano Zampini 19755a95e1ceSStefano Zampini /* get index sets for faces and edges (already sorted by global ordering) */ 19769566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 1977b1b3d7a2SStefano Zampini n_all_cc = n_faces+n_edges; 19789566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(n_all_cc,&sub_schurs->is_edge)); 19799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_all_cc,&all_cc)); 198032fe681dSStefano Zampini n_all_cc = 0; 1981b1b3d7a2SStefano Zampini for (i=0;i<n_faces;i++) { 198232fe681dSStefano Zampini PetscCall(ISGetSize(faces[i],&s)); 198332fe681dSStefano Zampini if (!s) continue; 19848b6046baSStefano Zampini if (copycc) { 198532fe681dSStefano Zampini PetscCall(ISDuplicate(faces[i],&all_cc[n_all_cc])); 19868b6046baSStefano Zampini } else { 19879566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)faces[i])); 198832fe681dSStefano Zampini all_cc[n_all_cc] = faces[i]; 1989b1b3d7a2SStefano Zampini } 199032fe681dSStefano Zampini n_all_cc++; 19918b6046baSStefano Zampini } 1992b1b3d7a2SStefano Zampini for (i=0;i<n_edges;i++) { 199332fe681dSStefano Zampini PetscCall(ISGetSize(edges[i],&s)); 199432fe681dSStefano Zampini if (!s) continue; 19958b6046baSStefano Zampini if (copycc) { 199632fe681dSStefano Zampini PetscCall(ISDuplicate(edges[i],&all_cc[n_all_cc])); 19978b6046baSStefano Zampini } else { 19989566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)edges[i])); 199932fe681dSStefano Zampini all_cc[n_all_cc] = edges[i]; 20008b6046baSStefano Zampini } 200132fe681dSStefano Zampini PetscCall(PetscBTSet(sub_schurs->is_edge,n_all_cc)); 200232fe681dSStefano Zampini n_all_cc++; 2003b1b3d7a2SStefano Zampini } 20049566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)vertices)); 2005c8272957SStefano Zampini sub_schurs->is_vertices = vertices; 20069566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 2007d62866d3SStefano Zampini sub_schurs->is_dir = NULL; 20089566063dSJacob Faibussowitsch PetscCall(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir)); 2009b1b3d7a2SStefano Zampini 2010df4d28bfSStefano Zampini /* Determine if MatFactor can be used */ 20119566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(prefix,&sub_schurs->prefix)); 2012883469d8SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 20139566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type))); 201488113c35SStefano Zampini #elif defined(PETSC_HAVE_MKL_PARDISO) 20159566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type))); 201688113c35SStefano Zampini #else 20179566063dSJacob Faibussowitsch PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type))); 2018df4d28bfSStefano Zampini #endif 201988113c35SStefano Zampini #if defined(PETSC_USE_COMPLEX) 202088113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 202188113c35SStefano Zampini #else 202288113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_TRUE; 2023883469d8SStefano Zampini #endif 202488113c35SStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 202511955456SStefano Zampini sub_schurs->is_symmetric = PETSC_TRUE; 20267f9db97bSStefano Zampini sub_schurs->debug = PETSC_FALSE; 2027991c41b4SStefano Zampini sub_schurs->restrict_comm = PETSC_FALSE; 2028d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 20299566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL)); 20309566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL)); 20319566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL)); 20329566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL)); 20339566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL)); 20349566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL)); 2035d0609cedSBarry Smith PetscOptionsEnd(); 20369566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps)); 20379566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso)); 2038365a3a41SStefano Zampini sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 2039b1b3d7a2SStefano Zampini 204011955456SStefano Zampini /* for reals, symmetric and hermitian are synonims */ 204111955456SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 204211955456SStefano Zampini sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 204311955456SStefano Zampini sub_schurs->is_hermitian = sub_schurs->is_symmetric; 204411955456SStefano Zampini #endif 204511955456SStefano Zampini 20469566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_I)); 2047b1b3d7a2SStefano Zampini sub_schurs->is_I = is_I; 20489566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)is_B)); 2049b1b3d7a2SStefano Zampini sub_schurs->is_B = is_B; 20509566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 20515db18549SStefano Zampini sub_schurs->l2gmap = graph->l2gmap; 20529566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 20535db18549SStefano Zampini sub_schurs->BtoNmap = BtoNmap; 20545a95e1ceSStefano Zampini sub_schurs->n_subs = n_all_cc; 2055b1b3d7a2SStefano Zampini sub_schurs->is_subs = all_cc; 2056b96c3477SStefano Zampini sub_schurs->S_Ej_all = NULL; 2057b96c3477SStefano Zampini sub_schurs->sum_S_Ej_all = NULL; 205808122e43SStefano Zampini sub_schurs->sum_S_Ej_inv_all = NULL; 2059b96c3477SStefano Zampini sub_schurs->sum_S_Ej_tilda_all = NULL; 2060b96c3477SStefano Zampini sub_schurs->is_Ej_all = NULL; 2061b1b3d7a2SStefano Zampini PetscFunctionReturn(0); 2062b1b3d7a2SStefano Zampini } 2063b1b3d7a2SStefano Zampini 206434a97f8cSStefano Zampini PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 206534a97f8cSStefano Zampini { 206634a97f8cSStefano Zampini PCBDDCSubSchurs schurs_ctx; 206734a97f8cSStefano Zampini 206834a97f8cSStefano Zampini PetscFunctionBegin; 20699566063dSJacob Faibussowitsch PetscCall(PetscNew(&schurs_ctx)); 20705ff63025SStefano Zampini schurs_ctx->n_subs = 0; 207134a97f8cSStefano Zampini *sub_schurs = schurs_ctx; 207234a97f8cSStefano Zampini PetscFunctionReturn(0); 207334a97f8cSStefano Zampini } 207434a97f8cSStefano Zampini 207534a97f8cSStefano Zampini PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 207634a97f8cSStefano Zampini { 207734a97f8cSStefano Zampini PetscInt i; 207834a97f8cSStefano Zampini 207934a97f8cSStefano Zampini PetscFunctionBegin; 2080aea80f77Sstefano_zampini if (!sub_schurs) PetscFunctionReturn(0); 20819566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->prefix)); 20829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->A)); 20839566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S)); 20849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_I)); 20859566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_B)); 20869566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 20879566063dSJacob Faibussowitsch PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 20889566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 20899566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 20909566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 20919566063dSJacob Faibussowitsch PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 20929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 20939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_vertices)); 20949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_dir)); 20959566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 209634a97f8cSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 20979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->is_subs[i])); 209834a97f8cSStefano Zampini } 20995ff63025SStefano Zampini if (sub_schurs->n_subs) { 21009566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->is_subs)); 21013dc780c3SStefano Zampini } 2102df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) { 21039566063dSJacob Faibussowitsch PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2104d62866d3SStefano Zampini } 21059566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->reuse_solver)); 210672b8c272SStefano Zampini if (sub_schurs->change) { 210772b8c272SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 21089566063dSJacob Faibussowitsch PetscCall(KSPDestroy(&sub_schurs->change[i])); 21099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 211072b8c272SStefano Zampini } 211172b8c272SStefano Zampini } 21129566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change)); 21139566063dSJacob Faibussowitsch PetscCall(PetscFree(sub_schurs->change_primal_sub)); 211434a97f8cSStefano Zampini sub_schurs->n_subs = 0; 211534a97f8cSStefano Zampini PetscFunctionReturn(0); 211634a97f8cSStefano Zampini } 211734a97f8cSStefano Zampini 2118aea80f77Sstefano_zampini PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs) 2119aea80f77Sstefano_zampini { 2120aea80f77Sstefano_zampini PetscFunctionBegin; 21219566063dSJacob Faibussowitsch PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 21229566063dSJacob Faibussowitsch PetscCall(PetscFree(*sub_schurs)); 2123aea80f77Sstefano_zampini PetscFunctionReturn(0); 2124aea80f77Sstefano_zampini } 2125aea80f77Sstefano_zampini 21269fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 212734a97f8cSStefano Zampini { 212834a97f8cSStefano Zampini PetscInt i,j,n; 212934a97f8cSStefano Zampini 213034a97f8cSStefano Zampini PetscFunctionBegin; 213134a97f8cSStefano Zampini n = 0; 213234a97f8cSStefano Zampini for (i=-n_prev;i<0;i++) { 213334a97f8cSStefano Zampini PetscInt start_dof = queue_tip[i]; 213434a97f8cSStefano Zampini for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 213534a97f8cSStefano Zampini PetscInt dof = adjncy[j]; 213634a97f8cSStefano Zampini if (!PetscBTLookup(touched,dof)) { 21379566063dSJacob Faibussowitsch PetscCall(PetscBTSet(touched,dof)); 213834a97f8cSStefano Zampini queue_tip[n] = dof; 213934a97f8cSStefano Zampini n++; 214034a97f8cSStefano Zampini } 214134a97f8cSStefano Zampini } 214234a97f8cSStefano Zampini } 214334a97f8cSStefano Zampini *n_added = n; 214434a97f8cSStefano Zampini PetscFunctionReturn(0); 214534a97f8cSStefano Zampini } 2146