134a97f8cSStefano Zampini #include <../src/ksp/pc/impls/bddc/bddc.h> 234a97f8cSStefano Zampini #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 305709791SSatish Balay #include <../src/mat/impls/dense/seq/dense.h> 408122e43SStefano Zampini #include <petscblaslapack.h> 534a97f8cSStefano Zampini 69fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 75ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*); 8df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec); 9df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec); 10d62866d3SStefano Zampini 11ca92afb2SStefano Zampini /* if v2 is not present, correction is done in-place */ 125cbda25cSStefano Zampini PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13ca92afb2SStefano Zampini { 14ca92afb2SStefano Zampini PetscScalar *array; 15ca92afb2SStefano Zampini PetscScalar *array2; 16ca92afb2SStefano Zampini 17ca92afb2SStefano Zampini PetscFunctionBegin; 18ca92afb2SStefano Zampini if (!ctx->benign_n) PetscFunctionReturn(0); 195cbda25cSStefano Zampini if (sol && full) { 205cbda25cSStefano Zampini PetscInt n_I,size_schur; 215cbda25cSStefano Zampini 225cbda25cSStefano Zampini /* get sizes */ 23*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 24*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetSize(v,&n_I)); 255cbda25cSStefano Zampini n_I = n_I - size_schur; 265cbda25cSStefano Zampini /* get schur sol from array */ 27*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(v,&array)); 28*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 29*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(v,&array)); 305cbda25cSStefano Zampini /* apply interior sol correction */ 31*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work)); 32*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(ctx->benign_dummy_schur_vec)); 33*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v)); 345cbda25cSStefano Zampini } 35ca92afb2SStefano Zampini if (v2) { 36ca92afb2SStefano Zampini PetscInt nl; 37ca92afb2SStefano Zampini 38*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array)); 39*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetLocalSize(v2,&nl)); 40*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(v2,&array2)); 41*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(array2,array,nl)); 42ca92afb2SStefano Zampini } else { 43*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(v,&array)); 44ca92afb2SStefano Zampini array2 = array; 45ca92afb2SStefano Zampini } 46ca92afb2SStefano Zampini if (!sol) { /* change rhs */ 47ca92afb2SStefano Zampini PetscInt n; 48ca92afb2SStefano Zampini for (n=0;n<ctx->benign_n;n++) { 49ca92afb2SStefano Zampini PetscScalar sum = 0.; 50ca92afb2SStefano Zampini const PetscInt *cols; 51ca92afb2SStefano Zampini PetscInt nz,i; 52ca92afb2SStefano Zampini 53*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 54*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 55ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) sum += array[cols[i]]; 5622db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 5722db5ddcSStefano Zampini sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 5822db5ddcSStefano Zampini #else 59ca92afb2SStefano Zampini sum = -sum/nz; 6022db5ddcSStefano Zampini #endif 61ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 62ca92afb2SStefano Zampini ctx->benign_save_vals[n] = array2[cols[nz-1]]; 63ca92afb2SStefano Zampini array2[cols[nz-1]] = sum; 64*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 65ca92afb2SStefano Zampini } 66ca92afb2SStefano Zampini } else { 67ca92afb2SStefano Zampini PetscInt n; 68ca92afb2SStefano Zampini for (n=0;n<ctx->benign_n;n++) { 69ca92afb2SStefano Zampini PetscScalar sum = 0.; 70ca92afb2SStefano Zampini const PetscInt *cols; 71ca92afb2SStefano Zampini PetscInt nz,i; 72*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 73*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 74ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) sum += array[cols[i]]; 7522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 7622db5ddcSStefano Zampini sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 7722db5ddcSStefano Zampini #else 78ca92afb2SStefano Zampini sum = -sum/nz; 7922db5ddcSStefano Zampini #endif 80ca92afb2SStefano Zampini for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 81ca92afb2SStefano Zampini array2[cols[nz-1]] = ctx->benign_save_vals[n]; 82*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 83ca92afb2SStefano Zampini } 84ca92afb2SStefano Zampini } 85ca92afb2SStefano Zampini if (v2) { 86*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 87*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(v2,&array2)); 88ca92afb2SStefano Zampini } else { 89*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(v,&array)); 90ca92afb2SStefano Zampini } 915cbda25cSStefano Zampini if (!sol && full) { 925cbda25cSStefano Zampini Vec usedv; 935cbda25cSStefano Zampini PetscInt n_I,size_schur; 945cbda25cSStefano Zampini 955cbda25cSStefano Zampini /* get sizes */ 96*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 97*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetSize(v,&n_I)); 985cbda25cSStefano Zampini n_I = n_I - size_schur; 995cbda25cSStefano Zampini /* compute schur rhs correction */ 1005cbda25cSStefano Zampini if (v2) { 1015cbda25cSStefano Zampini usedv = v2; 1025cbda25cSStefano Zampini } else { 1035cbda25cSStefano Zampini usedv = v; 1045cbda25cSStefano Zampini } 1055cbda25cSStefano Zampini /* apply schur rhs correction */ 106*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work)); 107*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(usedv,(const PetscScalar**)&array)); 108*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 109*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(usedv,(const PetscScalar**)&array)); 110*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec)); 111*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(ctx->benign_dummy_schur_vec)); 1125cbda25cSStefano Zampini } 113ca92afb2SStefano Zampini PetscFunctionReturn(0); 114ca92afb2SStefano Zampini } 115ca92afb2SStefano Zampini 116df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117d62866d3SStefano Zampini { 118df4d28bfSStefano Zampini PCBDDCReuseSolvers ctx; 119683d3df6SStefano Zampini PetscBool copy = PETSC_FALSE; 120d62866d3SStefano Zampini 121d62866d3SStefano Zampini PetscFunctionBegin; 122*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellGetContext(pc,&ctx)); 123683d3df6SStefano Zampini if (full) { 124d62866d3SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 125*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(ctx->F,26,-1)); 126d62866d3SStefano Zampini #endif 1275cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 128*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,0)); 1295cbda25cSStefano Zampini #endif 130683d3df6SStefano Zampini copy = ctx->has_vertices; 131d4933d67SStefano Zampini } else { /* interior solver */ 1326dba178dSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 133*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(ctx->F,26,0)); 1346dba178dSStefano Zampini #endif 135d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 136*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,1)); 137d4933d67SStefano Zampini #endif 138683d3df6SStefano Zampini copy = PETSC_TRUE; 139683d3df6SStefano Zampini } 140683d3df6SStefano Zampini /* copy rhs into factored matrix workspace */ 141683d3df6SStefano Zampini if (copy) { 142ca92afb2SStefano Zampini PetscInt n; 143df4d28bfSStefano Zampini PetscScalar *array,*array_solver; 144ca92afb2SStefano Zampini 145*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetLocalSize(rhs,&n)); 146*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(rhs,(const PetscScalar**)&array)); 147*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(ctx->rhs,&array_solver)); 148*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(array_solver,array,n)); 149*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(ctx->rhs,&array_solver)); 150*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(rhs,(const PetscScalar**)&array)); 151683d3df6SStefano Zampini 152*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full)); 153683d3df6SStefano Zampini if (transpose) { 154*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol)); 155683d3df6SStefano Zampini } else { 156*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolve(ctx->F,ctx->rhs,ctx->sol)); 157683d3df6SStefano Zampini } 158*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full)); 159683d3df6SStefano Zampini 160683d3df6SStefano Zampini /* get back data to caller worskpace */ 161*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 162*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(sol,&array)); 163*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(array,array_solver,n)); 164*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(sol,&array)); 165*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 166683d3df6SStefano Zampini } else { 167ca92afb2SStefano Zampini if (ctx->benign_n) { 168*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full)); 169ca92afb2SStefano Zampini if (transpose) { 170*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolveTranspose(ctx->F,ctx->rhs,sol)); 171ca92afb2SStefano Zampini } else { 172*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolve(ctx->F,ctx->rhs,sol)); 173ca92afb2SStefano Zampini } 174*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full)); 175ca92afb2SStefano Zampini } else { 176e28d306cSStefano Zampini if (transpose) { 177*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolveTranspose(ctx->F,rhs,sol)); 178e28d306cSStefano Zampini } else { 179*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolve(ctx->F,rhs,sol)); 180e28d306cSStefano Zampini } 181683d3df6SStefano Zampini } 182ca92afb2SStefano Zampini } 1835cbda25cSStefano Zampini /* restore defaults */ 1845cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 185*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(ctx->F,26,-1)); 1865cbda25cSStefano Zampini #endif 187d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 188*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,0)); 189d4933d67SStefano Zampini #endif 190d62866d3SStefano Zampini PetscFunctionReturn(0); 191d62866d3SStefano Zampini } 192d62866d3SStefano Zampini 193df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 194e28d306cSStefano Zampini { 195e28d306cSStefano Zampini PetscFunctionBegin; 196*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE)); 197e28d306cSStefano Zampini PetscFunctionReturn(0); 198e28d306cSStefano Zampini } 199e28d306cSStefano Zampini 200df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 201e28d306cSStefano Zampini { 202e28d306cSStefano Zampini PetscFunctionBegin; 203*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE)); 204683d3df6SStefano Zampini PetscFunctionReturn(0); 205683d3df6SStefano Zampini } 206683d3df6SStefano Zampini 207df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 208683d3df6SStefano Zampini { 209683d3df6SStefano Zampini PetscFunctionBegin; 210*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE)); 211683d3df6SStefano Zampini PetscFunctionReturn(0); 212683d3df6SStefano Zampini } 213683d3df6SStefano Zampini 214df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 215683d3df6SStefano Zampini { 216683d3df6SStefano Zampini PetscFunctionBegin; 217*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE)); 218e28d306cSStefano Zampini PetscFunctionReturn(0); 219e28d306cSStefano Zampini } 220e28d306cSStefano Zampini 22115579a77SStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 22215579a77SStefano Zampini { 22315579a77SStefano Zampini PCBDDCReuseSolvers ctx; 22415579a77SStefano Zampini PetscBool iascii; 22515579a77SStefano Zampini 22615579a77SStefano Zampini PetscFunctionBegin; 227*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellGetContext(pc,&ctx)); 228*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 22915579a77SStefano Zampini if (iascii) { 230*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO)); 23115579a77SStefano Zampini } 232*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(ctx->F,viewer)); 23315579a77SStefano Zampini if (iascii) { 234*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerPopFormat(viewer)); 23515579a77SStefano Zampini } 23615579a77SStefano Zampini PetscFunctionReturn(0); 23715579a77SStefano Zampini } 23815579a77SStefano Zampini 239df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 240d62866d3SStefano Zampini { 241ca92afb2SStefano Zampini PetscInt i; 242d62866d3SStefano Zampini 243d62866d3SStefano Zampini PetscFunctionBegin; 244*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&reuse->F)); 245*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&reuse->sol)); 246*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&reuse->rhs)); 247*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCDestroy(&reuse->interior_solver)); 248*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCDestroy(&reuse->correction_solver)); 249*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&reuse->is_R)); 250*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&reuse->is_B)); 251*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterDestroy(&reuse->correction_scatter_B)); 252*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&reuse->sol_B)); 253*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&reuse->rhs_B)); 254ca92afb2SStefano Zampini for (i=0;i<reuse->benign_n;i++) { 255*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&reuse->benign_zerodiag_subs[i])); 256ca92afb2SStefano Zampini } 257*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(reuse->benign_zerodiag_subs)); 258*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(reuse->benign_save_vals)); 259*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&reuse->benign_csAIB)); 260*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&reuse->benign_AIIm1ones)); 261*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&reuse->benign_corr_work)); 262*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&reuse->benign_dummy_schur_vec)); 263d62866d3SStefano Zampini PetscFunctionReturn(0); 264d62866d3SStefano Zampini } 265d5574798SStefano Zampini 2665ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 2673202ece2SStefano Zampini { 2683202ece2SStefano Zampini Mat B, C, D, Bd, Cd, AinvBd; 2693202ece2SStefano Zampini KSP ksp; 2703202ece2SStefano Zampini PC pc; 2713202ece2SStefano Zampini PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 2723202ece2SStefano Zampini PetscReal fill = 2.0; 273f11841e3SStefano Zampini PetscInt n_I; 2743202ece2SStefano Zampini PetscMPIInt size; 2753202ece2SStefano Zampini 2763202ece2SStefano Zampini PetscFunctionBegin; 277*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size)); 2782c71b3e2SJacob Faibussowitsch PetscCheckFalse(size != 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices"); 279f11841e3SStefano Zampini if (reuse == MAT_REUSE_MATRIX) { 280f11841e3SStefano Zampini PetscBool Sdense; 281f11841e3SStefano Zampini 282*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 2832c71b3e2SJacob Faibussowitsch PetscCheckFalse(!Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense"); 284f11841e3SStefano Zampini } 285*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 286*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetKSP(M, &ksp)); 287*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetPC(ksp, &pc)); 288*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU)); 289*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU)); 290*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL)); 291*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense)); 292*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense)); 293*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetSize(B,&n_I,NULL)); 294f11841e3SStefano Zampini if (n_I) { 2953202ece2SStefano Zampini if (!Bdense) { 296*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 2973202ece2SStefano Zampini } else { 2983202ece2SStefano Zampini Bd = B; 2993202ece2SStefano Zampini } 3003202ece2SStefano Zampini 3013202ece2SStefano Zampini if (isLU || isILU || isCHOL) { 3023202ece2SStefano Zampini Mat fact; 303*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSetUp(ksp)); 304*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCFactorGetMatrix(pc, &fact)); 305*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 306*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatSolve(fact, Bd, AinvBd)); 3073202ece2SStefano Zampini } else { 30807b1e237SStefano Zampini PetscBool ex = PETSC_TRUE; 30907b1e237SStefano Zampini 31007b1e237SStefano Zampini if (ex) { 3113202ece2SStefano Zampini Mat Ainvd; 3123202ece2SStefano Zampini 313*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCComputeOperator(pc, MATDENSE, &Ainvd)); 314*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 315*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Ainvd)); 31607b1e237SStefano Zampini } else { 31707b1e237SStefano Zampini Vec sol,rhs; 31807b1e237SStefano Zampini PetscScalar *arrayrhs,*arraysol; 31907b1e237SStefano Zampini PetscInt i,nrhs,n; 32007b1e237SStefano Zampini 321*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 322*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetSize(Bd,&n,&nrhs)); 323*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(Bd,&arrayrhs)); 324*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(AinvBd,&arraysol)); 325*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetSolution(ksp,&sol)); 326*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetRhs(ksp,&rhs)); 32707b1e237SStefano Zampini for (i=0;i<nrhs;i++) { 328*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(rhs,arrayrhs+i*n)); 329*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(sol,arraysol+i*n)); 330*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSolve(ksp,rhs,sol)); 331*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(rhs)); 332*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(sol)); 33307b1e237SStefano Zampini } 334*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(Bd,&arrayrhs)); 335*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(AinvBd,&arrayrhs)); 33607b1e237SStefano Zampini } 3373202ece2SStefano Zampini } 3385ec10c6aSStefano Zampini if (!Bdense & !issym) { 339*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Bd)); 3403202ece2SStefano Zampini } 3415ec10c6aSStefano Zampini 3425ec10c6aSStefano Zampini if (!issym) { 3433202ece2SStefano Zampini if (!Cdense) { 344*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 3453202ece2SStefano Zampini } else { 3463202ece2SStefano Zampini Cd = C; 3473202ece2SStefano Zampini } 348*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(Cd, AinvBd, reuse, fill, S)); 3493202ece2SStefano Zampini if (!Cdense) { 350*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Cd)); 3513202ece2SStefano Zampini } 3525ec10c6aSStefano Zampini } else { 353*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 3545ec10c6aSStefano Zampini if (!Bdense) { 355*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Bd)); 3565ec10c6aSStefano Zampini } 3575ec10c6aSStefano Zampini } 358*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&AinvBd)); 359f11841e3SStefano Zampini } 3603202ece2SStefano Zampini 3613202ece2SStefano Zampini if (D) { 3623202ece2SStefano Zampini Mat Dd; 3633202ece2SStefano Zampini PetscBool Ddense; 3643202ece2SStefano Zampini 365*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense)); 3663202ece2SStefano Zampini if (!Ddense) { 367*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 3683202ece2SStefano Zampini } else { 3693202ece2SStefano Zampini Dd = D; 3703202ece2SStefano Zampini } 371f11841e3SStefano Zampini if (n_I) { 372*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN)); 373f11841e3SStefano Zampini } else { 374f11841e3SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 375*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(Dd,MAT_COPY_VALUES,S)); 376f11841e3SStefano Zampini } else { 377*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(Dd,*S,SAME_NONZERO_PATTERN)); 378f11841e3SStefano Zampini } 379f11841e3SStefano Zampini } 3803202ece2SStefano Zampini if (!Ddense) { 381*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Dd)); 3823202ece2SStefano Zampini } 3833202ece2SStefano Zampini } else { 384*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatScale(*S,-1.0)); 3853202ece2SStefano Zampini } 3863202ece2SStefano Zampini PetscFunctionReturn(0); 3873202ece2SStefano Zampini } 38834a97f8cSStefano Zampini 38991af6908SStefano Zampini PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 390b1b3d7a2SStefano Zampini { 391be83ff47SStefano Zampini Mat F,A_II,A_IB,A_BI,A_BB,AE_II; 392be83ff47SStefano Zampini Mat S_all; 39357a87bf3SStefano Zampini Vec gstash,lstash; 39457a87bf3SStefano Zampini VecScatter sstash; 395b7ab4a40SStefano Zampini IS is_I,is_I_layer; 396dc456d91SStefano Zampini IS all_subsets,all_subsets_mult,all_subsets_n; 39757a87bf3SStefano Zampini PetscScalar *stasharray,*Bwork; 398dc456d91SStefano Zampini PetscInt *nnz,*all_local_idx_N; 399dc456d91SStefano Zampini PetscInt *auxnum1,*auxnum2; 4005a95e1ceSStefano Zampini PetscInt i,subset_size,max_subset_size; 401683d3df6SStefano Zampini PetscInt n_B,extra,local_size,global_size; 40257a87bf3SStefano Zampini PetscInt local_stash_size; 40308122e43SStefano Zampini PetscBLASInt B_N,B_ierr,B_lwork,*pivots; 4045a95e1ceSStefano Zampini MPI_Comm comm_n; 405f4f7d9d6SStefano Zampini PetscBool deluxe = PETSC_TRUE; 406f4f7d9d6SStefano Zampini PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 4073b03f7bbSStefano Zampini PetscViewer matl_dbg_viewer = NULL; 408b1b3d7a2SStefano Zampini PetscErrorCode ierr; 40935d0533cSStefano Zampini PetscBool flg; 410b1b3d7a2SStefano Zampini 411b1b3d7a2SStefano Zampini PetscFunctionBegin; 412*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->A)); 413*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->S)); 414e62b6521Sstefano_zampini if (Ain) { 415*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)Ain)); 416a64f4aa4SStefano Zampini sub_schurs->A = Ain; 417a64f4aa4SStefano Zampini } 4183301b35fSStefano Zampini 419*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)Sin)); 420a64f4aa4SStefano Zampini sub_schurs->S = Sin; 421df4d28bfSStefano Zampini if (sub_schurs->schur_explicit) { 422df4d28bfSStefano Zampini sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 423a64f4aa4SStefano Zampini } 424a64f4aa4SStefano Zampini 4255a95e1ceSStefano Zampini /* preliminary checks */ 4262c71b3e2SJacob Faibussowitsch PetscCheckFalse(!sub_schurs->schur_explicit && compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 4275a95e1ceSStefano Zampini 42888113c35SStefano Zampini if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 42988113c35SStefano Zampini 4303b03f7bbSStefano Zampini /* debug (MATLAB) */ 4317f9db97bSStefano Zampini if (sub_schurs->debug) { 4327f9db97bSStefano Zampini PetscMPIInt size,rank; 4337ebab0bbSStefano Zampini PetscInt nr,*print_schurs_ranks,print_schurs = PETSC_FALSE; 4347f9db97bSStefano Zampini 435*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size)); 436*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 4377f9db97bSStefano Zampini nr = size; 438*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nr,&print_schurs_ranks)); 4397f9db97bSStefano Zampini ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 440*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg)); 4417f9db97bSStefano Zampini if (!flg) print_schurs = PETSC_TRUE; 4427f9db97bSStefano Zampini else { 4437ebab0bbSStefano Zampini print_schurs = PETSC_FALSE; 4447f9db97bSStefano Zampini for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; } 4457f9db97bSStefano Zampini } 4467f9db97bSStefano Zampini ierr = PetscOptionsEnd();CHKERRQ(ierr); 447*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(print_schurs_ranks)); 4483b03f7bbSStefano Zampini if (print_schurs) { 4493b03f7bbSStefano Zampini char filename[256]; 4503b03f7bbSStefano Zampini 451*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank)); 452*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer)); 453*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB)); 4543b03f7bbSStefano Zampini } 4557f9db97bSStefano Zampini } 4567f9db97bSStefano Zampini 4575a95e1ceSStefano Zampini /* restrict work on active processes */ 458991c41b4SStefano Zampini if (sub_schurs->restrict_comm) { 459991c41b4SStefano Zampini PetscSubcomm subcomm; 460991c41b4SStefano Zampini PetscMPIInt color,rank; 461991c41b4SStefano Zampini 4625a95e1ceSStefano Zampini color = 0; 4635a95e1ceSStefano Zampini if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 464*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 465*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm)); 466*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSubcommSetNumber(subcomm,2)); 467*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 468*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL)); 469*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSubcommDestroy(&subcomm)); 4705a95e1ceSStefano Zampini if (!sub_schurs->n_subs) { 471*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDestroy(&comm_n)); 4725a95e1ceSStefano Zampini PetscFunctionReturn(0); 4735a95e1ceSStefano Zampini } 474991c41b4SStefano Zampini } else { 475*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL)); 476991c41b4SStefano Zampini } 4775a95e1ceSStefano Zampini 478b1b3d7a2SStefano Zampini /* get Schur complement matrices */ 479df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 480a64f4aa4SStefano Zampini Mat tA_IB,tA_BI,tA_BB; 4813301b35fSStefano Zampini PetscBool isseqsbaij; 482*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB)); 483*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij)); 4843301b35fSStefano Zampini if (isseqsbaij) { 485*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB)); 486*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB)); 487*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI)); 488a64f4aa4SStefano Zampini } else { 489*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)tA_BB)); 490a64f4aa4SStefano Zampini A_BB = tA_BB; 491*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)tA_IB)); 492a64f4aa4SStefano Zampini A_IB = tA_IB; 493*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)tA_BI)); 494a64f4aa4SStefano Zampini A_BI = tA_BI; 495f11841e3SStefano Zampini } 496a58a30b4SStefano Zampini } else { 4975a95e1ceSStefano Zampini A_II = NULL; 4985a95e1ceSStefano Zampini A_IB = NULL; 4995a95e1ceSStefano Zampini A_BI = NULL; 5005a95e1ceSStefano Zampini A_BB = NULL; 501b1b3d7a2SStefano Zampini } 5025a95e1ceSStefano Zampini S_all = NULL; 503b1b3d7a2SStefano Zampini 504b1b3d7a2SStefano Zampini /* determine interior problems */ 505*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&i)); 5063dc780c3SStefano Zampini if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 507b1b3d7a2SStefano Zampini PetscBT touched; 508b1b3d7a2SStefano Zampini const PetscInt* idx_B; 509b1b3d7a2SStefano Zampini PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 510b1b3d7a2SStefano Zampini 5112c71b3e2SJacob Faibussowitsch PetscCheckFalse(!xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency"); 512b1b3d7a2SStefano Zampini /* get sizes */ 513*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&n_I)); 514*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_B,&n_B)); 515b1b3d7a2SStefano Zampini 516*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n_I+n_B,&local_numbering)); 517*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTCreate(n_I+n_B,&touched)); 518*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTMemzero(n_I+n_B,touched)); 519b1b3d7a2SStefano Zampini 520b1b3d7a2SStefano Zampini /* all boundary dofs must be skipped when adding layers */ 521*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(sub_schurs->is_B,&idx_B)); 522b1b3d7a2SStefano Zampini for (j=0;j<n_B;j++) { 523*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTSet(touched,idx_B[j])); 524b1b3d7a2SStefano Zampini } 525*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(local_numbering,idx_B,n_B)); 526*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(sub_schurs->is_B,&idx_B)); 527b1b3d7a2SStefano Zampini 528b1b3d7a2SStefano Zampini /* add prescribed number of layers of dofs */ 529b1b3d7a2SStefano Zampini n_local_dofs = n_B; 530b1b3d7a2SStefano Zampini n_prev_added = n_B; 531b1b3d7a2SStefano Zampini for (layer=0;layer<nlayers;layer++) { 532b6bace71SJacob Faibussowitsch PetscInt n_added = 0; 533b1b3d7a2SStefano Zampini if (n_local_dofs == n_I+n_B) break; 5342c71b3e2SJacob Faibussowitsch PetscCheckFalse(n_local_dofs > n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B); 535*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added)); 536b1b3d7a2SStefano Zampini n_prev_added = n_added; 537b1b3d7a2SStefano Zampini n_local_dofs += n_added; 538b1b3d7a2SStefano Zampini if (!n_added) break; 539b1b3d7a2SStefano Zampini } 540*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTDestroy(&touched)); 541b1b3d7a2SStefano Zampini 542883469d8SStefano Zampini /* IS for I layer dofs in original numbering */ 543*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer)); 544*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(local_numbering)); 545*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSort(is_I_layer)); 546883469d8SStefano Zampini /* IS for I layer dofs in I numbering */ 547df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 548b1b3d7a2SStefano Zampini ISLocalToGlobalMapping ItoNmap; 549*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap)); 550*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I)); 551*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingDestroy(&ItoNmap)); 552b1b3d7a2SStefano Zampini 553b1b3d7a2SStefano Zampini /* II block */ 554*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II)); 555b1b3d7a2SStefano Zampini } 556b1b3d7a2SStefano Zampini } else { 557b1b3d7a2SStefano Zampini PetscInt n_I; 558b1b3d7a2SStefano Zampini 559b1b3d7a2SStefano Zampini /* IS for I dofs in original numbering */ 560*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)sub_schurs->is_I)); 561a9b99552SStefano Zampini is_I_layer = sub_schurs->is_I; 562b1b3d7a2SStefano Zampini 563b1b3d7a2SStefano Zampini /* IS for I dofs in I numbering (strided 1) */ 564df4d28bfSStefano Zampini if (!sub_schurs->schur_explicit) { 565*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetSize(sub_schurs->is_I,&n_I)); 566*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I)); 567b1b3d7a2SStefano Zampini 568b1b3d7a2SStefano Zampini /* II block is the same */ 569*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)A_II)); 570b1b3d7a2SStefano Zampini AE_II = A_II; 571b1b3d7a2SStefano Zampini } 572b1b3d7a2SStefano Zampini } 5735a95e1ceSStefano Zampini 574883469d8SStefano Zampini /* Get info on subset sizes and sum of all subsets sizes */ 5755a95e1ceSStefano Zampini max_subset_size = 0; 576883469d8SStefano Zampini local_size = 0; 5775a95e1ceSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 578*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 5795a95e1ceSStefano Zampini max_subset_size = PetscMax(subset_size,max_subset_size); 580883469d8SStefano Zampini local_size += subset_size; 581883469d8SStefano Zampini } 582883469d8SStefano Zampini 583883469d8SStefano Zampini /* Work arrays for local indices */ 584883469d8SStefano Zampini extra = 0; 585*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_B,&n_B)); 586df4d28bfSStefano Zampini if (sub_schurs->schur_explicit && is_I_layer) { 587*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(is_I_layer,&extra)); 588883469d8SStefano Zampini } 589*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n_B+extra,&all_local_idx_N)); 590883469d8SStefano Zampini if (extra) { 591883469d8SStefano Zampini const PetscInt *idxs; 592*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(is_I_layer,&idxs)); 593*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(all_local_idx_N,idxs,extra)); 594*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(is_I_layer,&idxs)); 595883469d8SStefano Zampini } 596*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&auxnum1)); 597*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&auxnum2)); 598883469d8SStefano Zampini 599883469d8SStefano Zampini /* Get local indices in local numbering */ 600883469d8SStefano Zampini local_size = 0; 60157a87bf3SStefano Zampini local_stash_size = 0; 6025a95e1ceSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 603883469d8SStefano Zampini const PetscInt *idxs; 604883469d8SStefano Zampini 605*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 606*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 607eb595f79SStefano Zampini /* start (smallest in global ordering) and multiplicity */ 608eb595f79SStefano Zampini auxnum1[i] = idxs[0]; 60957a87bf3SStefano Zampini auxnum2[i] = subset_size*subset_size; 610883469d8SStefano Zampini /* subset indices in local numbering */ 611*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size)); 612*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 613883469d8SStefano Zampini local_size += subset_size; 61457a87bf3SStefano Zampini local_stash_size += subset_size*subset_size; 615883469d8SStefano Zampini } 616883469d8SStefano Zampini 617f4f7d9d6SStefano Zampini /* allocate extra workspace needed only for GETRI or SYTRF */ 61811955456SStefano Zampini use_potr = use_sytr = PETSC_FALSE; 61911955456SStefano Zampini if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 620f4f7d9d6SStefano Zampini use_potr = PETSC_TRUE; 62111955456SStefano Zampini } else if (sub_schurs->is_symmetric) { 62211955456SStefano Zampini use_sytr = PETSC_TRUE; 62311955456SStefano Zampini } 62411955456SStefano Zampini if (local_size && !use_potr) { 62559ac4de7SStefano Zampini PetscScalar lwork,dummyscalar = 0.; 62659ac4de7SStefano Zampini PetscBLASInt dummyint = 0; 627d2627357SStefano Zampini 628d2627357SStefano Zampini B_lwork = -1; 629*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(local_size,&B_N)); 630*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 631f4f7d9d6SStefano Zampini if (use_sytr) { 632f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 6332c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr); 634f4f7d9d6SStefano Zampini } else { 63559ac4de7SStefano Zampini PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 6362c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr); 637f4f7d9d6SStefano Zampini } 638*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFPTrapPop()); 639*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 640*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots)); 641056290a2SStefano Zampini } else { 642056290a2SStefano Zampini Bwork = NULL; 643056290a2SStefano Zampini pivots = NULL; 644d2627357SStefano Zampini } 645d2627357SStefano Zampini 64657a87bf3SStefano Zampini /* prepare data for summing up properly schurs on subsets */ 647*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n)); 648*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets)); 649*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&all_subsets_n)); 650*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult)); 651*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n)); 652*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&all_subsets)); 653*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&all_subsets_mult)); 654*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(all_subsets_n,&i)); 6552c71b3e2SJacob Faibussowitsch PetscCheckFalse(i != local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_stash_size); 656*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash)); 657*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash)); 658*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash)); 659*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&all_subsets_n)); 6602972d61bSStefano Zampini 6615a95e1ceSStefano Zampini /* subset indices in local boundary numbering */ 6625a95e1ceSStefano Zampini if (!sub_schurs->is_Ej_all) { 6635a95e1ceSStefano Zampini PetscInt *all_local_idx_B; 6645a95e1ceSStefano Zampini 665*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(local_size,&all_local_idx_B)); 666*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B)); 6672c71b3e2SJacob Faibussowitsch PetscCheckFalse(subset_size != local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size); 668*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all)); 669b1b3d7a2SStefano Zampini } 670b1b3d7a2SStefano Zampini 67172b8c272SStefano Zampini if (change) { 67272b8c272SStefano Zampini ISLocalToGlobalMapping BtoS; 67372b8c272SStefano Zampini IS change_primal_B; 67472b8c272SStefano Zampini IS change_primal_all; 67572b8c272SStefano Zampini 6762c71b3e2SJacob Faibussowitsch PetscCheckFalse(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6772c71b3e2SJacob Faibussowitsch PetscCheckFalse(sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 678*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub)); 67972b8c272SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 68072b8c272SStefano Zampini ISLocalToGlobalMapping NtoS; 681*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS)); 682*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i])); 683*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingDestroy(&NtoS)); 68472b8c272SStefano Zampini } 685*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B)); 686*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS)); 687*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all)); 688*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingDestroy(&BtoS)); 689*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&change_primal_B)); 690*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change)); 69172b8c272SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 69272b8c272SStefano Zampini Mat change_sub; 69372b8c272SStefano Zampini 694*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 695*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i])); 696*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSetType(sub_schurs->change[i],KSPPREONLY)); 69772b8c272SStefano Zampini if (!sub_schurs->change_with_qr) { 698*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub)); 69972b8c272SStefano Zampini } else { 70072b8c272SStefano Zampini Mat change_subt; 701*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt)); 702*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub)); 703*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&change_subt)); 70472b8c272SStefano Zampini } 705*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub)); 706*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&change_sub)); 707*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix)); 708*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_")); 70972b8c272SStefano Zampini } 710*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&change_primal_all)); 71172b8c272SStefano Zampini } 71272b8c272SStefano Zampini 7135a95e1ceSStefano Zampini /* Local matrix of all local Schur on subsets (transposed) */ 7145a95e1ceSStefano Zampini if (!sub_schurs->S_Ej_all) { 71504c5b2e6SStefano Zampini Mat T; 71604c5b2e6SStefano Zampini PetscScalar *v; 71704c5b2e6SStefano Zampini PetscInt *ii,*jj; 71804c5b2e6SStefano Zampini PetscInt cum,i,j,k; 71904c5b2e6SStefano Zampini 72004c5b2e6SStefano Zampini /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 72104c5b2e6SStefano Zampini Allocate properly a representative matrix and duplicate */ 722*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v)); 72304c5b2e6SStefano Zampini ii[0] = 0; 72404c5b2e6SStefano Zampini cum = 0; 72504c5b2e6SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 726*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 72704c5b2e6SStefano Zampini for (j=0;j<subset_size;j++) { 72804c5b2e6SStefano Zampini const PetscInt row = cum+j; 72904c5b2e6SStefano Zampini PetscInt col = cum; 73004c5b2e6SStefano Zampini 73104c5b2e6SStefano Zampini ii[row+1] = ii[row] + subset_size; 73204c5b2e6SStefano Zampini for (k=ii[row];k<ii[row+1];k++) { 73304c5b2e6SStefano Zampini jj[k] = col; 73404c5b2e6SStefano Zampini col++; 73504c5b2e6SStefano Zampini } 73604c5b2e6SStefano Zampini } 73704c5b2e6SStefano Zampini cum += subset_size; 73804c5b2e6SStefano Zampini } 739*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T)); 740*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all)); 741*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&T)); 742*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree3(ii,jj,v)); 74304c5b2e6SStefano Zampini } 74404c5b2e6SStefano Zampini /* matrices for deluxe scaling and adaptive selection */ 74504c5b2e6SStefano Zampini if (compute_Stilda) { 74604c5b2e6SStefano Zampini if (!sub_schurs->sum_S_Ej_tilda_all) { 747*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all)); 74804c5b2e6SStefano Zampini } 74904c5b2e6SStefano Zampini if (!sub_schurs->sum_S_Ej_inv_all && deluxe) { 750*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all)); 75104c5b2e6SStefano Zampini } 752aa83b6aeSStefano Zampini } 753b1b3d7a2SStefano Zampini 7545a95e1ceSStefano Zampini /* Compute Schur complements explicitly */ 755be83ff47SStefano Zampini F = NULL; 756d943a642SStefano Zampini if (!sub_schurs->schur_explicit) { 757d943a642SStefano Zampini /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 758d943a642SStefano Zampini it is not efficient, unless the economic version of the scaling is used */ 7595a95e1ceSStefano Zampini Mat S_Ej_expl; 7605a95e1ceSStefano Zampini PetscScalar *work; 7615a95e1ceSStefano Zampini PetscInt j,*dummy_idx; 7625a95e1ceSStefano Zampini PetscBool Sdense; 7635a95e1ceSStefano Zampini 764*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work)); 7655a95e1ceSStefano Zampini local_size = 0; 766b1b3d7a2SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 7675a95e1ceSStefano Zampini IS is_subset_B; 7685a95e1ceSStefano Zampini Mat AE_EE,AE_IE,AE_EI,S_Ej; 7695a95e1ceSStefano Zampini 7705a95e1ceSStefano Zampini /* subsets in original and boundary numbering */ 771*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B)); 7725a95e1ceSStefano Zampini /* EE block */ 773*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE)); 7745a95e1ceSStefano Zampini /* IE block */ 775*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE)); 7765a95e1ceSStefano Zampini /* EI block */ 777d943a642SStefano Zampini if (sub_schurs->is_symmetric) { 778*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateTranspose(AE_IE,&AE_EI)); 779d943a642SStefano Zampini } else if (sub_schurs->is_hermitian) { 780*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateHermitianTranspose(AE_IE,&AE_EI)); 7815a95e1ceSStefano Zampini } else { 782*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI)); 7835a95e1ceSStefano Zampini } 784*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_subset_B)); 785*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej)); 786*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&AE_EE)); 787*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&AE_IE)); 788*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&AE_EI)); 789b1b3d7a2SStefano Zampini if (AE_II == A_II) { /* we can reuse the same ksp */ 790b1b3d7a2SStefano Zampini KSP ksp; 791*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetKSP(sub_schurs->S,&ksp)); 792*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementSetKSP(S_Ej,ksp)); 793b1b3d7a2SStefano Zampini } else { /* build new ksp object which inherits ksp and pc types from the original one */ 794b1b3d7a2SStefano Zampini KSP origksp,schurksp; 795b1b3d7a2SStefano Zampini PC origpc,schurpc; 796b1b3d7a2SStefano Zampini KSPType ksp_type; 797b1b3d7a2SStefano Zampini PetscInt n_internal; 7985a95e1ceSStefano Zampini PetscBool ispcnone; 799b1b3d7a2SStefano Zampini 800*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetKSP(sub_schurs->S,&origksp)); 801*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetKSP(S_Ej,&schurksp)); 802*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetType(origksp,&ksp_type)); 803*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSetType(schurksp,ksp_type)); 804*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetPC(schurksp,&schurpc)); 805*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetPC(origksp,&origpc)); 806*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone)); 8075a95e1ceSStefano Zampini if (!ispcnone) { 8085a95e1ceSStefano Zampini PCType pc_type; 809*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCGetType(origpc,&pc_type)); 810*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCSetType(schurpc,pc_type)); 8115a95e1ceSStefano Zampini } else { 812*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCSetType(schurpc,PCLU)); 8135a95e1ceSStefano Zampini } 814*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetSize(is_I,&n_internal)); 815365a3a41SStefano Zampini if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 8163ca39a21SBarry Smith MatSolverType solver = NULL; 817*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver)); 818b1b3d7a2SStefano Zampini if (solver) { 819*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCFactorSetMatSolverType(schurpc,solver)); 820b1b3d7a2SStefano Zampini } 821b1b3d7a2SStefano Zampini } 822*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPSetUp(schurksp)); 823b1b3d7a2SStefano Zampini } 824*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 825*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl)); 826*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl)); 827*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense)); 8285a95e1ceSStefano Zampini if (Sdense) { 8295a95e1ceSStefano Zampini for (j=0;j<subset_size;j++) { 8305a95e1ceSStefano Zampini dummy_idx[j]=local_size+j; 831b1b3d7a2SStefano Zampini } 832*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES)); 8336c4ed002SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices"); 834*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S_Ej)); 835*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S_Ej_expl)); 8365a95e1ceSStefano Zampini local_size += subset_size; 8375a95e1ceSStefano Zampini } 838*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(dummy_idx,work)); 839b1b3d7a2SStefano Zampini /* free */ 840*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_I)); 841*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&AE_II)); 842*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(all_local_idx_N)); 843883469d8SStefano Zampini } else { 8445cbda25cSStefano Zampini Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL; 8459d54b7f4SStefano Zampini Vec Dall = NULL; 846ca92afb2SStefano Zampini IS is_A_all,*is_p_r = NULL; 8477ebab0bbSStefano Zampini MatType Stype; 8485cbda25cSStefano Zampini PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL; 84904c5b2e6SStefano Zampini PetscScalar *SEj_arr = NULL,*SEjinv_arr = NULL; 8501683a169SBarry Smith const PetscScalar *rS_data; 85104c5b2e6SStefano Zampini PetscInt n,n_I,size_schur,size_active_schur,cum,cum2; 8523fc34f97SStefano Zampini PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE; 8533fc34f97SStefano Zampini PetscBool schur_has_vertices,factor_workaround; 85411955456SStefano Zampini PetscBool use_cholesky; 8557ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 8567ebab0bbSStefano Zampini PetscBool oldpin; 8577ebab0bbSStefano Zampini #endif 858883469d8SStefano Zampini 859683d3df6SStefano Zampini /* get sizes */ 86081ea8064SStefano Zampini n_I = 0; 86181ea8064SStefano Zampini if (is_I_layer) { 862*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(is_I_layer,&n_I)); 86381ea8064SStefano Zampini } 864683d3df6SStefano Zampini economic = PETSC_FALSE; 865*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&cum)); 866683d3df6SStefano Zampini if (cum != n_I) economic = PETSC_TRUE; 867*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetLocalSize(sub_schurs->A,&n,NULL)); 8689d54b7f4SStefano Zampini size_active_schur = local_size; 8699d54b7f4SStefano Zampini 870f17d2ae1SStefano Zampini /* import scaling vector (wrong formulation if we have 3D edges) */ 8719d54b7f4SStefano Zampini if (scaling && compute_Stilda) { 8729d54b7f4SStefano Zampini const PetscScalar *array; 8739d54b7f4SStefano Zampini PetscScalar *array2; 8749d54b7f4SStefano Zampini const PetscInt *idxs; 8759d54b7f4SStefano Zampini PetscInt i; 8769d54b7f4SStefano Zampini 877*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(sub_schurs->is_Ej_all,&idxs)); 878*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall)); 879*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(scaling,&array)); 880*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(Dall,&array2)); 8819d54b7f4SStefano Zampini for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]]; 882*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(Dall,&array2)); 883*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(scaling,&array)); 884*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs)); 8859d54b7f4SStefano Zampini deluxe = PETSC_FALSE; 8869d54b7f4SStefano Zampini } 887d62866d3SStefano Zampini 888683d3df6SStefano Zampini /* size active schurs does not count any dirichlet or vertex dof on the interface */ 8893fc34f97SStefano Zampini factor_workaround = PETSC_FALSE; 8903fc34f97SStefano Zampini schur_has_vertices = PETSC_FALSE; 891683d3df6SStefano Zampini cum = n_I+size_active_schur; 892683d3df6SStefano Zampini if (sub_schurs->is_dir) { 893683d3df6SStefano Zampini const PetscInt* idxs; 894683d3df6SStefano Zampini PetscInt n_dir; 895683d3df6SStefano Zampini 896*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&n_dir)); 897*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(sub_schurs->is_dir,&idxs)); 898*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir)); 899*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(sub_schurs->is_dir,&idxs)); 900683d3df6SStefano Zampini cum += n_dir; 9013fc34f97SStefano Zampini factor_workaround = PETSC_TRUE; 902d62866d3SStefano Zampini } 903683d3df6SStefano Zampini /* include the primal vertices in the Schur complement */ 904367aa537SStefano Zampini if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 905683d3df6SStefano Zampini PetscInt n_v; 906683d3df6SStefano Zampini 907*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 908683d3df6SStefano Zampini if (n_v) { 909683d3df6SStefano Zampini const PetscInt* idxs; 910683d3df6SStefano Zampini 911*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(sub_schurs->is_vertices,&idxs)); 912*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(all_local_idx_N+cum,idxs,n_v)); 913*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 914683d3df6SStefano Zampini cum += n_v; 915683d3df6SStefano Zampini factor_workaround = PETSC_TRUE; 9163fc34f97SStefano Zampini schur_has_vertices = PETSC_TRUE; 917683d3df6SStefano Zampini } 918683d3df6SStefano Zampini } 919683d3df6SStefano Zampini size_schur = cum - n_I; 920*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all)); 9217ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 922b470e4b4SRichard Tran Mills oldpin = sub_schurs->A->boundtocpu; 923*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatBindToCPU(sub_schurs->A,PETSC_TRUE)); 9247ebab0bbSStefano Zampini #endif 925683d3df6SStefano Zampini if (cum == n) { 926*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSetPermutation(is_A_all)); 927*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A)); 928683d3df6SStefano Zampini } else { 929*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A)); 930683d3df6SStefano Zampini } 9317ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 932*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatBindToCPU(sub_schurs->A,oldpin)); 9337ebab0bbSStefano Zampini #endif 934*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOptionsPrefix(A,sub_schurs->prefix)); 935*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAppendOptionsPrefix(A,"sub_schurs_")); 936ca92afb2SStefano Zampini 937ca92afb2SStefano Zampini /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 9387ebab0bbSStefano Zampini this is a workaround */ 939ca92afb2SStefano Zampini if (benign_n) { 9407ebab0bbSStefano Zampini Vec v,benign_AIIm1_ones; 941ca92afb2SStefano Zampini ISLocalToGlobalMapping N_to_reor; 942ca92afb2SStefano Zampini IS is_p0,is_p0_p; 9435cbda25cSStefano Zampini PetscScalar *cs_AIB,*AIIm1_data; 9445cbda25cSStefano Zampini PetscInt sizeA; 945ca92afb2SStefano Zampini 946*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor)); 947*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 948*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p)); 949*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_p0)); 950*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 951*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetSize(v,&sizeA)); 952*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat)); 953*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat)); 954*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(cs_AIB_mat,&cs_AIB)); 955*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 956*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(benign_n,&is_p_r)); 957ca92afb2SStefano Zampini /* compute colsum of A_IB restricted to pressures */ 958ca92afb2SStefano Zampini for (i=0;i<benign_n;i++) { 9597ebab0bbSStefano Zampini const PetscScalar *array; 960ca92afb2SStefano Zampini const PetscInt *idxs; 961ca92afb2SStefano Zampini PetscInt j,nz; 962ca92afb2SStefano Zampini 963*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i])); 964*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(is_p_r[i],&nz)); 965*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(is_p_r[i],&idxs)); 9665cbda25cSStefano Zampini for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.; 967*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(is_p_r[i],&idxs)); 968*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 969*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(A,benign_AIIm1_ones,v)); 970*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(benign_AIIm1_ones)); 971*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(v,&array)); 97222db5ddcSStefano Zampini for (j=0;j<size_schur;j++) { 97322db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX) 97422db5ddcSStefano Zampini cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz)); 97522db5ddcSStefano Zampini #else 97622db5ddcSStefano Zampini cs_AIB[i*size_schur + j] = array[j+n_I]/nz; 97722db5ddcSStefano Zampini #endif 97822db5ddcSStefano Zampini } 979*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(v,&array)); 980ca92afb2SStefano Zampini } 981*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB)); 982*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 983*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&v)); 984*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&benign_AIIm1_ones)); 985*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 986*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 987*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 988*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL)); 989*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_p0_p)); 990*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingDestroy(&N_to_reor)); 991ca92afb2SStefano Zampini } 992*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric)); 993*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian)); 994*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef)); 995883469d8SStefano Zampini 99611955456SStefano Zampini /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 99711955456SStefano Zampini use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 99811955456SStefano Zampini 999683d3df6SStefano Zampini /* when using the benign subspace trick, the local Schur complements are SPD */ 100035d0533cSStefano Zampini /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 100135d0533cSStefano Zampini Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 100235d0533cSStefano Zampini if (benign_trick) { 100335d0533cSStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 1004*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg)); 100535d0533cSStefano Zampini if (flg) use_cholesky = PETSC_FALSE; 100635d0533cSStefano Zampini } 1007d47842beSStefano Zampini 1008f4f7d9d6SStefano Zampini if (n_I) { 10090aa714b2SStefano Zampini IS is_schur; 10107ebab0bbSStefano Zampini char stype[64]; 10114ba54290SStefano Zampini PetscBool gpu = PETSC_FALSE; 10125a05ddb0SStefano Zampini 101311955456SStefano Zampini if (use_cholesky) { 1014*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F)); 1015883469d8SStefano Zampini } else { 1016*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F)); 1017883469d8SStefano Zampini } 1018*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetErrorIfFailure(A,PETSC_TRUE)); 101935d0533cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1020*5f80ce2aSJacob Faibussowitsch if (benign_trick) CHKERRQ(MatMkl_PardisoSetCntl(F,10,10)); 102135d0533cSStefano Zampini #endif 1022883469d8SStefano Zampini /* subsets ordered last */ 1023*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur)); 1024*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorSetSchurIS(F,is_schur)); 1025*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_schur)); 1026883469d8SStefano Zampini 1027883469d8SStefano Zampini /* factorization step */ 102811955456SStefano Zampini if (use_cholesky) { 1029*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCholeskyFactorSymbolic(F,A,NULL,NULL)); 1030be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1031*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(F,19,2)); 1032be83ff47SStefano Zampini #endif 1033*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCholeskyFactorNumeric(F,A,NULL)); 1034a0b0af32SStefano Zampini S_lower_triangular = PETSC_TRUE; 1035883469d8SStefano Zampini } else { 1036*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatLUFactorSymbolic(F,A,NULL,NULL,NULL)); 1037be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1038*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(F,19,3)); 1039be83ff47SStefano Zampini #endif 1040*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatLUFactorNumeric(F,A,NULL)); 1041883469d8SStefano Zampini } 1042*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view")); 1043883469d8SStefano Zampini 10443b03f7bbSStefano Zampini if (matl_dbg_viewer) { 104511955456SStefano Zampini Mat S; 104611955456SStefano Zampini IS is; 104711955456SStefano Zampini 1048*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)A,"A")); 1049*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(A,matl_dbg_viewer)); 1050*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorCreateSchurComplement(F,&S,NULL)); 1051*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)S,"S")); 1052*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(S,matl_dbg_viewer)); 1053*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S)); 1054*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is)); 1055*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)is,"I")); 1056*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISView(is,matl_dbg_viewer)); 1057*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is)); 1058*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is)); 1059*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)is,"B")); 1060*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISView(is,matl_dbg_viewer)); 1061*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is)); 1062*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)is_A_all,"IA")); 1063*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISView(is_A_all,matl_dbg_viewer)); 106411955456SStefano Zampini } 106511955456SStefano Zampini 1066883469d8SStefano Zampini /* get explicit Schur Complement computed during numeric factorization */ 1067*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL)); 1068*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype))); 10694ba54290SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1070*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"")); 10714ba54290SStefano Zampini #endif 10727ebab0bbSStefano Zampini if (gpu) { 1073*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype))); 10747ebab0bbSStefano Zampini } 1075*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL)); 1076*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all)); 1077*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef)); 1078*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian)); 1079*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetType(S_all,&Stype)); 1080b3cb21ddSStefano Zampini 1081d62866d3SStefano Zampini /* we can reuse the solvers if we are not using the economic version */ 1082683d3df6SStefano Zampini reuse_solvers = (PetscBool)(reuse_solvers && !economic); 1083683d3df6SStefano Zampini factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 108403dfb2d7SStefano Zampini if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) 108503dfb2d7SStefano Zampini reuse_solvers = factor_workaround = PETSC_FALSE; 108603dfb2d7SStefano Zampini 1087df4d28bfSStefano Zampini solver_S = PETSC_TRUE; 1088ca92afb2SStefano Zampini 108972b8c272SStefano Zampini /* update the Schur complement with the change of basis on the pressures */ 1090ca92afb2SStefano Zampini if (benign_n) { 10917ebab0bbSStefano Zampini const PetscScalar *cs_AIB; 10927ebab0bbSStefano Zampini PetscScalar *S_data,*AIIm1_data; 10933b03f7bbSStefano Zampini Mat S2 = NULL,S3 = NULL; /* dbg */ 10943b03f7bbSStefano Zampini PetscScalar *S2_data,*S3_data; /* dbg */ 10957ebab0bbSStefano Zampini Vec v,benign_AIIm1_ones; 10965cbda25cSStefano Zampini PetscInt sizeA; 1097ca92afb2SStefano Zampini 1098*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S_all,&S_data)); 1099*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 1100*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetSize(v,&sizeA)); 1101ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1102*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(F,26,0)); 1103ca92afb2SStefano Zampini #endif 1104ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1105*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMkl_PardisoSetCntl(F,70,1)); 1106ca92afb2SStefano Zampini #endif 1107*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB)); 1108*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 11093b03f7bbSStefano Zampini if (matl_dbg_viewer) { 1110*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2)); 1111*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3)); 1112*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S2,&S2_data)); 1113*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S3,&S3_data)); 11143b03f7bbSStefano Zampini } 1115ca92afb2SStefano Zampini for (i=0;i<benign_n;i++) { 11163b03f7bbSStefano Zampini PetscScalar *array,sum = 0.,one = 1.,*sums; 1117ca92afb2SStefano Zampini const PetscInt *idxs; 11183b03f7bbSStefano Zampini PetscInt k,j,nz; 111947484b83SStefano Zampini PetscBLASInt B_k,B_n; 1120ca92afb2SStefano Zampini 1121*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(benign_n,&sums)); 1122*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 1123*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCopy(benign_AIIm1_ones,v)); 1124*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSolve(F,v,benign_AIIm1_ones)); 1125*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMult(A,benign_AIIm1_ones,v)); 1126*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(benign_AIIm1_ones)); 11273b03f7bbSStefano Zampini /* p0 dofs (eliminated) are excluded from the sums */ 11283b03f7bbSStefano Zampini for (k=0;k<benign_n;k++) { 1129*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(is_p_r[k],&nz)); 1130*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(is_p_r[k],&idxs)); 11313b03f7bbSStefano Zampini for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i]; 1132*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(is_p_r[k],&idxs)); 11333b03f7bbSStefano Zampini } 1134*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array)); 11353b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11363b03f7bbSStefano Zampini Vec vv; 11373b03f7bbSStefano Zampini char name[16]; 11383b03f7bbSStefano Zampini 1139*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv)); 1140*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSNPrintf(name,sizeof(name),"Pvs%D",i)); 1141*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)vv,name)); 1142*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecView(vv,matl_dbg_viewer)); 11433b03f7bbSStefano Zampini } 114447484b83SStefano Zampini /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 114547484b83SStefano Zampini /* cs_AIB already scaled by 1./nz */ 114647484b83SStefano Zampini B_k = 1; 11473b03f7bbSStefano Zampini for (k=0;k<benign_n;k++) { 11483b03f7bbSStefano Zampini sum = sums[k]; 1149*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(size_schur,&B_n)); 11503b03f7bbSStefano Zampini 11513b03f7bbSStefano Zampini if (PetscAbsScalar(sum) == 0.0) continue; 11523b03f7bbSStefano Zampini if (k == i) { 115347484b83SStefano Zampini PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 11543b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11553b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 11563b03f7bbSStefano Zampini } 11573b03f7bbSStefano Zampini } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 11583b03f7bbSStefano Zampini sum /= 2.0; 11593b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 11603b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11613b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 11623b03f7bbSStefano Zampini } 11633b03f7bbSStefano Zampini } 11643b03f7bbSStefano Zampini } 11655cbda25cSStefano Zampini sum = 1.; 116647484b83SStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 11673b03f7bbSStefano Zampini if (matl_dbg_viewer) { 11683b03f7bbSStefano Zampini PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n)); 11693b03f7bbSStefano Zampini } 1170*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 11715cbda25cSStefano Zampini /* set p0 entry of AIIm1_ones to zero */ 1172*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(is_p_r[i],&nz)); 1173*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(is_p_r[i],&idxs)); 1174282d6408SStefano Zampini for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.; 1175*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(is_p_r[i],&idxs)); 1176*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sums)); 11773b03f7bbSStefano Zampini } 1178*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&benign_AIIm1_ones)); 11793b03f7bbSStefano Zampini if (matl_dbg_viewer) { 1180*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S2,&S2_data)); 1181*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S3,&S3_data)); 1182ca92afb2SStefano Zampini } 1183a7414863SStefano Zampini if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1184a7414863SStefano Zampini PetscInt k,j; 1185a7414863SStefano Zampini for (k=0;k<size_schur;k++) { 1186a7414863SStefano Zampini for (j=k;j<size_schur;j++) { 1187a7414863SStefano Zampini S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]); 1188a7414863SStefano Zampini } 1189a7414863SStefano Zampini } 1190a7414863SStefano Zampini } 1191a7414863SStefano Zampini 11925cbda25cSStefano Zampini /* restore defaults */ 11935cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1194*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMumpsSetIcntl(F,26,-1)); 11955cbda25cSStefano Zampini #endif 11965cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO) 1197*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMkl_PardisoSetCntl(F,70,0)); 11985cbda25cSStefano Zampini #endif 1199*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB)); 1200*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 1201*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&v)); 1202*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S_all,&S_data)); 12033b03f7bbSStefano Zampini if (matl_dbg_viewer) { 12043b03f7bbSStefano Zampini Mat S; 12053b03f7bbSStefano Zampini 1206*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1207*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorCreateSchurComplement(F,&S,NULL)); 1208*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)S,"Sb")); 1209*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(S,matl_dbg_viewer)); 1210*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S)); 1211*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)S2,"S2P")); 1212*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(S2,matl_dbg_viewer)); 1213*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)S3,"S3P")); 1214*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(S3,matl_dbg_viewer)); 1215*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs")); 1216*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(cs_AIB_mat,matl_dbg_viewer)); 1217*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL)); 12183b03f7bbSStefano Zampini } 1219*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S2)); 1220*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S3)); 1221ca92afb2SStefano Zampini } 1222a3df083aSStefano Zampini if (!reuse_solvers) { 1223a3df083aSStefano Zampini for (i=0;i<benign_n;i++) { 1224*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_p_r[i])); 1225a3df083aSStefano Zampini } 1226*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(is_p_r)); 1227*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&cs_AIB_mat)); 1228*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&benign_AIIm1_ones_mat)); 1229a3df083aSStefano Zampini } 1230df4d28bfSStefano Zampini } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 1231*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all)); 1232*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetType(S_all,&Stype)); 1233683d3df6SStefano Zampini reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1234166598c1SStefano Zampini factor_workaround = PETSC_FALSE; 1235df4d28bfSStefano Zampini solver_S = PETSC_FALSE; 1236be83ff47SStefano Zampini } 1237be83ff47SStefano Zampini 1238be83ff47SStefano Zampini if (reuse_solvers) { 1239a00504b5SStefano Zampini Mat A_II,Afake; 124053892102SStefano Zampini Vec vec1_B; 1241df4d28bfSStefano Zampini PCBDDCReuseSolvers msolv_ctx; 12423462e049SStefano Zampini PetscInt n_R; 1243d5574798SStefano Zampini 1244df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) { 1245*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1246e28d306cSStefano Zampini } else { 1247*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNew(&sub_schurs->reuse_solver)); 1248d62866d3SStefano Zampini } 1249df4d28bfSStefano Zampini msolv_ctx = sub_schurs->reuse_solver; 1250*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL)); 1251*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)F)); 1252d5574798SStefano Zampini msolv_ctx->F = F; 1253*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(F,&msolv_ctx->sol,NULL)); 1254683d3df6SStefano Zampini /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1255683d3df6SStefano Zampini { 1256683d3df6SStefano Zampini PetscScalar *array; 1257683d3df6SStefano Zampini PetscInt n; 1258683d3df6SStefano Zampini 1259*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetLocalSize(msolv_ctx->sol,&n)); 1260*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(msolv_ctx->sol,&array)); 1261*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs)); 1262*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(msolv_ctx->sol,&array)); 1263683d3df6SStefano Zampini } 12643fc34f97SStefano Zampini msolv_ctx->has_vertices = schur_has_vertices; 1265d62866d3SStefano Zampini 1266d62866d3SStefano Zampini /* interior solver */ 1267*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver)); 1268*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCSetOperators(msolv_ctx->interior_solver,A_II,A_II)); 1269*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCSetType(msolv_ctx->interior_solver,PCSHELL)); 1270*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)")); 1271*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx)); 1272*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 1273*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior)); 1274*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose)); 1275d62866d3SStefano Zampini 1276d62866d3SStefano Zampini /* correction solver */ 1277*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver)); 1278*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCSetType(msolv_ctx->correction_solver,PCSHELL)); 1279*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)")); 1280*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx)); 1281*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 1282*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction)); 1283*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose)); 128453892102SStefano Zampini 128553892102SStefano Zampini /* scatter and vecs for Schur complement solver */ 1286*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B)); 1287*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(sub_schurs->S,&vec1_B,NULL)); 12883fc34f97SStefano Zampini if (!schur_has_vertices) { 1289*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B)); 1290*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B)); 1291*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)is_A_all)); 129253892102SStefano Zampini msolv_ctx->is_R = is_A_all; 1293683d3df6SStefano Zampini } else { 1294683d3df6SStefano Zampini IS is_B_all; 1295683d3df6SStefano Zampini const PetscInt* idxs; 1296683d3df6SStefano Zampini PetscInt dual,n_v,n; 1297683d3df6SStefano Zampini 1298*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 1299683d3df6SStefano Zampini dual = size_schur - n_v; 1300*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(is_A_all,&n)); 1301*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(is_A_all,&idxs)); 1302*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all)); 1303*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B)); 1304*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_B_all)); 1305*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all)); 1306*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B)); 1307*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_B_all)); 1308*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R)); 1309*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISRestoreIndices(is_A_all,&idxs)); 1310683d3df6SStefano Zampini } 1311*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(msolv_ctx->is_R,&n_R)); 1312*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake)); 1313*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY)); 1314*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY)); 1315*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake)); 1316*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&Afake)); 1317*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&vec1_B)); 1318ca92afb2SStefano Zampini 1319ca92afb2SStefano Zampini /* communicate benign info to solver context */ 1320ca92afb2SStefano Zampini if (benign_n) { 13215cbda25cSStefano Zampini PetscScalar *array; 13225cbda25cSStefano Zampini 1323ca92afb2SStefano Zampini msolv_ctx->benign_n = benign_n; 1324ca92afb2SStefano Zampini msolv_ctx->benign_zerodiag_subs = is_p_r; 1325*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals)); 13265cbda25cSStefano Zampini msolv_ctx->benign_csAIB = cs_AIB_mat; 1327*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL)); 1328*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(msolv_ctx->benign_corr_work,&array)); 1329*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec)); 1330*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(msolv_ctx->benign_corr_work,&array)); 13315cbda25cSStefano Zampini msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1332ca92afb2SStefano Zampini } 1333ada6e2d7SStefano Zampini } else { 1334ada6e2d7SStefano Zampini if (sub_schurs->reuse_solver) { 1335*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1336ada6e2d7SStefano Zampini } 1337*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sub_schurs->reuse_solver)); 1338d5574798SStefano Zampini } 1339*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 1340*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_A_all)); 13415db18549SStefano Zampini 1342be83ff47SStefano Zampini /* Work arrays */ 1343*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(max_subset_size*max_subset_size,&work)); 1344d2627357SStefano Zampini 1345be83ff47SStefano Zampini /* S_Ej_all */ 1346be83ff47SStefano Zampini cum = cum2 = 0; 1347*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArrayRead(S_all,&rS_data)); 1348*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr)); 134904c5b2e6SStefano Zampini if (compute_Stilda) { 1350*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 135104c5b2e6SStefano Zampini } 135265d8bf0aSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 135365d8bf0aSStefano Zampini PetscInt j; 135465d8bf0aSStefano Zampini 13555a95e1ceSStefano Zampini /* get S_E */ 1356*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1357683d3df6SStefano Zampini if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1358be83ff47SStefano Zampini PetscInt k; 1359be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1360be83ff47SStefano Zampini for (j=k;j<subset_size;j++) { 13611683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 13621683a169SBarry Smith work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]); 1363be83ff47SStefano Zampini } 1364be83ff47SStefano Zampini } 136506a4e24aSStefano Zampini } else { /* just copy to workspace */ 1366be83ff47SStefano Zampini PetscInt k; 1367be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1368be83ff47SStefano Zampini for (j=0;j<subset_size;j++) { 13691683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1370be83ff47SStefano Zampini } 1371be83ff47SStefano Zampini } 13729087bf02SStefano Zampini } 13735a95e1ceSStefano Zampini /* insert S_E values */ 1374b7ab4a40SStefano Zampini if (sub_schurs->change) { 13758760537fSStefano Zampini Mat change_sub,SEj,T; 137672b8c272SStefano Zampini 137772b8c272SStefano Zampini /* change basis */ 1378*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 1379*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 13808760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 13818760537fSStefano Zampini Mat T2; 1382*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 1383*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1384*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 1385*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&T2)); 13868760537fSStefano Zampini } else { 1387*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 138872b8c272SStefano Zampini } 1389*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 1390*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&T)); 1391*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL)); 1392*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&SEj)); 139372b8c272SStefano Zampini } 13949d54b7f4SStefano Zampini if (deluxe) { 1395*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 1396683d3df6SStefano Zampini /* if adaptivity is requested, invert S_E blocks */ 1397862806e4SStefano Zampini if (compute_Stilda) { 13987ebab0bbSStefano Zampini Mat M; 13997ebab0bbSStefano Zampini const PetscScalar *vals; 14007ebab0bbSStefano Zampini PetscBool isdense,isdensecuda; 1401f4f7d9d6SStefano Zampini 1402*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M)); 1403*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef)); 1404*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian)); 14057ebab0bbSStefano Zampini if (!PetscBTLookup(sub_schurs->is_edge,i)) { 1406*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetType(M,Stype)); 14076c3e6151SStefano Zampini } 1408*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense)); 1409*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda)); 14107ebab0bbSStefano Zampini if (use_cholesky) { 1411*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCholeskyFactor(M,NULL,NULL)); 1412d6462365SStefano Zampini } else { 1413*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatLUFactor(M,NULL,NULL,NULL)); 14142972d61bSStefano Zampini } 14157ebab0bbSStefano Zampini if (isdense) { 1416*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqDenseInvertFactors_Private(M)); 14177ebab0bbSStefano Zampini #if defined(PETSC_HAVE_CUDA) 14187ebab0bbSStefano Zampini } else if (isdensecuda) { 1419*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqDenseCUDAInvertFactors_Private(M)); 14207ebab0bbSStefano Zampini #endif 142198921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype); 1422*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArrayRead(M,&vals)); 1423*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size)); 1424*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArrayRead(M,&vals)); 1425*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&M)); 14269087bf02SStefano Zampini } 14279d54b7f4SStefano Zampini } else if (compute_Stilda) { /* not using deluxe */ 14289d54b7f4SStefano Zampini Mat SEj; 14299d54b7f4SStefano Zampini Vec D; 14309d54b7f4SStefano Zampini PetscScalar *array; 14319d54b7f4SStefano Zampini 1432*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 1433*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(Dall,&array)); 1434*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D)); 1435*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(Dall,&array)); 1436*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecShift(D,-1.)); 1437*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDiagonalScale(SEj,D,D)); 1438*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&SEj)); 1439*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&D)); 1440*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 14419d54b7f4SStefano Zampini } 1442be83ff47SStefano Zampini cum += subset_size; 1443be83ff47SStefano Zampini cum2 += subset_size*(size_schur + 1); 144404c5b2e6SStefano Zampini SEj_arr += subset_size*subset_size; 144504c5b2e6SStefano Zampini if (SEjinv_arr) SEjinv_arr += subset_size*subset_size; 1446be83ff47SStefano Zampini } 1447*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArrayRead(S_all,&rS_data)); 1448*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr)); 144904c5b2e6SStefano Zampini if (compute_Stilda) { 1450*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 145104c5b2e6SStefano Zampini } 1452df4d28bfSStefano Zampini if (solver_S) { 1453*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 14544a6c6b0dSStefano Zampini } 1455683d3df6SStefano Zampini 14567ebab0bbSStefano Zampini /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 14577ebab0bbSStefano Zampini however, preliminary tests indicate using GPUs is still faster in the solve phase */ 14587ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 14597ebab0bbSStefano Zampini if (reuse_solvers) { 14607ebab0bbSStefano Zampini Mat St; 14617ebab0bbSStefano Zampini MatFactorSchurStatus st; 14627ebab0bbSStefano Zampini 146335d0533cSStefano Zampini flg = PETSC_FALSE; 1464*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL)); 1465*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&St,&st)); 1466*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatBindToCPU(St,flg)); 1467*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&St,st)); 14687ebab0bbSStefano Zampini } 14697ebab0bbSStefano Zampini #endif 14707ebab0bbSStefano Zampini 1471683d3df6SStefano Zampini schur_factor = NULL; 147245951f25SStefano Zampini if (compute_Stilda && size_active_schur) { 1473683d3df6SStefano Zampini 1474*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 14759d54b7f4SStefano Zampini if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 1476*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur)); 14774a6c6b0dSStefano Zampini } else { 1478683d3df6SStefano Zampini Mat S_all_inv=NULL; 14797ebab0bbSStefano Zampini 14803fc34f97SStefano Zampini if (solver_S) { 1481683d3df6SStefano Zampini /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1482683d3df6SStefano Zampini The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 14833fc34f97SStefano Zampini if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1484683d3df6SStefano Zampini PetscScalar *data; 1485683d3df6SStefano Zampini PetscInt nd = 0; 14866dba178dSStefano Zampini 1487f4f7d9d6SStefano Zampini if (!use_potr) { 1488683d3df6SStefano Zampini SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1489683d3df6SStefano Zampini } 1490*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1491*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S_all_inv,&data)); 1492683d3df6SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1493*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1494683d3df6SStefano Zampini } 14953fc34f97SStefano Zampini 14963fc34f97SStefano Zampini /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 14973fc34f97SStefano Zampini if (schur_has_vertices) { 14983fc34f97SStefano Zampini Mat M; 14993fc34f97SStefano Zampini PetscScalar *tdata; 15003fc34f97SStefano Zampini PetscInt nv = 0, news; 15013fc34f97SStefano Zampini 1502*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 15033fc34f97SStefano Zampini news = size_active_schur + nv; 1504*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc1(news*news,&tdata)); 1505683d3df6SStefano Zampini for (i=0;i<size_active_schur;i++) { 1506*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i)); 1507*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv)); 15083fc34f97SStefano Zampini } 15093fc34f97SStefano Zampini for (i=0;i<nv;i++) { 15103fc34f97SStefano Zampini PetscInt k = i+size_active_schur; 1511*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i)); 15123fc34f97SStefano Zampini } 15133fc34f97SStefano Zampini 1514*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M)); 1515*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 1516*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCholeskyFactor(M,NULL,NULL)); 15173fc34f97SStefano Zampini /* save the factors */ 15183fc34f97SStefano Zampini cum = 0; 1519*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor)); 15203fc34f97SStefano Zampini for (i=0;i<size_active_schur;i++) { 1521*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i)); 1522683d3df6SStefano Zampini cum += size_active_schur - i; 1523683d3df6SStefano Zampini } 15243fc34f97SStefano Zampini for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)])); 1525*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqDenseInvertFactors_Private(M)); 15263fc34f97SStefano Zampini /* move back just the active dofs to the Schur complement */ 15273fc34f97SStefano Zampini for (i=0;i<size_active_schur;i++) { 1528*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur)); 15293fc34f97SStefano Zampini } 1530*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(tdata)); 1531*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&M)); 15323fc34f97SStefano Zampini } else { /* we can factorize and invert just the activedofs */ 15333fc34f97SStefano Zampini Mat M; 15345002105bSStefano Zampini PetscScalar *aux; 15353fc34f97SStefano Zampini 1536*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nd,&aux)); 15375002105bSStefano Zampini for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)]; 1538*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M)); 1539*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseSetLDA(M,size_schur)); 1540*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 1541*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCholeskyFactor(M,NULL,NULL)); 1542*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqDenseInvertFactors_Private(M)); 1543*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&M)); 1544*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M)); 1545*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroEntries(M)); 1546*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&M)); 1547*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M)); 1548*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseSetLDA(M,size_schur)); 1549*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroEntries(M)); 1550*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&M)); 15515002105bSStefano Zampini for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i]; 1552*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(aux)); 15533fc34f97SStefano Zampini } 1554*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S_all_inv,&data)); 15553fc34f97SStefano Zampini } else { /* use MatFactor calls to invert S */ 1556*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorInvertSchurComplement(F)); 1557*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1558683d3df6SStefano Zampini } 1559df4d28bfSStefano Zampini } else { /* we need to invert explicitly since we are not using MatFactor for S */ 1560*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)S_all)); 1561683d3df6SStefano Zampini S_all_inv = S_all; 1562*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S_all_inv,&S_data)); 1563*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(size_schur,&B_N)); 1564*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1565f4f7d9d6SStefano Zampini if (use_potr) { 1566be83ff47SStefano Zampini PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr)); 15672c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 1568be83ff47SStefano Zampini PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr)); 15692c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1570f4f7d9d6SStefano Zampini } else if (use_sytr) { 1571f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 15722c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1573f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr)); 15742c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1575d6462365SStefano Zampini } else { 1576d6462365SStefano Zampini PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr)); 15772c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1578d6462365SStefano Zampini PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 15792c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1580be83ff47SStefano Zampini } 1581*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(1.0*size_schur*size_schur*size_schur)); 1582*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFPTrapPop()); 1583*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S_all_inv,&S_data)); 1584be83ff47SStefano Zampini } 1585be83ff47SStefano Zampini /* S_Ej_tilda_all */ 1586be83ff47SStefano Zampini cum = cum2 = 0; 1587*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArrayRead(S_all_inv,&rS_data)); 1588be83ff47SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 1589be83ff47SStefano Zampini PetscInt j; 1590862806e4SStefano Zampini 1591*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1592be83ff47SStefano Zampini /* get (St^-1)_E */ 159372b8c272SStefano Zampini /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 159406a4e24aSStefano Zampini will be properly accessed later during adaptive selection */ 1595a0b0af32SStefano Zampini if (S_lower_triangular) { 1596be83ff47SStefano Zampini PetscInt k; 1597b7ab4a40SStefano Zampini if (sub_schurs->change) { 1598be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1599be83ff47SStefano Zampini for (j=k;j<subset_size;j++) { 16001683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 16016c3e6151SStefano Zampini work[j*subset_size+k] = work[k*subset_size+j]; 1602be83ff47SStefano Zampini } 1603be83ff47SStefano Zampini } 160472b8c272SStefano Zampini } else { 160572b8c272SStefano Zampini for (k=0;k<subset_size;k++) { 160672b8c272SStefano Zampini for (j=k;j<subset_size;j++) { 16071683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 160872b8c272SStefano Zampini } 160972b8c272SStefano Zampini } 161072b8c272SStefano Zampini } 161172b8c272SStefano Zampini } else { 1612be83ff47SStefano Zampini PetscInt k; 1613be83ff47SStefano Zampini for (k=0;k<subset_size;k++) { 1614be83ff47SStefano Zampini for (j=0;j<subset_size;j++) { 16151683a169SBarry Smith work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1616be83ff47SStefano Zampini } 1617be83ff47SStefano Zampini } 1618be83ff47SStefano Zampini } 1619b7ab4a40SStefano Zampini if (sub_schurs->change) { 16208760537fSStefano Zampini Mat change_sub,SEj,T; 162172b8c272SStefano Zampini 162272b8c272SStefano Zampini /* change basis */ 1623*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 1624*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 16258760537fSStefano Zampini if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 16268760537fSStefano Zampini Mat T2; 1627*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 1628*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1629*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&T2)); 1630*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 16318760537fSStefano Zampini } else { 1632*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 163372b8c272SStefano Zampini } 1634*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 1635*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&T)); 163672b8c272SStefano Zampini /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */ 1637*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL)); 1638*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&SEj)); 163972b8c272SStefano Zampini } 1640*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size)); 1641be83ff47SStefano Zampini cum += subset_size; 1642be83ff47SStefano Zampini cum2 += subset_size*(size_schur + 1); 164304c5b2e6SStefano Zampini SEjinv_arr += subset_size*subset_size; 1644883469d8SStefano Zampini } 1645*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArrayRead(S_all_inv,&rS_data)); 1646df4d28bfSStefano Zampini if (solver_S) { 16473fc34f97SStefano Zampini if (schur_has_vertices) { 1648*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED)); 16493fc34f97SStefano Zampini } else { 1650*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED)); 16515db18549SStefano Zampini } 16523fc34f97SStefano Zampini } 1653*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S_all_inv)); 1654683d3df6SStefano Zampini } 1655*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1656683d3df6SStefano Zampini 16573fc34f97SStefano Zampini /* move back factors if needed */ 16583fc34f97SStefano Zampini if (schur_has_vertices) { 1659683d3df6SStefano Zampini Mat S_tmp; 16603fc34f97SStefano Zampini PetscInt nd = 0; 1661683d3df6SStefano Zampini 16622c71b3e2SJacob Faibussowitsch PetscCheckFalse(!solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1663*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&S_tmp,NULL)); 1664f4f7d9d6SStefano Zampini if (use_potr) { 1665683d3df6SStefano Zampini PetscScalar *data; 1666683d3df6SStefano Zampini 1667*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S_tmp,&data)); 1668*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(data,size_schur*size_schur)); 1669683d3df6SStefano Zampini 1670683d3df6SStefano Zampini if (S_lower_triangular) { 1671683d3df6SStefano Zampini cum = 0; 1672683d3df6SStefano Zampini for (i=0;i<size_active_schur;i++) { 1673*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i)); 1674683d3df6SStefano Zampini cum += size_active_schur-i; 1675683d3df6SStefano Zampini } 1676683d3df6SStefano Zampini } else { 1677*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(data,schur_factor,size_schur*size_schur)); 1678683d3df6SStefano Zampini } 1679683d3df6SStefano Zampini if (sub_schurs->is_dir) { 1680*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1681683d3df6SStefano Zampini for (i=0;i<nd;i++) { 1682683d3df6SStefano Zampini data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i]; 1683683d3df6SStefano Zampini } 1684683d3df6SStefano Zampini } 16856dba178dSStefano Zampini /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1686683d3df6SStefano Zampini set the diagonal entry of the Schur factor to a very large value */ 1687683d3df6SStefano Zampini for (i=size_active_schur+nd;i<size_schur;i++) { 16886c3e6151SStefano Zampini data[i*(size_schur+1)] = infty; 1689683d3df6SStefano Zampini } 1690*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S_tmp,&data)); 16916542c05fSStefano Zampini } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1692*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED)); 16939087bf02SStefano Zampini } 1694367aa537SStefano Zampini } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */ 1695367aa537SStefano Zampini PetscScalar *data; 1696367aa537SStefano Zampini PetscInt nd = 0; 1697367aa537SStefano Zampini 1698367aa537SStefano Zampini if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1699*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1700367aa537SStefano Zampini } 1701*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL)); 1702*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseGetArray(S_all,&data)); 1703367aa537SStefano Zampini for (i=0;i<size_active_schur;i++) { 1704*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 1705367aa537SStefano Zampini } 1706367aa537SStefano Zampini for (i=size_active_schur+nd;i<size_schur;i++) { 1707*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 17086c3e6151SStefano Zampini data[i*(size_schur+1)] = infty; 1709367aa537SStefano Zampini } 1710*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDenseRestoreArray(S_all,&data)); 1711*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 17124a6c6b0dSStefano Zampini } 1713*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(work)); 1714*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(schur_factor)); 1715*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&Dall)); 17164a6c6b0dSStefano Zampini } 1717*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_I_layer)); 1718*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&S_all)); 1719*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A_BB)); 1720*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A_IB)); 1721*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A_BI)); 1722*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&F)); 17236afe12f5SStefano Zampini 1724*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&nnz)); 17256afe12f5SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 1726*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i])); 17276afe12f5SStefano Zampini } 1728*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer)); 1729*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz)); 1730*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 1731*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 17325a95e1ceSStefano Zampini if (compute_Stilda) { 1733*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz)); 1734*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 1735*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 17369d54b7f4SStefano Zampini if (deluxe) { 1737*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz)); 1738*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 1739*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 174008122e43SStefano Zampini } 17419d54b7f4SStefano Zampini } 1742*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is_I_layer)); 17436afe12f5SStefano Zampini 17445db18549SStefano Zampini /* Get local part of (\sum_j S_Ej) */ 174541fd5443SStefano Zampini if (!sub_schurs->sum_S_Ej_all) { 1746*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all)); 174741fd5443SStefano Zampini } 1748*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(gstash,0.0)); 1749*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray)); 1750*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(lstash,stasharray)); 1751*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1752*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1753*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray)); 1754*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(lstash)); 1755*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray)); 1756*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(lstash,stasharray)); 1757*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1758*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1759*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray)); 1760*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(lstash)); 176108122e43SStefano Zampini 1762f6f667cfSStefano Zampini /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 17635a95e1ceSStefano Zampini if (compute_Stilda) { 1764*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(gstash,0.0)); 1765*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 1766*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(lstash,stasharray)); 1767*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1768*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1769*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1770*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1771*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 1772*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(lstash)); 17739d54b7f4SStefano Zampini if (deluxe) { 1774*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecSet(gstash,0.0)); 1775*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 1776*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(lstash,stasharray)); 1777*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1778*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1779*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1780*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1781*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 1782*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(lstash)); 17839d54b7f4SStefano Zampini } else { 17849d54b7f4SStefano Zampini PetscScalar *array; 17859d54b7f4SStefano Zampini PetscInt cum; 17869d54b7f4SStefano Zampini 1787*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 17889d54b7f4SStefano Zampini cum = 0; 17899d54b7f4SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 1790*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1791*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBLASIntCast(subset_size,&B_N)); 1792*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1793f4f7d9d6SStefano Zampini if (use_potr) { 17949d54b7f4SStefano Zampini PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr)); 17952c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 17969d54b7f4SStefano Zampini PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr)); 17972c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1798f4f7d9d6SStefano Zampini } else if (use_sytr) { 1799f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 18002c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1801f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr)); 18022c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1803f4f7d9d6SStefano Zampini } else { 1804f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr)); 18052c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1806f4f7d9d6SStefano Zampini PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 18072c71b3e2SJacob Faibussowitsch PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1808f4f7d9d6SStefano Zampini } 1809*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogFlops(1.0*subset_size*subset_size*subset_size)); 1810*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFPTrapPop()); 18119d54b7f4SStefano Zampini cum += subset_size*subset_size; 18129d54b7f4SStefano Zampini } 1813*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 1814*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 1815*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 18169d54b7f4SStefano Zampini sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 18179d54b7f4SStefano Zampini } 181808122e43SStefano Zampini } 1819*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&lstash)); 1820*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecDestroy(&gstash)); 1821*5f80ce2aSJacob Faibussowitsch CHKERRQ(VecScatterDestroy(&sstash)); 182257a87bf3SStefano Zampini 18233b03f7bbSStefano Zampini if (matl_dbg_viewer) { 182411955456SStefano Zampini PetscInt cum; 182511955456SStefano Zampini 182611955456SStefano Zampini if (sub_schurs->S_Ej_all) { 1827*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE")); 1828*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer)); 182911955456SStefano Zampini } 183011955456SStefano Zampini if (sub_schurs->sum_S_Ej_all) { 1831*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE")); 1832*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer)); 183311955456SStefano Zampini } 183411955456SStefano Zampini if (sub_schurs->sum_S_Ej_inv_all) { 1835*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm")); 1836*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer)); 183711955456SStefano Zampini } 183811955456SStefano Zampini if (sub_schurs->sum_S_Ej_tilda_all) { 1839*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt")); 1840*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer)); 184111955456SStefano Zampini } 184211955456SStefano Zampini for (i=0,cum=0;i<sub_schurs->n_subs;i++) { 184311955456SStefano Zampini IS is; 184411955456SStefano Zampini char name[16]; 184511955456SStefano Zampini 1846*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSNPrintf(name,sizeof(name),"IE%D",i)); 1847*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1848*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is)); 1849*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectSetName((PetscObject)is,name)); 1850*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISView(is,matl_dbg_viewer)); 1851*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&is)); 185211955456SStefano Zampini cum += subset_size; 185311955456SStefano Zampini } 185411955456SStefano Zampini } 18553202ece2SStefano Zampini 18565a95e1ceSStefano Zampini /* free workspace */ 1857*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscViewerDestroy(&matl_dbg_viewer)); 1858*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(Bwork,pivots)); 1859*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommDestroy(&comm_n)); 1860b1b3d7a2SStefano Zampini PetscFunctionReturn(0); 1861b1b3d7a2SStefano Zampini } 1862b1b3d7a2SStefano Zampini 186388113c35SStefano Zampini PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc) 1864b1b3d7a2SStefano Zampini { 18659bb4a8caSStefano Zampini IS *faces,*edges,*all_cc,vertices; 18665a95e1ceSStefano Zampini PetscInt i,n_faces,n_edges,n_all_cc; 1867365a3a41SStefano Zampini PetscBool is_sorted,ispardiso,ismumps; 1868b1b3d7a2SStefano Zampini PetscErrorCode ierr; 1869b1b3d7a2SStefano Zampini 1870b1b3d7a2SStefano Zampini PetscFunctionBegin; 1871*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSorted(is_I,&is_sorted)); 18722c71b3e2SJacob Faibussowitsch PetscCheckFalse(!is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 1873*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISSorted(is_B,&is_sorted)); 18742c71b3e2SJacob Faibussowitsch PetscCheckFalse(!is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 1875b1b3d7a2SStefano Zampini 1876b1b3d7a2SStefano Zampini /* reset any previous data */ 1877*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCSubSchursReset(sub_schurs)); 1878b1b3d7a2SStefano Zampini 18795a95e1ceSStefano Zampini /* get index sets for faces and edges (already sorted by global ordering) */ 1880*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 1881b1b3d7a2SStefano Zampini n_all_cc = n_faces+n_edges; 1882*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTCreate(n_all_cc,&sub_schurs->is_edge)); 1883*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n_all_cc,&all_cc)); 1884b1b3d7a2SStefano Zampini for (i=0;i<n_faces;i++) { 18858b6046baSStefano Zampini if (copycc) { 1886*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDuplicate(faces[i],&all_cc[i])); 18878b6046baSStefano Zampini } else { 1888*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)faces[i])); 1889b1b3d7a2SStefano Zampini all_cc[i] = faces[i]; 1890b1b3d7a2SStefano Zampini } 18918b6046baSStefano Zampini } 1892b1b3d7a2SStefano Zampini for (i=0;i<n_edges;i++) { 18938b6046baSStefano Zampini if (copycc) { 1894*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDuplicate(edges[i],&all_cc[n_faces+i])); 18958b6046baSStefano Zampini } else { 1896*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)edges[i])); 1897b1b3d7a2SStefano Zampini all_cc[n_faces+i] = edges[i]; 18988b6046baSStefano Zampini } 1899*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTSet(sub_schurs->is_edge,n_faces+i)); 1900b1b3d7a2SStefano Zampini } 1901*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)vertices)); 1902c8272957SStefano Zampini sub_schurs->is_vertices = vertices; 1903*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 1904d62866d3SStefano Zampini sub_schurs->is_dir = NULL; 1905*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir)); 1906b1b3d7a2SStefano Zampini 1907df4d28bfSStefano Zampini /* Determine if MatFactor can be used */ 1908*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrallocpy(prefix,&sub_schurs->prefix)); 1909883469d8SStefano Zampini #if defined(PETSC_HAVE_MUMPS) 1910*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type))); 191188113c35SStefano Zampini #elif defined(PETSC_HAVE_MKL_PARDISO) 1912*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type))); 191388113c35SStefano Zampini #else 1914*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type))); 1915df4d28bfSStefano Zampini #endif 191688113c35SStefano Zampini #if defined(PETSC_USE_COMPLEX) 191788113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 191888113c35SStefano Zampini #else 191988113c35SStefano Zampini sub_schurs->is_hermitian = PETSC_TRUE; 1920883469d8SStefano Zampini #endif 192188113c35SStefano Zampini sub_schurs->is_posdef = PETSC_TRUE; 192211955456SStefano Zampini sub_schurs->is_symmetric = PETSC_TRUE; 19237f9db97bSStefano Zampini sub_schurs->debug = PETSC_FALSE; 1924991c41b4SStefano Zampini sub_schurs->restrict_comm = PETSC_FALSE; 192588113c35SStefano Zampini ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 1926*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL)); 1927*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL)); 1928*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL)); 1929*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL)); 1930*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL)); 1931*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL)); 193288113c35SStefano Zampini ierr = PetscOptionsEnd();CHKERRQ(ierr); 1933*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps)); 1934*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso)); 1935365a3a41SStefano Zampini sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 1936b1b3d7a2SStefano Zampini 193711955456SStefano Zampini /* for reals, symmetric and hermitian are synonims */ 193811955456SStefano Zampini #if !defined(PETSC_USE_COMPLEX) 193911955456SStefano Zampini sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 194011955456SStefano Zampini sub_schurs->is_hermitian = sub_schurs->is_symmetric; 194111955456SStefano Zampini #endif 194211955456SStefano Zampini 1943*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)is_I)); 1944b1b3d7a2SStefano Zampini sub_schurs->is_I = is_I; 1945*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)is_B)); 1946b1b3d7a2SStefano Zampini sub_schurs->is_B = is_B; 1947*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)graph->l2gmap)); 19485db18549SStefano Zampini sub_schurs->l2gmap = graph->l2gmap; 1949*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectReference((PetscObject)BtoNmap)); 19505db18549SStefano Zampini sub_schurs->BtoNmap = BtoNmap; 19515a95e1ceSStefano Zampini sub_schurs->n_subs = n_all_cc; 1952b1b3d7a2SStefano Zampini sub_schurs->is_subs = all_cc; 1953b96c3477SStefano Zampini sub_schurs->S_Ej_all = NULL; 1954b96c3477SStefano Zampini sub_schurs->sum_S_Ej_all = NULL; 195508122e43SStefano Zampini sub_schurs->sum_S_Ej_inv_all = NULL; 1956b96c3477SStefano Zampini sub_schurs->sum_S_Ej_tilda_all = NULL; 1957b96c3477SStefano Zampini sub_schurs->is_Ej_all = NULL; 1958b1b3d7a2SStefano Zampini PetscFunctionReturn(0); 1959b1b3d7a2SStefano Zampini } 1960b1b3d7a2SStefano Zampini 196134a97f8cSStefano Zampini PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 196234a97f8cSStefano Zampini { 196334a97f8cSStefano Zampini PCBDDCSubSchurs schurs_ctx; 196434a97f8cSStefano Zampini 196534a97f8cSStefano Zampini PetscFunctionBegin; 1966*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNew(&schurs_ctx)); 19675ff63025SStefano Zampini schurs_ctx->n_subs = 0; 196834a97f8cSStefano Zampini *sub_schurs = schurs_ctx; 196934a97f8cSStefano Zampini PetscFunctionReturn(0); 197034a97f8cSStefano Zampini } 197134a97f8cSStefano Zampini 197234a97f8cSStefano Zampini PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 197334a97f8cSStefano Zampini { 197434a97f8cSStefano Zampini PetscInt i; 197534a97f8cSStefano Zampini 197634a97f8cSStefano Zampini PetscFunctionBegin; 1977aea80f77Sstefano_zampini if (!sub_schurs) PetscFunctionReturn(0); 1978*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sub_schurs->prefix)); 1979*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->A)); 1980*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->S)); 1981*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->is_I)); 1982*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->is_B)); 1983*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 1984*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 1985*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->S_Ej_all)); 1986*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_all)); 1987*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 1988*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 1989*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->is_Ej_all)); 1990*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->is_vertices)); 1991*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->is_dir)); 1992*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTDestroy(&sub_schurs->is_edge)); 199334a97f8cSStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 1994*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->is_subs[i])); 199534a97f8cSStefano Zampini } 19965ff63025SStefano Zampini if (sub_schurs->n_subs) { 1997*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sub_schurs->is_subs)); 19983dc780c3SStefano Zampini } 1999df4d28bfSStefano Zampini if (sub_schurs->reuse_solver) { 2000*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2001d62866d3SStefano Zampini } 2002*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sub_schurs->reuse_solver)); 200372b8c272SStefano Zampini if (sub_schurs->change) { 200472b8c272SStefano Zampini for (i=0;i<sub_schurs->n_subs;i++) { 2005*5f80ce2aSJacob Faibussowitsch CHKERRQ(KSPDestroy(&sub_schurs->change[i])); 2006*5f80ce2aSJacob Faibussowitsch CHKERRQ(ISDestroy(&sub_schurs->change_primal_sub[i])); 200772b8c272SStefano Zampini } 200872b8c272SStefano Zampini } 2009*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sub_schurs->change)); 2010*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sub_schurs->change_primal_sub)); 201134a97f8cSStefano Zampini sub_schurs->n_subs = 0; 201234a97f8cSStefano Zampini PetscFunctionReturn(0); 201334a97f8cSStefano Zampini } 201434a97f8cSStefano Zampini 2015aea80f77Sstefano_zampini PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs) 2016aea80f77Sstefano_zampini { 2017aea80f77Sstefano_zampini PetscFunctionBegin; 2018*5f80ce2aSJacob Faibussowitsch CHKERRQ(PCBDDCSubSchursReset(*sub_schurs)); 2019*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(*sub_schurs)); 2020aea80f77Sstefano_zampini PetscFunctionReturn(0); 2021aea80f77Sstefano_zampini } 2022aea80f77Sstefano_zampini 20239fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 202434a97f8cSStefano Zampini { 202534a97f8cSStefano Zampini PetscInt i,j,n; 202634a97f8cSStefano Zampini 202734a97f8cSStefano Zampini PetscFunctionBegin; 202834a97f8cSStefano Zampini n = 0; 202934a97f8cSStefano Zampini for (i=-n_prev;i<0;i++) { 203034a97f8cSStefano Zampini PetscInt start_dof = queue_tip[i]; 203134a97f8cSStefano Zampini for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 203234a97f8cSStefano Zampini PetscInt dof = adjncy[j]; 203334a97f8cSStefano Zampini if (!PetscBTLookup(touched,dof)) { 2034*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscBTSet(touched,dof)); 203534a97f8cSStefano Zampini queue_tip[n] = dof; 203634a97f8cSStefano Zampini n++; 203734a97f8cSStefano Zampini } 203834a97f8cSStefano Zampini } 203934a97f8cSStefano Zampini } 204034a97f8cSStefano Zampini *n_added = n; 204134a97f8cSStefano Zampini PetscFunctionReturn(0); 204234a97f8cSStefano Zampini } 2043