1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <../src/mat/impls/dense/seq/dense.h> 4 #include <petscblaslapack.h> 5 6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*); 8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec); 9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec); 10 11 /* if v2 is not present, correction is done in-place */ 12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13 { 14 PetscScalar *array; 15 PetscScalar *array2; 16 17 PetscFunctionBegin; 18 if (!ctx->benign_n) PetscFunctionReturn(0); 19 if (sol && full) { 20 PetscInt n_I,size_schur; 21 22 /* get sizes */ 23 PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 24 PetscCall(VecGetSize(v,&n_I)); 25 n_I = n_I - size_schur; 26 /* get schur sol from array */ 27 PetscCall(VecGetArray(v,&array)); 28 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 29 PetscCall(VecRestoreArray(v,&array)); 30 /* apply interior sol correction */ 31 PetscCall(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work)); 32 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 33 PetscCall(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v)); 34 } 35 if (v2) { 36 PetscInt nl; 37 38 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 39 PetscCall(VecGetLocalSize(v2,&nl)); 40 PetscCall(VecGetArray(v2,&array2)); 41 PetscCall(PetscArraycpy(array2,array,nl)); 42 } else { 43 PetscCall(VecGetArray(v,&array)); 44 array2 = array; 45 } 46 if (!sol) { /* change rhs */ 47 PetscInt n; 48 for (n=0;n<ctx->benign_n;n++) { 49 PetscScalar sum = 0.; 50 const PetscInt *cols; 51 PetscInt nz,i; 52 53 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 54 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 55 for (i=0;i<nz-1;i++) sum += array[cols[i]]; 56 #if defined(PETSC_USE_COMPLEX) 57 sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 58 #else 59 sum = -sum/nz; 60 #endif 61 for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 62 ctx->benign_save_vals[n] = array2[cols[nz-1]]; 63 array2[cols[nz-1]] = sum; 64 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 65 } 66 } else { 67 PetscInt n; 68 for (n=0;n<ctx->benign_n;n++) { 69 PetscScalar sum = 0.; 70 const PetscInt *cols; 71 PetscInt nz,i; 72 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 73 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 74 for (i=0;i<nz-1;i++) sum += array[cols[i]]; 75 #if defined(PETSC_USE_COMPLEX) 76 sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 77 #else 78 sum = -sum/nz; 79 #endif 80 for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 81 array2[cols[nz-1]] = ctx->benign_save_vals[n]; 82 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 83 } 84 } 85 if (v2) { 86 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 87 PetscCall(VecRestoreArray(v2,&array2)); 88 } else { 89 PetscCall(VecRestoreArray(v,&array)); 90 } 91 if (!sol && full) { 92 Vec usedv; 93 PetscInt n_I,size_schur; 94 95 /* get sizes */ 96 PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 97 PetscCall(VecGetSize(v,&n_I)); 98 n_I = n_I - size_schur; 99 /* compute schur rhs correction */ 100 if (v2) { 101 usedv = v2; 102 } else { 103 usedv = v; 104 } 105 /* apply schur rhs correction */ 106 PetscCall(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work)); 107 PetscCall(VecGetArrayRead(usedv,(const PetscScalar**)&array)); 108 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 109 PetscCall(VecRestoreArrayRead(usedv,(const PetscScalar**)&array)); 110 PetscCall(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec)); 111 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 112 } 113 PetscFunctionReturn(0); 114 } 115 116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117 { 118 PCBDDCReuseSolvers ctx; 119 PetscBool copy = PETSC_FALSE; 120 121 PetscFunctionBegin; 122 PetscCall(PCShellGetContext(pc,&ctx)); 123 if (full) { 124 #if defined(PETSC_HAVE_MUMPS) 125 PetscCall(MatMumpsSetIcntl(ctx->F,26,-1)); 126 #endif 127 #if defined(PETSC_HAVE_MKL_PARDISO) 128 PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0)); 129 #endif 130 copy = ctx->has_vertices; 131 } else { /* interior solver */ 132 #if defined(PETSC_HAVE_MUMPS) 133 PetscCall(MatMumpsSetIcntl(ctx->F,26,0)); 134 #endif 135 #if defined(PETSC_HAVE_MKL_PARDISO) 136 PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,1)); 137 #endif 138 copy = PETSC_TRUE; 139 } 140 /* copy rhs into factored matrix workspace */ 141 if (copy) { 142 PetscInt n; 143 PetscScalar *array,*array_solver; 144 145 PetscCall(VecGetLocalSize(rhs,&n)); 146 PetscCall(VecGetArrayRead(rhs,(const PetscScalar**)&array)); 147 PetscCall(VecGetArray(ctx->rhs,&array_solver)); 148 PetscCall(PetscArraycpy(array_solver,array,n)); 149 PetscCall(VecRestoreArray(ctx->rhs,&array_solver)); 150 PetscCall(VecRestoreArrayRead(rhs,(const PetscScalar**)&array)); 151 152 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full)); 153 if (transpose) { 154 PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol)); 155 } else { 156 PetscCall(MatSolve(ctx->F,ctx->rhs,ctx->sol)); 157 } 158 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full)); 159 160 /* get back data to caller worskpace */ 161 PetscCall(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 162 PetscCall(VecGetArray(sol,&array)); 163 PetscCall(PetscArraycpy(array,array_solver,n)); 164 PetscCall(VecRestoreArray(sol,&array)); 165 PetscCall(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 166 } else { 167 if (ctx->benign_n) { 168 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full)); 169 if (transpose) { 170 PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,sol)); 171 } else { 172 PetscCall(MatSolve(ctx->F,ctx->rhs,sol)); 173 } 174 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full)); 175 } else { 176 if (transpose) { 177 PetscCall(MatSolveTranspose(ctx->F,rhs,sol)); 178 } else { 179 PetscCall(MatSolve(ctx->F,rhs,sol)); 180 } 181 } 182 } 183 /* restore defaults */ 184 #if defined(PETSC_HAVE_MUMPS) 185 PetscCall(MatMumpsSetIcntl(ctx->F,26,-1)); 186 #endif 187 #if defined(PETSC_HAVE_MKL_PARDISO) 188 PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0)); 189 #endif 190 PetscFunctionReturn(0); 191 } 192 193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 194 { 195 PetscFunctionBegin; 196 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE)); 197 PetscFunctionReturn(0); 198 } 199 200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 201 { 202 PetscFunctionBegin; 203 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 208 { 209 PetscFunctionBegin; 210 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE)); 211 PetscFunctionReturn(0); 212 } 213 214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 215 { 216 PetscFunctionBegin; 217 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE)); 218 PetscFunctionReturn(0); 219 } 220 221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 222 { 223 PCBDDCReuseSolvers ctx; 224 PetscBool iascii; 225 226 PetscFunctionBegin; 227 PetscCall(PCShellGetContext(pc,&ctx)); 228 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 229 if (iascii) { 230 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO)); 231 } 232 PetscCall(MatView(ctx->F,viewer)); 233 if (iascii) { 234 PetscCall(PetscViewerPopFormat(viewer)); 235 } 236 PetscFunctionReturn(0); 237 } 238 239 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 240 { 241 PetscInt i; 242 243 PetscFunctionBegin; 244 PetscCall(MatDestroy(&reuse->F)); 245 PetscCall(VecDestroy(&reuse->sol)); 246 PetscCall(VecDestroy(&reuse->rhs)); 247 PetscCall(PCDestroy(&reuse->interior_solver)); 248 PetscCall(PCDestroy(&reuse->correction_solver)); 249 PetscCall(ISDestroy(&reuse->is_R)); 250 PetscCall(ISDestroy(&reuse->is_B)); 251 PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 252 PetscCall(VecDestroy(&reuse->sol_B)); 253 PetscCall(VecDestroy(&reuse->rhs_B)); 254 for (i=0;i<reuse->benign_n;i++) { 255 PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); 256 } 257 PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 258 PetscCall(PetscFree(reuse->benign_save_vals)); 259 PetscCall(MatDestroy(&reuse->benign_csAIB)); 260 PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 261 PetscCall(VecDestroy(&reuse->benign_corr_work)); 262 PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 263 PetscFunctionReturn(0); 264 } 265 266 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) 267 { 268 PCBDDCReuseSolvers ctx; 269 270 PetscFunctionBegin; 271 PetscCall(PCShellGetContext(pc,&ctx)); 272 PetscCall(PCBDDCReuseSolversReset(ctx)); 273 PetscCall(PetscFree(ctx)); 274 PetscCall(PCShellSetContext(pc,NULL)); 275 PetscFunctionReturn(0); 276 } 277 278 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 279 { 280 Mat B, C, D, Bd, Cd, AinvBd; 281 KSP ksp; 282 PC pc; 283 PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 284 PetscReal fill = 2.0; 285 PetscInt n_I; 286 PetscMPIInt size; 287 288 PetscFunctionBegin; 289 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size)); 290 PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices"); 291 if (reuse == MAT_REUSE_MATRIX) { 292 PetscBool Sdense; 293 294 PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 295 PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense"); 296 } 297 PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 298 PetscCall(MatSchurComplementGetKSP(M, &ksp)); 299 PetscCall(KSPGetPC(ksp, &pc)); 300 PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU)); 301 PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU)); 302 PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL)); 303 PetscCall(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense)); 304 PetscCall(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense)); 305 PetscCall(MatGetSize(B,&n_I,NULL)); 306 if (n_I) { 307 if (!Bdense) { 308 PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 309 } else { 310 Bd = B; 311 } 312 313 if (isLU || isILU || isCHOL) { 314 Mat fact; 315 PetscCall(KSPSetUp(ksp)); 316 PetscCall(PCFactorGetMatrix(pc, &fact)); 317 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 318 PetscCall(MatMatSolve(fact, Bd, AinvBd)); 319 } else { 320 PetscBool ex = PETSC_TRUE; 321 322 if (ex) { 323 Mat Ainvd; 324 325 PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 326 PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 327 PetscCall(MatDestroy(&Ainvd)); 328 } else { 329 Vec sol,rhs; 330 PetscScalar *arrayrhs,*arraysol; 331 PetscInt i,nrhs,n; 332 333 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 334 PetscCall(MatGetSize(Bd,&n,&nrhs)); 335 PetscCall(MatDenseGetArray(Bd,&arrayrhs)); 336 PetscCall(MatDenseGetArray(AinvBd,&arraysol)); 337 PetscCall(KSPGetSolution(ksp,&sol)); 338 PetscCall(KSPGetRhs(ksp,&rhs)); 339 for (i=0;i<nrhs;i++) { 340 PetscCall(VecPlaceArray(rhs,arrayrhs+i*n)); 341 PetscCall(VecPlaceArray(sol,arraysol+i*n)); 342 PetscCall(KSPSolve(ksp,rhs,sol)); 343 PetscCall(VecResetArray(rhs)); 344 PetscCall(VecResetArray(sol)); 345 } 346 PetscCall(MatDenseRestoreArray(Bd,&arrayrhs)); 347 PetscCall(MatDenseRestoreArray(AinvBd,&arrayrhs)); 348 } 349 } 350 if (!Bdense & !issym) { 351 PetscCall(MatDestroy(&Bd)); 352 } 353 354 if (!issym) { 355 if (!Cdense) { 356 PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 357 } else { 358 Cd = C; 359 } 360 PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 361 if (!Cdense) { 362 PetscCall(MatDestroy(&Cd)); 363 } 364 } else { 365 PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 366 if (!Bdense) { 367 PetscCall(MatDestroy(&Bd)); 368 } 369 } 370 PetscCall(MatDestroy(&AinvBd)); 371 } 372 373 if (D) { 374 Mat Dd; 375 PetscBool Ddense; 376 377 PetscCall(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense)); 378 if (!Ddense) { 379 PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 380 } else { 381 Dd = D; 382 } 383 if (n_I) { 384 PetscCall(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN)); 385 } else { 386 if (reuse == MAT_INITIAL_MATRIX) { 387 PetscCall(MatDuplicate(Dd,MAT_COPY_VALUES,S)); 388 } else { 389 PetscCall(MatCopy(Dd,*S,SAME_NONZERO_PATTERN)); 390 } 391 } 392 if (!Ddense) { 393 PetscCall(MatDestroy(&Dd)); 394 } 395 } else { 396 PetscCall(MatScale(*S,-1.0)); 397 } 398 PetscFunctionReturn(0); 399 } 400 401 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 402 { 403 Mat F,A_II,A_IB,A_BI,A_BB,AE_II; 404 Mat S_all; 405 Vec gstash,lstash; 406 VecScatter sstash; 407 IS is_I,is_I_layer; 408 IS all_subsets,all_subsets_mult,all_subsets_n; 409 PetscScalar *stasharray,*Bwork; 410 PetscInt *nnz,*all_local_idx_N; 411 PetscInt *auxnum1,*auxnum2; 412 PetscInt i,subset_size,max_subset_size; 413 PetscInt n_B,extra,local_size,global_size; 414 PetscInt local_stash_size; 415 PetscBLASInt B_N,B_ierr,B_lwork,*pivots; 416 MPI_Comm comm_n; 417 PetscBool deluxe = PETSC_TRUE; 418 PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 419 PetscViewer matl_dbg_viewer = NULL; 420 PetscBool flg; 421 422 PetscFunctionBegin; 423 PetscCall(MatDestroy(&sub_schurs->A)); 424 PetscCall(MatDestroy(&sub_schurs->S)); 425 if (Ain) { 426 PetscCall(PetscObjectReference((PetscObject)Ain)); 427 sub_schurs->A = Ain; 428 } 429 430 PetscCall(PetscObjectReference((PetscObject)Sin)); 431 sub_schurs->S = Sin; 432 if (sub_schurs->schur_explicit) { 433 sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 434 } 435 436 /* preliminary checks */ 437 PetscCheck(sub_schurs->schur_explicit || !compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 438 439 if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 440 441 /* debug (MATLAB) */ 442 if (sub_schurs->debug) { 443 PetscMPIInt size,rank; 444 PetscInt nr,*print_schurs_ranks,print_schurs = PETSC_FALSE; 445 446 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size)); 447 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 448 nr = size; 449 PetscCall(PetscMalloc1(nr,&print_schurs_ranks)); 450 PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 451 PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg)); 452 if (!flg) print_schurs = PETSC_TRUE; 453 else { 454 print_schurs = PETSC_FALSE; 455 for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; } 456 } 457 PetscOptionsEnd(); 458 PetscCall(PetscFree(print_schurs_ranks)); 459 if (print_schurs) { 460 char filename[256]; 461 462 PetscCall(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank)); 463 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer)); 464 PetscCall(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB)); 465 } 466 } 467 468 /* restrict work on active processes */ 469 if (sub_schurs->restrict_comm) { 470 PetscSubcomm subcomm; 471 PetscMPIInt color,rank; 472 473 color = 0; 474 if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 475 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 476 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm)); 477 PetscCall(PetscSubcommSetNumber(subcomm,2)); 478 PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 479 PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL)); 480 PetscCall(PetscSubcommDestroy(&subcomm)); 481 if (!sub_schurs->n_subs) { 482 PetscCall(PetscCommDestroy(&comm_n)); 483 PetscFunctionReturn(0); 484 } 485 } else { 486 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL)); 487 } 488 489 /* get Schur complement matrices */ 490 if (!sub_schurs->schur_explicit) { 491 Mat tA_IB,tA_BI,tA_BB; 492 PetscBool isseqsbaij; 493 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB)); 494 PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij)); 495 if (isseqsbaij) { 496 PetscCall(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB)); 497 PetscCall(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB)); 498 PetscCall(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI)); 499 } else { 500 PetscCall(PetscObjectReference((PetscObject)tA_BB)); 501 A_BB = tA_BB; 502 PetscCall(PetscObjectReference((PetscObject)tA_IB)); 503 A_IB = tA_IB; 504 PetscCall(PetscObjectReference((PetscObject)tA_BI)); 505 A_BI = tA_BI; 506 } 507 } else { 508 A_II = NULL; 509 A_IB = NULL; 510 A_BI = NULL; 511 A_BB = NULL; 512 } 513 S_all = NULL; 514 515 /* determine interior problems */ 516 PetscCall(ISGetLocalSize(sub_schurs->is_I,&i)); 517 if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 518 PetscBT touched; 519 const PetscInt* idx_B; 520 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 521 522 PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency"); 523 /* get sizes */ 524 PetscCall(ISGetLocalSize(sub_schurs->is_I,&n_I)); 525 PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B)); 526 527 PetscCall(PetscMalloc1(n_I+n_B,&local_numbering)); 528 PetscCall(PetscBTCreate(n_I+n_B,&touched)); 529 PetscCall(PetscBTMemzero(n_I+n_B,touched)); 530 531 /* all boundary dofs must be skipped when adding layers */ 532 PetscCall(ISGetIndices(sub_schurs->is_B,&idx_B)); 533 for (j=0;j<n_B;j++) { 534 PetscCall(PetscBTSet(touched,idx_B[j])); 535 } 536 PetscCall(PetscArraycpy(local_numbering,idx_B,n_B)); 537 PetscCall(ISRestoreIndices(sub_schurs->is_B,&idx_B)); 538 539 /* add prescribed number of layers of dofs */ 540 n_local_dofs = n_B; 541 n_prev_added = n_B; 542 for (layer=0;layer<nlayers;layer++) { 543 PetscInt n_added = 0; 544 if (n_local_dofs == n_I+n_B) break; 545 PetscCheck(n_local_dofs <= n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")",layer,n_local_dofs,n_I+n_B); 546 PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added)); 547 n_prev_added = n_added; 548 n_local_dofs += n_added; 549 if (!n_added) break; 550 } 551 PetscCall(PetscBTDestroy(&touched)); 552 553 /* IS for I layer dofs in original numbering */ 554 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer)); 555 PetscCall(PetscFree(local_numbering)); 556 PetscCall(ISSort(is_I_layer)); 557 /* IS for I layer dofs in I numbering */ 558 if (!sub_schurs->schur_explicit) { 559 ISLocalToGlobalMapping ItoNmap; 560 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap)); 561 PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I)); 562 PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 563 564 /* II block */ 565 PetscCall(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II)); 566 } 567 } else { 568 PetscInt n_I; 569 570 /* IS for I dofs in original numbering */ 571 PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 572 is_I_layer = sub_schurs->is_I; 573 574 /* IS for I dofs in I numbering (strided 1) */ 575 if (!sub_schurs->schur_explicit) { 576 PetscCall(ISGetSize(sub_schurs->is_I,&n_I)); 577 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I)); 578 579 /* II block is the same */ 580 PetscCall(PetscObjectReference((PetscObject)A_II)); 581 AE_II = A_II; 582 } 583 } 584 585 /* Get info on subset sizes and sum of all subsets sizes */ 586 max_subset_size = 0; 587 local_size = 0; 588 for (i=0;i<sub_schurs->n_subs;i++) { 589 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 590 max_subset_size = PetscMax(subset_size,max_subset_size); 591 local_size += subset_size; 592 } 593 594 /* Work arrays for local indices */ 595 extra = 0; 596 PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B)); 597 if (sub_schurs->schur_explicit && is_I_layer) { 598 PetscCall(ISGetLocalSize(is_I_layer,&extra)); 599 } 600 PetscCall(PetscMalloc1(n_B+extra,&all_local_idx_N)); 601 if (extra) { 602 const PetscInt *idxs; 603 PetscCall(ISGetIndices(is_I_layer,&idxs)); 604 PetscCall(PetscArraycpy(all_local_idx_N,idxs,extra)); 605 PetscCall(ISRestoreIndices(is_I_layer,&idxs)); 606 } 607 PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum1)); 608 PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum2)); 609 610 /* Get local indices in local numbering */ 611 local_size = 0; 612 local_stash_size = 0; 613 for (i=0;i<sub_schurs->n_subs;i++) { 614 const PetscInt *idxs; 615 616 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 617 PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 618 /* start (smallest in global ordering) and multiplicity */ 619 auxnum1[i] = idxs[0]; 620 auxnum2[i] = subset_size*subset_size; 621 /* subset indices in local numbering */ 622 PetscCall(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size)); 623 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 624 local_size += subset_size; 625 local_stash_size += subset_size*subset_size; 626 } 627 628 /* allocate extra workspace needed only for GETRI or SYTRF */ 629 use_potr = use_sytr = PETSC_FALSE; 630 if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 631 use_potr = PETSC_TRUE; 632 } else if (sub_schurs->is_symmetric) { 633 use_sytr = PETSC_TRUE; 634 } 635 if (local_size && !use_potr) { 636 PetscScalar lwork,dummyscalar = 0.; 637 PetscBLASInt dummyint = 0; 638 639 B_lwork = -1; 640 PetscCall(PetscBLASIntCast(local_size,&B_N)); 641 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 642 if (use_sytr) { 643 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 644 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr); 645 } else { 646 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 647 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr); 648 } 649 PetscCall(PetscFPTrapPop()); 650 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 651 PetscCall(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots)); 652 } else { 653 Bwork = NULL; 654 pivots = NULL; 655 } 656 657 /* prepare data for summing up properly schurs on subsets */ 658 PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n)); 659 PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets)); 660 PetscCall(ISDestroy(&all_subsets_n)); 661 PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult)); 662 PetscCall(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n)); 663 PetscCall(ISDestroy(&all_subsets)); 664 PetscCall(ISDestroy(&all_subsets_mult)); 665 PetscCall(ISGetLocalSize(all_subsets_n,&i)); 666 PetscCheck(i == local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT,i,local_stash_size); 667 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash)); 668 PetscCall(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash)); 669 PetscCall(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash)); 670 PetscCall(ISDestroy(&all_subsets_n)); 671 672 /* subset indices in local boundary numbering */ 673 if (!sub_schurs->is_Ej_all) { 674 PetscInt *all_local_idx_B; 675 676 PetscCall(PetscMalloc1(local_size,&all_local_idx_B)); 677 PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B)); 678 PetscCheck(subset_size == local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT,subset_size,local_size); 679 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all)); 680 } 681 682 if (change) { 683 ISLocalToGlobalMapping BtoS; 684 IS change_primal_B; 685 IS change_primal_all; 686 687 PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 688 PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 689 PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub)); 690 for (i=0;i<sub_schurs->n_subs;i++) { 691 ISLocalToGlobalMapping NtoS; 692 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS)); 693 PetscCall(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i])); 694 PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 695 } 696 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B)); 697 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS)); 698 PetscCall(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all)); 699 PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 700 PetscCall(ISDestroy(&change_primal_B)); 701 PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change)); 702 for (i=0;i<sub_schurs->n_subs;i++) { 703 Mat change_sub; 704 705 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 706 PetscCall(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i])); 707 PetscCall(KSPSetType(sub_schurs->change[i],KSPPREONLY)); 708 if (!sub_schurs->change_with_qr) { 709 PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub)); 710 } else { 711 Mat change_subt; 712 PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt)); 713 PetscCall(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub)); 714 PetscCall(MatDestroy(&change_subt)); 715 } 716 PetscCall(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub)); 717 PetscCall(MatDestroy(&change_sub)); 718 PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix)); 719 PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_")); 720 } 721 PetscCall(ISDestroy(&change_primal_all)); 722 } 723 724 /* Local matrix of all local Schur on subsets (transposed) */ 725 if (!sub_schurs->S_Ej_all) { 726 Mat T; 727 PetscScalar *v; 728 PetscInt *ii,*jj; 729 PetscInt cum,i,j,k; 730 731 /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 732 Allocate properly a representative matrix and duplicate */ 733 PetscCall(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v)); 734 ii[0] = 0; 735 cum = 0; 736 for (i=0;i<sub_schurs->n_subs;i++) { 737 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 738 for (j=0;j<subset_size;j++) { 739 const PetscInt row = cum+j; 740 PetscInt col = cum; 741 742 ii[row+1] = ii[row] + subset_size; 743 for (k=ii[row];k<ii[row+1];k++) { 744 jj[k] = col; 745 col++; 746 } 747 } 748 cum += subset_size; 749 } 750 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T)); 751 PetscCall(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all)); 752 PetscCall(MatDestroy(&T)); 753 PetscCall(PetscFree3(ii,jj,v)); 754 } 755 /* matrices for deluxe scaling and adaptive selection */ 756 if (compute_Stilda) { 757 if (!sub_schurs->sum_S_Ej_tilda_all) { 758 PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all)); 759 } 760 if (!sub_schurs->sum_S_Ej_inv_all && deluxe) { 761 PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all)); 762 } 763 } 764 765 /* Compute Schur complements explicitly */ 766 F = NULL; 767 if (!sub_schurs->schur_explicit) { 768 /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 769 it is not efficient, unless the economic version of the scaling is used */ 770 Mat S_Ej_expl; 771 PetscScalar *work; 772 PetscInt j,*dummy_idx; 773 PetscBool Sdense; 774 775 PetscCall(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work)); 776 local_size = 0; 777 for (i=0;i<sub_schurs->n_subs;i++) { 778 IS is_subset_B; 779 Mat AE_EE,AE_IE,AE_EI,S_Ej; 780 781 /* subsets in original and boundary numbering */ 782 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B)); 783 /* EE block */ 784 PetscCall(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE)); 785 /* IE block */ 786 PetscCall(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE)); 787 /* EI block */ 788 if (sub_schurs->is_symmetric) { 789 PetscCall(MatCreateTranspose(AE_IE,&AE_EI)); 790 } else if (sub_schurs->is_hermitian) { 791 PetscCall(MatCreateHermitianTranspose(AE_IE,&AE_EI)); 792 } else { 793 PetscCall(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI)); 794 } 795 PetscCall(ISDestroy(&is_subset_B)); 796 PetscCall(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej)); 797 PetscCall(MatDestroy(&AE_EE)); 798 PetscCall(MatDestroy(&AE_IE)); 799 PetscCall(MatDestroy(&AE_EI)); 800 if (AE_II == A_II) { /* we can reuse the same ksp */ 801 KSP ksp; 802 PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&ksp)); 803 PetscCall(MatSchurComplementSetKSP(S_Ej,ksp)); 804 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 805 KSP origksp,schurksp; 806 PC origpc,schurpc; 807 KSPType ksp_type; 808 PetscInt n_internal; 809 PetscBool ispcnone; 810 811 PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&origksp)); 812 PetscCall(MatSchurComplementGetKSP(S_Ej,&schurksp)); 813 PetscCall(KSPGetType(origksp,&ksp_type)); 814 PetscCall(KSPSetType(schurksp,ksp_type)); 815 PetscCall(KSPGetPC(schurksp,&schurpc)); 816 PetscCall(KSPGetPC(origksp,&origpc)); 817 PetscCall(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone)); 818 if (!ispcnone) { 819 PCType pc_type; 820 PetscCall(PCGetType(origpc,&pc_type)); 821 PetscCall(PCSetType(schurpc,pc_type)); 822 } else { 823 PetscCall(PCSetType(schurpc,PCLU)); 824 } 825 PetscCall(ISGetSize(is_I,&n_internal)); 826 if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 827 MatSolverType solver = NULL; 828 PetscCall(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver)); 829 if (solver) { 830 PetscCall(PCFactorSetMatSolverType(schurpc,solver)); 831 } 832 } 833 PetscCall(KSPSetUp(schurksp)); 834 } 835 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 836 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl)); 837 PetscCall(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl)); 838 PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense)); 839 if (Sdense) { 840 for (j=0;j<subset_size;j++) { 841 dummy_idx[j]=local_size+j; 842 } 843 PetscCall(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES)); 844 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices"); 845 PetscCall(MatDestroy(&S_Ej)); 846 PetscCall(MatDestroy(&S_Ej_expl)); 847 local_size += subset_size; 848 } 849 PetscCall(PetscFree2(dummy_idx,work)); 850 /* free */ 851 PetscCall(ISDestroy(&is_I)); 852 PetscCall(MatDestroy(&AE_II)); 853 PetscCall(PetscFree(all_local_idx_N)); 854 } else { 855 Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL; 856 Mat *gdswA; 857 Vec Dall = NULL; 858 IS is_A_all,*is_p_r = NULL; 859 MatType Stype; 860 PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL; 861 PetscScalar *SEj_arr = NULL,*SEjinv_arr = NULL; 862 const PetscScalar *rS_data; 863 PetscInt n,n_I,size_schur,size_active_schur,cum,cum2; 864 PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE; 865 PetscBool schur_has_vertices,factor_workaround; 866 PetscBool use_cholesky; 867 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 868 PetscBool oldpin; 869 #endif 870 871 /* get sizes */ 872 n_I = 0; 873 if (is_I_layer) { 874 PetscCall(ISGetLocalSize(is_I_layer,&n_I)); 875 } 876 economic = PETSC_FALSE; 877 PetscCall(ISGetLocalSize(sub_schurs->is_I,&cum)); 878 if (cum != n_I) economic = PETSC_TRUE; 879 PetscCall(MatGetLocalSize(sub_schurs->A,&n,NULL)); 880 size_active_schur = local_size; 881 882 /* import scaling vector (wrong formulation if we have 3D edges) */ 883 if (scaling && compute_Stilda) { 884 const PetscScalar *array; 885 PetscScalar *array2; 886 const PetscInt *idxs; 887 PetscInt i; 888 889 PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs)); 890 PetscCall(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall)); 891 PetscCall(VecGetArrayRead(scaling,&array)); 892 PetscCall(VecGetArray(Dall,&array2)); 893 for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]]; 894 PetscCall(VecRestoreArray(Dall,&array2)); 895 PetscCall(VecRestoreArrayRead(scaling,&array)); 896 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs)); 897 deluxe = PETSC_FALSE; 898 } 899 900 /* size active schurs does not count any dirichlet or vertex dof on the interface */ 901 factor_workaround = PETSC_FALSE; 902 schur_has_vertices = PETSC_FALSE; 903 cum = n_I+size_active_schur; 904 if (sub_schurs->is_dir) { 905 const PetscInt* idxs; 906 PetscInt n_dir; 907 908 PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir)); 909 PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs)); 910 PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir)); 911 PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs)); 912 cum += n_dir; 913 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 914 } 915 /* include the primal vertices in the Schur complement */ 916 if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 917 PetscInt n_v; 918 919 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 920 if (n_v) { 921 const PetscInt* idxs; 922 923 PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs)); 924 PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_v)); 925 PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 926 cum += n_v; 927 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 928 schur_has_vertices = PETSC_TRUE; 929 } 930 } 931 size_schur = cum - n_I; 932 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all)); 933 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 934 oldpin = sub_schurs->A->boundtocpu; 935 PetscCall(MatBindToCPU(sub_schurs->A,PETSC_TRUE)); 936 #endif 937 if (cum == n) { 938 PetscCall(ISSetPermutation(is_A_all)); 939 PetscCall(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A)); 940 } else { 941 PetscCall(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A)); 942 } 943 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 944 PetscCall(MatBindToCPU(sub_schurs->A,oldpin)); 945 #endif 946 PetscCall(MatSetOptionsPrefix(A,sub_schurs->prefix)); 947 PetscCall(MatAppendOptionsPrefix(A,"sub_schurs_")); 948 949 /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 950 this is a workaround */ 951 if (benign_n) { 952 Vec v,benign_AIIm1_ones; 953 ISLocalToGlobalMapping N_to_reor; 954 IS is_p0,is_p0_p; 955 PetscScalar *cs_AIB,*AIIm1_data; 956 PetscInt sizeA; 957 958 PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor)); 959 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 960 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p)); 961 PetscCall(ISDestroy(&is_p0)); 962 PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 963 PetscCall(VecGetSize(v,&sizeA)); 964 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat)); 965 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat)); 966 PetscCall(MatDenseGetArray(cs_AIB_mat,&cs_AIB)); 967 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 968 PetscCall(PetscMalloc1(benign_n,&is_p_r)); 969 /* compute colsum of A_IB restricted to pressures */ 970 for (i=0;i<benign_n;i++) { 971 const PetscScalar *array; 972 const PetscInt *idxs; 973 PetscInt j,nz; 974 975 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i])); 976 PetscCall(ISGetLocalSize(is_p_r[i],&nz)); 977 PetscCall(ISGetIndices(is_p_r[i],&idxs)); 978 for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.; 979 PetscCall(ISRestoreIndices(is_p_r[i],&idxs)); 980 PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 981 PetscCall(MatMult(A,benign_AIIm1_ones,v)); 982 PetscCall(VecResetArray(benign_AIIm1_ones)); 983 PetscCall(VecGetArrayRead(v,&array)); 984 for (j=0;j<size_schur;j++) { 985 #if defined(PETSC_USE_COMPLEX) 986 cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz)); 987 #else 988 cs_AIB[i*size_schur + j] = array[j+n_I]/nz; 989 #endif 990 } 991 PetscCall(VecRestoreArrayRead(v,&array)); 992 } 993 PetscCall(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB)); 994 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 995 PetscCall(VecDestroy(&v)); 996 PetscCall(VecDestroy(&benign_AIIm1_ones)); 997 PetscCall(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 998 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 999 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 1000 PetscCall(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL)); 1001 PetscCall(ISDestroy(&is_p0_p)); 1002 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 1003 } 1004 PetscCall(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric)); 1005 PetscCall(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian)); 1006 PetscCall(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef)); 1007 1008 /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 1009 use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 1010 1011 /* when using the benign subspace trick, the local Schur complements are SPD */ 1012 /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 1013 Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 1014 if (benign_trick) { 1015 sub_schurs->is_posdef = PETSC_TRUE; 1016 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg)); 1017 if (flg) use_cholesky = PETSC_FALSE; 1018 } 1019 1020 if (n_I) { 1021 IS is_schur; 1022 char stype[64]; 1023 PetscBool gpu = PETSC_FALSE; 1024 1025 if (use_cholesky) { 1026 PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F)); 1027 } else { 1028 PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F)); 1029 } 1030 PetscCall(MatSetErrorIfFailure(A,PETSC_TRUE)); 1031 #if defined(PETSC_HAVE_MKL_PARDISO) 1032 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F,10,10)); 1033 #endif 1034 /* subsets ordered last */ 1035 PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur)); 1036 PetscCall(MatFactorSetSchurIS(F,is_schur)); 1037 PetscCall(ISDestroy(&is_schur)); 1038 1039 /* factorization step */ 1040 if (use_cholesky) { 1041 PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,NULL)); 1042 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1043 PetscCall(MatMumpsSetIcntl(F,19,2)); 1044 #endif 1045 PetscCall(MatCholeskyFactorNumeric(F,A,NULL)); 1046 S_lower_triangular = PETSC_TRUE; 1047 } else { 1048 PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL)); 1049 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1050 PetscCall(MatMumpsSetIcntl(F,19,3)); 1051 #endif 1052 PetscCall(MatLUFactorNumeric(F,A,NULL)); 1053 } 1054 PetscCall(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view")); 1055 1056 if (matl_dbg_viewer) { 1057 Mat S; 1058 IS is; 1059 1060 PetscCall(PetscObjectSetName((PetscObject)A,"A")); 1061 PetscCall(MatView(A,matl_dbg_viewer)); 1062 PetscCall(MatFactorCreateSchurComplement(F,&S,NULL)); 1063 PetscCall(PetscObjectSetName((PetscObject)S,"S")); 1064 PetscCall(MatView(S,matl_dbg_viewer)); 1065 PetscCall(MatDestroy(&S)); 1066 PetscCall(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is)); 1067 PetscCall(PetscObjectSetName((PetscObject)is,"I")); 1068 PetscCall(ISView(is,matl_dbg_viewer)); 1069 PetscCall(ISDestroy(&is)); 1070 PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is)); 1071 PetscCall(PetscObjectSetName((PetscObject)is,"B")); 1072 PetscCall(ISView(is,matl_dbg_viewer)); 1073 PetscCall(ISDestroy(&is)); 1074 PetscCall(PetscObjectSetName((PetscObject)is_A_all,"IA")); 1075 PetscCall(ISView(is_A_all,matl_dbg_viewer)); 1076 for (i=0,cum=0;i<sub_schurs->n_subs;i++) { 1077 IS is; 1078 char name[16]; 1079 1080 PetscCall(PetscSNPrintf(name,sizeof(name),"IE%" PetscInt_FMT,i)); 1081 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1082 PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is)); 1083 PetscCall(PetscObjectSetName((PetscObject)is,name)); 1084 PetscCall(ISView(is,matl_dbg_viewer)); 1085 PetscCall(ISDestroy(&is)); 1086 if (sub_schurs->change) { 1087 Mat T; 1088 1089 PetscCall(PetscSNPrintf(name,sizeof(name),"TE%" PetscInt_FMT,i)); 1090 PetscCall(KSPGetOperators(sub_schurs->change[i],&T,NULL)); 1091 PetscCall(PetscObjectSetName((PetscObject)T,name)); 1092 PetscCall(MatView(T,matl_dbg_viewer)); 1093 PetscCall(PetscSNPrintf(name,sizeof(name),"ITE%" PetscInt_FMT,i)); 1094 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i],name)); 1095 PetscCall(ISView(sub_schurs->change_primal_sub[i],matl_dbg_viewer)); 1096 } 1097 cum += subset_size; 1098 } 1099 PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1100 } 1101 1102 /* get explicit Schur Complement computed during numeric factorization */ 1103 PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL)); 1104 PetscCall(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype))); 1105 #if defined(PETSC_HAVE_CUDA) 1106 PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"")); 1107 #endif 1108 if (gpu) { 1109 PetscCall(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype))); 1110 } 1111 PetscCall(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL)); 1112 PetscCall(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all)); 1113 PetscCall(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef)); 1114 PetscCall(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian)); 1115 PetscCall(MatGetType(S_all,&Stype)); 1116 1117 /* we can reuse the solvers if we are not using the economic version */ 1118 reuse_solvers = (PetscBool)(reuse_solvers && !economic); 1119 if (!sub_schurs->gdsw) { 1120 factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 1121 if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) 1122 reuse_solvers = factor_workaround = PETSC_FALSE; 1123 } 1124 solver_S = PETSC_TRUE; 1125 1126 /* update the Schur complement with the change of basis on the pressures */ 1127 if (benign_n) { 1128 const PetscScalar *cs_AIB; 1129 PetscScalar *S_data,*AIIm1_data; 1130 Mat S2 = NULL,S3 = NULL; /* dbg */ 1131 PetscScalar *S2_data,*S3_data; /* dbg */ 1132 Vec v,benign_AIIm1_ones; 1133 PetscInt sizeA; 1134 1135 PetscCall(MatDenseGetArray(S_all,&S_data)); 1136 PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 1137 PetscCall(VecGetSize(v,&sizeA)); 1138 #if defined(PETSC_HAVE_MUMPS) 1139 PetscCall(MatMumpsSetIcntl(F,26,0)); 1140 #endif 1141 #if defined(PETSC_HAVE_MKL_PARDISO) 1142 PetscCall(MatMkl_PardisoSetCntl(F,70,1)); 1143 #endif 1144 PetscCall(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB)); 1145 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 1146 if (matl_dbg_viewer) { 1147 PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2)); 1148 PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3)); 1149 PetscCall(MatDenseGetArray(S2,&S2_data)); 1150 PetscCall(MatDenseGetArray(S3,&S3_data)); 1151 } 1152 for (i=0;i<benign_n;i++) { 1153 PetscScalar *array,sum = 0.,one = 1.,*sums; 1154 const PetscInt *idxs; 1155 PetscInt k,j,nz; 1156 PetscBLASInt B_k,B_n; 1157 1158 PetscCall(PetscCalloc1(benign_n,&sums)); 1159 PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 1160 PetscCall(VecCopy(benign_AIIm1_ones,v)); 1161 PetscCall(MatSolve(F,v,benign_AIIm1_ones)); 1162 PetscCall(MatMult(A,benign_AIIm1_ones,v)); 1163 PetscCall(VecResetArray(benign_AIIm1_ones)); 1164 /* p0 dofs (eliminated) are excluded from the sums */ 1165 for (k=0;k<benign_n;k++) { 1166 PetscCall(ISGetLocalSize(is_p_r[k],&nz)); 1167 PetscCall(ISGetIndices(is_p_r[k],&idxs)); 1168 for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i]; 1169 PetscCall(ISRestoreIndices(is_p_r[k],&idxs)); 1170 } 1171 PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array)); 1172 if (matl_dbg_viewer) { 1173 Vec vv; 1174 char name[16]; 1175 1176 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv)); 1177 PetscCall(PetscSNPrintf(name,sizeof(name),"Pvs%" PetscInt_FMT,i)); 1178 PetscCall(PetscObjectSetName((PetscObject)vv,name)); 1179 PetscCall(VecView(vv,matl_dbg_viewer)); 1180 } 1181 /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 1182 /* cs_AIB already scaled by 1./nz */ 1183 B_k = 1; 1184 for (k=0;k<benign_n;k++) { 1185 sum = sums[k]; 1186 PetscCall(PetscBLASIntCast(size_schur,&B_n)); 1187 1188 if (PetscAbsScalar(sum) == 0.0) continue; 1189 if (k == i) { 1190 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 1191 if (matl_dbg_viewer) { 1192 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 1193 } 1194 } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 1195 sum /= 2.0; 1196 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 1197 if (matl_dbg_viewer) { 1198 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 1199 } 1200 } 1201 } 1202 sum = 1.; 1203 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 1204 if (matl_dbg_viewer) { 1205 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n)); 1206 } 1207 PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 1208 /* set p0 entry of AIIm1_ones to zero */ 1209 PetscCall(ISGetLocalSize(is_p_r[i],&nz)); 1210 PetscCall(ISGetIndices(is_p_r[i],&idxs)); 1211 for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.; 1212 PetscCall(ISRestoreIndices(is_p_r[i],&idxs)); 1213 PetscCall(PetscFree(sums)); 1214 } 1215 PetscCall(VecDestroy(&benign_AIIm1_ones)); 1216 if (matl_dbg_viewer) { 1217 PetscCall(MatDenseRestoreArray(S2,&S2_data)); 1218 PetscCall(MatDenseRestoreArray(S3,&S3_data)); 1219 } 1220 if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1221 PetscInt k,j; 1222 for (k=0;k<size_schur;k++) { 1223 for (j=k;j<size_schur;j++) { 1224 S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]); 1225 } 1226 } 1227 } 1228 1229 /* restore defaults */ 1230 #if defined(PETSC_HAVE_MUMPS) 1231 PetscCall(MatMumpsSetIcntl(F,26,-1)); 1232 #endif 1233 #if defined(PETSC_HAVE_MKL_PARDISO) 1234 PetscCall(MatMkl_PardisoSetCntl(F,70,0)); 1235 #endif 1236 PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB)); 1237 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 1238 PetscCall(VecDestroy(&v)); 1239 PetscCall(MatDenseRestoreArray(S_all,&S_data)); 1240 if (matl_dbg_viewer) { 1241 Mat S; 1242 1243 PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1244 PetscCall(MatFactorCreateSchurComplement(F,&S,NULL)); 1245 PetscCall(PetscObjectSetName((PetscObject)S,"Sb")); 1246 PetscCall(MatView(S,matl_dbg_viewer)); 1247 PetscCall(MatDestroy(&S)); 1248 PetscCall(PetscObjectSetName((PetscObject)S2,"S2P")); 1249 PetscCall(MatView(S2,matl_dbg_viewer)); 1250 PetscCall(PetscObjectSetName((PetscObject)S3,"S3P")); 1251 PetscCall(MatView(S3,matl_dbg_viewer)); 1252 PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs")); 1253 PetscCall(MatView(cs_AIB_mat,matl_dbg_viewer)); 1254 PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL)); 1255 } 1256 PetscCall(MatDestroy(&S2)); 1257 PetscCall(MatDestroy(&S3)); 1258 } 1259 if (!reuse_solvers) { 1260 for (i=0;i<benign_n;i++) { 1261 PetscCall(ISDestroy(&is_p_r[i])); 1262 } 1263 PetscCall(PetscFree(is_p_r)); 1264 PetscCall(MatDestroy(&cs_AIB_mat)); 1265 PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1266 } 1267 } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 1268 PetscCall(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all)); 1269 PetscCall(MatGetType(S_all,&Stype)); 1270 reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1271 factor_workaround = PETSC_FALSE; 1272 solver_S = PETSC_FALSE; 1273 } 1274 1275 if (reuse_solvers) { 1276 Mat A_II,pA_II,Afake; 1277 Vec vec1_B; 1278 PCBDDCReuseSolvers msolv_ctx; 1279 PetscInt n_R; 1280 1281 if (sub_schurs->reuse_solver) { 1282 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1283 } else { 1284 PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1285 } 1286 msolv_ctx = sub_schurs->reuse_solver; 1287 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,&pA_II,NULL,NULL,NULL)); 1288 PetscCall(PetscObjectReference((PetscObject)F)); 1289 msolv_ctx->F = F; 1290 PetscCall(MatCreateVecs(F,&msolv_ctx->sol,NULL)); 1291 /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1292 { 1293 PetscScalar *array; 1294 PetscInt n; 1295 1296 PetscCall(VecGetLocalSize(msolv_ctx->sol,&n)); 1297 PetscCall(VecGetArray(msolv_ctx->sol,&array)); 1298 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs)); 1299 PetscCall(VecRestoreArray(msolv_ctx->sol,&array)); 1300 } 1301 msolv_ctx->has_vertices = schur_has_vertices; 1302 1303 /* interior solver */ 1304 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver)); 1305 PetscCall(PCSetOperators(msolv_ctx->interior_solver,A_II,pA_II)); 1306 PetscCall(PCSetType(msolv_ctx->interior_solver,PCSHELL)); 1307 PetscCall(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)")); 1308 PetscCall(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx)); 1309 PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 1310 PetscCall(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior)); 1311 PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose)); 1312 if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Destroy)); 1313 1314 /* correction solver */ 1315 if (!sub_schurs->gdsw) { 1316 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver)); 1317 PetscCall(PCSetType(msolv_ctx->correction_solver,PCSHELL)); 1318 PetscCall(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)")); 1319 PetscCall(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx)); 1320 PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 1321 PetscCall(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction)); 1322 PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose)); 1323 1324 /* scatter and vecs for Schur complement solver */ 1325 PetscCall(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B)); 1326 PetscCall(MatCreateVecs(sub_schurs->S,&vec1_B,NULL)); 1327 if (!schur_has_vertices) { 1328 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B)); 1329 PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B)); 1330 PetscCall(PetscObjectReference((PetscObject)is_A_all)); 1331 msolv_ctx->is_R = is_A_all; 1332 } else { 1333 IS is_B_all; 1334 const PetscInt* idxs; 1335 PetscInt dual,n_v,n; 1336 1337 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 1338 dual = size_schur - n_v; 1339 PetscCall(ISGetLocalSize(is_A_all,&n)); 1340 PetscCall(ISGetIndices(is_A_all,&idxs)); 1341 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all)); 1342 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B)); 1343 PetscCall(ISDestroy(&is_B_all)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all)); 1345 PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B)); 1346 PetscCall(ISDestroy(&is_B_all)); 1347 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R)); 1348 PetscCall(ISRestoreIndices(is_A_all,&idxs)); 1349 } 1350 PetscCall(ISGetLocalSize(msolv_ctx->is_R,&n_R)); 1351 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake)); 1352 PetscCall(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY)); 1353 PetscCall(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY)); 1354 PetscCall(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake)); 1355 PetscCall(MatDestroy(&Afake)); 1356 PetscCall(VecDestroy(&vec1_B)); 1357 } 1358 /* communicate benign info to solver context */ 1359 if (benign_n) { 1360 PetscScalar *array; 1361 1362 msolv_ctx->benign_n = benign_n; 1363 msolv_ctx->benign_zerodiag_subs = is_p_r; 1364 PetscCall(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals)); 1365 msolv_ctx->benign_csAIB = cs_AIB_mat; 1366 PetscCall(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL)); 1367 PetscCall(VecGetArray(msolv_ctx->benign_corr_work,&array)); 1368 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec)); 1369 PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work,&array)); 1370 msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1371 } 1372 } else { 1373 if (sub_schurs->reuse_solver) { 1374 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1375 } 1376 PetscCall(PetscFree(sub_schurs->reuse_solver)); 1377 } 1378 PetscCall(MatDestroy(&A)); 1379 PetscCall(ISDestroy(&is_A_all)); 1380 1381 /* Work arrays */ 1382 PetscCall(PetscMalloc1(max_subset_size*max_subset_size,&work)); 1383 1384 /* S_Ej_all */ 1385 cum = cum2 = 0; 1386 PetscCall(MatDenseGetArrayRead(S_all,&rS_data)); 1387 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr)); 1388 if (sub_schurs->sum_S_Ej_inv_all) { 1389 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 1390 } 1391 if (sub_schurs->gdsw) { 1392 PetscCall(MatCreateSubMatrices(sub_schurs->A,sub_schurs->n_subs,sub_schurs->is_subs,sub_schurs->is_subs,MAT_INITIAL_MATRIX,&gdswA)); 1393 } 1394 for (i=0;i<sub_schurs->n_subs;i++) { 1395 PetscInt j; 1396 1397 /* get S_E (or K^i_EE for GDSW) */ 1398 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1399 if (sub_schurs->gdsw) { 1400 Mat T; 1401 1402 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&T)); 1403 PetscCall(MatConvert(gdswA[i],MATDENSE,MAT_REUSE_MATRIX,&T)); 1404 PetscCall(MatDestroy(&T)); 1405 } else { 1406 if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1407 PetscInt k; 1408 for (k=0;k<subset_size;k++) { 1409 for (j=k;j<subset_size;j++) { 1410 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1411 work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]); 1412 } 1413 } 1414 } else { /* just copy to workspace */ 1415 PetscInt k; 1416 for (k=0;k<subset_size;k++) { 1417 for (j=0;j<subset_size;j++) { 1418 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1419 } 1420 } 1421 } 1422 } 1423 /* insert S_E values */ 1424 if (sub_schurs->change) { 1425 Mat change_sub,SEj,T; 1426 1427 /* change basis */ 1428 PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 1429 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 1430 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1431 Mat T2; 1432 PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 1433 PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1434 PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 1435 PetscCall(MatDestroy(&T2)); 1436 } else { 1437 PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1438 } 1439 PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 1440 PetscCall(MatDestroy(&T)); 1441 PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL)); 1442 PetscCall(MatDestroy(&SEj)); 1443 } 1444 PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 1445 if (compute_Stilda) { 1446 if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 1447 Mat M; 1448 const PetscScalar *vals; 1449 PetscBool isdense,isdensecuda; 1450 1451 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M)); 1452 PetscCall(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef)); 1453 PetscCall(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian)); 1454 if (!PetscBTLookup(sub_schurs->is_edge,i)) { 1455 PetscCall(MatSetType(M,Stype)); 1456 } 1457 PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense)); 1458 PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda)); 1459 if (use_cholesky) { 1460 PetscCall(MatCholeskyFactor(M,NULL,NULL)); 1461 } else { 1462 PetscCall(MatLUFactor(M,NULL,NULL,NULL)); 1463 } 1464 if (isdense) { 1465 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1466 #if defined(PETSC_HAVE_CUDA) 1467 } else if (isdensecuda) { 1468 PetscCall(MatSeqDenseCUDAInvertFactors_Private(M)); 1469 #endif 1470 } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype); 1471 PetscCall(MatDenseGetArrayRead(M,&vals)); 1472 PetscCall(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size)); 1473 PetscCall(MatDenseRestoreArrayRead(M,&vals)); 1474 PetscCall(MatDestroy(&M)); 1475 } else if (scaling) { /* not using deluxe */ 1476 Mat SEj; 1477 Vec D; 1478 PetscScalar *array; 1479 1480 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 1481 PetscCall(VecGetArray(Dall,&array)); 1482 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D)); 1483 PetscCall(VecRestoreArray(Dall,&array)); 1484 PetscCall(VecShift(D,-1.)); 1485 PetscCall(MatDiagonalScale(SEj,D,D)); 1486 PetscCall(MatDestroy(&SEj)); 1487 PetscCall(VecDestroy(&D)); 1488 PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 1489 } 1490 } 1491 cum += subset_size; 1492 cum2 += subset_size*(size_schur + 1); 1493 SEj_arr += subset_size*subset_size; 1494 if (SEjinv_arr) SEjinv_arr += subset_size*subset_size; 1495 } 1496 if (sub_schurs->gdsw) { 1497 PetscCall(MatDestroySubMatrices(sub_schurs->n_subs,&gdswA)); 1498 } 1499 PetscCall(MatDenseRestoreArrayRead(S_all,&rS_data)); 1500 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr)); 1501 if (sub_schurs->sum_S_Ej_inv_all) { 1502 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 1503 } 1504 if (solver_S) { 1505 PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1506 } 1507 1508 /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 1509 however, preliminary tests indicate using GPUs is still faster in the solve phase */ 1510 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 1511 if (reuse_solvers) { 1512 Mat St; 1513 MatFactorSchurStatus st; 1514 1515 flg = PETSC_FALSE; 1516 PetscCall(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL)); 1517 PetscCall(MatFactorGetSchurComplement(F,&St,&st)); 1518 PetscCall(MatBindToCPU(St,flg)); 1519 PetscCall(MatFactorRestoreSchurComplement(F,&St,st)); 1520 } 1521 #endif 1522 1523 schur_factor = NULL; 1524 if (compute_Stilda && size_active_schur) { 1525 1526 if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 1527 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1528 PetscCall(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur)); 1529 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1530 } else { 1531 Mat S_all_inv=NULL; 1532 1533 if (solver_S && !sub_schurs->gdsw) { 1534 /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1535 The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 1536 if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1537 PetscScalar *data; 1538 PetscInt nd = 0; 1539 1540 if (!use_potr) { 1541 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1542 } 1543 PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1544 PetscCall(MatDenseGetArray(S_all_inv,&data)); 1545 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1546 PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1547 } 1548 1549 /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 1550 if (schur_has_vertices) { 1551 Mat M; 1552 PetscScalar *tdata; 1553 PetscInt nv = 0, news; 1554 1555 PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 1556 news = size_active_schur + nv; 1557 PetscCall(PetscCalloc1(news*news,&tdata)); 1558 for (i=0;i<size_active_schur;i++) { 1559 PetscCall(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i)); 1560 PetscCall(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv)); 1561 } 1562 for (i=0;i<nv;i++) { 1563 PetscInt k = i+size_active_schur; 1564 PetscCall(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i)); 1565 } 1566 1567 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M)); 1568 PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 1569 PetscCall(MatCholeskyFactor(M,NULL,NULL)); 1570 /* save the factors */ 1571 cum = 0; 1572 PetscCall(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor)); 1573 for (i=0;i<size_active_schur;i++) { 1574 PetscCall(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i)); 1575 cum += size_active_schur - i; 1576 } 1577 for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)])); 1578 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1579 /* move back just the active dofs to the Schur complement */ 1580 for (i=0;i<size_active_schur;i++) { 1581 PetscCall(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur)); 1582 } 1583 PetscCall(PetscFree(tdata)); 1584 PetscCall(MatDestroy(&M)); 1585 } else { /* we can factorize and invert just the activedofs */ 1586 Mat M; 1587 PetscScalar *aux; 1588 1589 PetscCall(PetscMalloc1(nd,&aux)); 1590 for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)]; 1591 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M)); 1592 PetscCall(MatDenseSetLDA(M,size_schur)); 1593 PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 1594 PetscCall(MatCholeskyFactor(M,NULL,NULL)); 1595 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1596 PetscCall(MatDestroy(&M)); 1597 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M)); 1598 PetscCall(MatZeroEntries(M)); 1599 PetscCall(MatDestroy(&M)); 1600 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M)); 1601 PetscCall(MatDenseSetLDA(M,size_schur)); 1602 PetscCall(MatZeroEntries(M)); 1603 PetscCall(MatDestroy(&M)); 1604 for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i]; 1605 PetscCall(PetscFree(aux)); 1606 } 1607 PetscCall(MatDenseRestoreArray(S_all_inv,&data)); 1608 } else { /* use MatFactor calls to invert S */ 1609 PetscCall(MatFactorInvertSchurComplement(F)); 1610 PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1611 } 1612 } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 1613 PetscCall(PetscObjectReference((PetscObject)S_all)); 1614 S_all_inv = S_all; 1615 PetscCall(MatDenseGetArray(S_all_inv,&S_data)); 1616 PetscCall(PetscBLASIntCast(size_schur,&B_N)); 1617 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1618 if (use_potr) { 1619 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr)); 1620 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 1621 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr)); 1622 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1623 } else if (use_sytr) { 1624 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1625 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1626 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr)); 1627 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1628 } else { 1629 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr)); 1630 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1631 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1632 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1633 } 1634 PetscCall(PetscLogFlops(1.0*size_schur*size_schur*size_schur)); 1635 PetscCall(PetscFPTrapPop()); 1636 PetscCall(MatDenseRestoreArray(S_all_inv,&S_data)); 1637 } else if (sub_schurs->gdsw) { 1638 Mat tS, tX, SEj, S_II, S_IE, S_EE; 1639 KSP pS_II; 1640 PC pS_II_pc; 1641 IS EE, II; 1642 PetscInt nS; 1643 1644 PetscCall(MatFactorCreateSchurComplement(F,&tS,NULL)); 1645 PetscCall(MatGetSize(tS,&nS,NULL)); 1646 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1647 for (i=0,cum=0;i<sub_schurs->n_subs;i++) { /* naive implementation */ 1648 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1649 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,SEjinv_arr,&SEj)); 1650 1651 PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&EE)); 1652 PetscCall(ISComplement(EE,0,nS,&II)); 1653 PetscCall(MatCreateSubMatrix(tS,II,II,MAT_INITIAL_MATRIX,&S_II)); 1654 PetscCall(MatCreateSubMatrix(tS,II,EE,MAT_INITIAL_MATRIX,&S_IE)); 1655 PetscCall(MatCreateSubMatrix(tS,EE,EE,MAT_INITIAL_MATRIX,&S_EE)); 1656 PetscCall(ISDestroy(&II)); 1657 PetscCall(ISDestroy(&EE)); 1658 1659 PetscCall(KSPCreate(PETSC_COMM_SELF,&pS_II)); 1660 PetscCall(KSPSetType(pS_II,KSPPREONLY)); 1661 PetscCall(KSPGetPC(pS_II,&pS_II_pc)); 1662 PetscCall(PCSetType(pS_II_pc,PCSVD)); 1663 PetscCall(KSPSetOptionsPrefix(pS_II,sub_schurs->prefix)); 1664 PetscCall(KSPAppendOptionsPrefix(pS_II,"pseudo_")); 1665 PetscCall(KSPSetOperators(pS_II,S_II,S_II)); 1666 PetscCall(MatDestroy(&S_II)); 1667 PetscCall(KSPSetFromOptions(pS_II)); 1668 PetscCall(KSPSetUp(pS_II)); 1669 PetscCall(MatDuplicate(S_IE,MAT_DO_NOT_COPY_VALUES,&tX)); 1670 PetscCall(KSPMatSolve(pS_II,S_IE,tX)); 1671 PetscCall(KSPDestroy(&pS_II)); 1672 1673 PetscCall(MatTransposeMatMult(S_IE,tX,MAT_REUSE_MATRIX,PETSC_DEFAULT,&SEj)); 1674 PetscCall(MatDestroy(&S_IE)); 1675 PetscCall(MatDestroy(&tX)); 1676 PetscCall(MatAYPX(SEj,-1,S_EE,SAME_NONZERO_PATTERN)); 1677 PetscCall(MatDestroy(&S_EE)); 1678 1679 PetscCall(MatDestroy(&SEj)); 1680 cum += subset_size; 1681 SEjinv_arr += subset_size*subset_size; 1682 } 1683 PetscCall(MatDestroy(&tS)); 1684 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1685 } 1686 /* S_Ej_tilda_all */ 1687 cum = cum2 = 0; 1688 rS_data = NULL; 1689 if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv,&rS_data)); 1690 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1691 for (i=0;i<sub_schurs->n_subs;i++) { 1692 PetscInt j; 1693 1694 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1695 /* get (St^-1)_E */ 1696 /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 1697 will be properly accessed later during adaptive selection */ 1698 if (rS_data) { 1699 if (S_lower_triangular) { 1700 PetscInt k; 1701 if (sub_schurs->change) { 1702 for (k=0;k<subset_size;k++) { 1703 for (j=k;j<subset_size;j++) { 1704 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1705 work[j*subset_size+k] = work[k*subset_size+j]; 1706 } 1707 } 1708 } else { 1709 for (k=0;k<subset_size;k++) { 1710 for (j=k;j<subset_size;j++) { 1711 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1712 } 1713 } 1714 } 1715 } else { 1716 PetscInt k; 1717 for (k=0;k<subset_size;k++) { 1718 for (j=0;j<subset_size;j++) { 1719 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1720 } 1721 } 1722 } 1723 } 1724 if (sub_schurs->change) { 1725 Mat change_sub,SEj,T; 1726 PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1./PETSC_SMALL; 1727 1728 /* change basis */ 1729 PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 1730 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,rS_data ? work : SEjinv_arr,&SEj)); 1731 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1732 Mat T2; 1733 PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 1734 PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1735 PetscCall(MatDestroy(&T2)); 1736 PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 1737 } else { 1738 PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1739 } 1740 PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 1741 PetscCall(MatDestroy(&T)); 1742 PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],val,NULL,NULL)); 1743 PetscCall(MatDestroy(&SEj)); 1744 } 1745 if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size)); 1746 cum += subset_size; 1747 cum2 += subset_size*(size_schur + 1); 1748 SEjinv_arr += subset_size*subset_size; 1749 } 1750 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1751 if (S_all_inv) { 1752 PetscCall(MatDenseRestoreArrayRead(S_all_inv,&rS_data)); 1753 if (solver_S) { 1754 if (schur_has_vertices) { 1755 PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED)); 1756 } else { 1757 PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED)); 1758 } 1759 } 1760 } 1761 PetscCall(MatDestroy(&S_all_inv)); 1762 } 1763 1764 /* move back factors if needed */ 1765 if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1766 Mat S_tmp; 1767 PetscInt nd = 0; 1768 1769 PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1770 PetscCall(MatFactorGetSchurComplement(F,&S_tmp,NULL)); 1771 if (use_potr) { 1772 PetscScalar *data; 1773 1774 PetscCall(MatDenseGetArray(S_tmp,&data)); 1775 PetscCall(PetscArrayzero(data,size_schur*size_schur)); 1776 1777 if (S_lower_triangular) { 1778 cum = 0; 1779 for (i=0;i<size_active_schur;i++) { 1780 PetscCall(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i)); 1781 cum += size_active_schur-i; 1782 } 1783 } else { 1784 PetscCall(PetscArraycpy(data,schur_factor,size_schur*size_schur)); 1785 } 1786 if (sub_schurs->is_dir) { 1787 PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1788 for (i=0;i<nd;i++) { 1789 data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i]; 1790 } 1791 } 1792 /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1793 set the diagonal entry of the Schur factor to a very large value */ 1794 for (i=size_active_schur+nd;i<size_schur;i++) { 1795 data[i*(size_schur+1)] = infty; 1796 } 1797 PetscCall(MatDenseRestoreArray(S_tmp,&data)); 1798 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1799 PetscCall(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED)); 1800 } 1801 } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1802 PetscScalar *data; 1803 PetscInt nd = 0; 1804 1805 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1806 PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1807 } 1808 PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL)); 1809 PetscCall(MatDenseGetArray(S_all,&data)); 1810 for (i=0;i<size_active_schur;i++) { 1811 PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 1812 } 1813 for (i=size_active_schur+nd;i<size_schur;i++) { 1814 PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 1815 data[i*(size_schur+1)] = infty; 1816 } 1817 PetscCall(MatDenseRestoreArray(S_all,&data)); 1818 PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1819 } 1820 PetscCall(PetscFree(work)); 1821 PetscCall(PetscFree(schur_factor)); 1822 PetscCall(VecDestroy(&Dall)); 1823 } 1824 PetscCall(ISDestroy(&is_I_layer)); 1825 PetscCall(MatDestroy(&S_all)); 1826 PetscCall(MatDestroy(&A_BB)); 1827 PetscCall(MatDestroy(&A_IB)); 1828 PetscCall(MatDestroy(&A_BI)); 1829 PetscCall(MatDestroy(&F)); 1830 1831 PetscCall(PetscMalloc1(sub_schurs->n_subs,&nnz)); 1832 for (i=0;i<sub_schurs->n_subs;i++) { 1833 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i])); 1834 } 1835 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer)); 1836 PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz)); 1837 PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 1838 PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 1839 if (compute_Stilda) { 1840 PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz)); 1841 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 1842 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 1843 if (deluxe) { 1844 PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz)); 1845 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 1846 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 1847 } 1848 } 1849 PetscCall(ISDestroy(&is_I_layer)); 1850 1851 /* Get local part of (\sum_j S_Ej) */ 1852 if (!sub_schurs->sum_S_Ej_all) { 1853 PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all)); 1854 } 1855 PetscCall(VecSet(gstash,0.0)); 1856 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray)); 1857 PetscCall(VecPlaceArray(lstash,stasharray)); 1858 PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1859 PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1860 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray)); 1861 PetscCall(VecResetArray(lstash)); 1862 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray)); 1863 PetscCall(VecPlaceArray(lstash,stasharray)); 1864 PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1865 PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1866 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray)); 1867 PetscCall(VecResetArray(lstash)); 1868 1869 /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 1870 if (compute_Stilda) { 1871 PetscCall(VecSet(gstash,0.0)); 1872 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 1873 PetscCall(VecPlaceArray(lstash,stasharray)); 1874 PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1875 PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1876 PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1877 PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1878 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 1879 PetscCall(VecResetArray(lstash)); 1880 if (deluxe) { 1881 PetscCall(VecSet(gstash,0.0)); 1882 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 1883 PetscCall(VecPlaceArray(lstash,stasharray)); 1884 PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1885 PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1886 PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1887 PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1888 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 1889 PetscCall(VecResetArray(lstash)); 1890 } else if (!sub_schurs->gdsw) { 1891 PetscScalar *array; 1892 PetscInt cum; 1893 1894 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 1895 cum = 0; 1896 for (i=0;i<sub_schurs->n_subs;i++) { 1897 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1898 PetscCall(PetscBLASIntCast(subset_size,&B_N)); 1899 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1900 if (use_potr) { 1901 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr)); 1902 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 1903 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr)); 1904 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1905 } else if (use_sytr) { 1906 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1907 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1908 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr)); 1909 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1910 } else { 1911 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr)); 1912 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1913 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1914 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1915 } 1916 PetscCall(PetscLogFlops(1.0*subset_size*subset_size*subset_size)); 1917 PetscCall(PetscFPTrapPop()); 1918 cum += subset_size*subset_size; 1919 } 1920 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 1921 PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 1922 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 1923 sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 1924 } 1925 } 1926 PetscCall(VecDestroy(&lstash)); 1927 PetscCall(VecDestroy(&gstash)); 1928 PetscCall(VecScatterDestroy(&sstash)); 1929 1930 if (matl_dbg_viewer) { 1931 if (sub_schurs->S_Ej_all) { 1932 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE")); 1933 PetscCall(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer)); 1934 } 1935 if (sub_schurs->sum_S_Ej_all) { 1936 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE")); 1937 PetscCall(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer)); 1938 } 1939 if (sub_schurs->sum_S_Ej_inv_all) { 1940 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm")); 1941 PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer)); 1942 } 1943 if (sub_schurs->sum_S_Ej_tilda_all) { 1944 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt")); 1945 PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer)); 1946 } 1947 } 1948 1949 /* free workspace */ 1950 if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1951 if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 1952 PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 1953 PetscCall(PetscFree2(Bwork,pivots)); 1954 PetscCall(PetscCommDestroy(&comm_n)); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) 1959 { 1960 IS *faces,*edges,*all_cc,vertices; 1961 PetscInt s,i,n_faces,n_edges,n_all_cc; 1962 PetscBool is_sorted,ispardiso,ismumps; 1963 1964 PetscFunctionBegin; 1965 PetscCall(ISSorted(is_I,&is_sorted)); 1966 PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 1967 PetscCall(ISSorted(is_B,&is_sorted)); 1968 PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 1969 1970 /* reset any previous data */ 1971 PetscCall(PCBDDCSubSchursReset(sub_schurs)); 1972 1973 sub_schurs->gdsw = gdsw; 1974 1975 /* get index sets for faces and edges (already sorted by global ordering) */ 1976 PetscCall(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 1977 n_all_cc = n_faces+n_edges; 1978 PetscCall(PetscBTCreate(n_all_cc,&sub_schurs->is_edge)); 1979 PetscCall(PetscMalloc1(n_all_cc,&all_cc)); 1980 n_all_cc = 0; 1981 for (i=0;i<n_faces;i++) { 1982 PetscCall(ISGetSize(faces[i],&s)); 1983 if (!s) continue; 1984 if (copycc) { 1985 PetscCall(ISDuplicate(faces[i],&all_cc[n_all_cc])); 1986 } else { 1987 PetscCall(PetscObjectReference((PetscObject)faces[i])); 1988 all_cc[n_all_cc] = faces[i]; 1989 } 1990 n_all_cc++; 1991 } 1992 for (i=0;i<n_edges;i++) { 1993 PetscCall(ISGetSize(edges[i],&s)); 1994 if (!s) continue; 1995 if (copycc) { 1996 PetscCall(ISDuplicate(edges[i],&all_cc[n_all_cc])); 1997 } else { 1998 PetscCall(PetscObjectReference((PetscObject)edges[i])); 1999 all_cc[n_all_cc] = edges[i]; 2000 } 2001 PetscCall(PetscBTSet(sub_schurs->is_edge,n_all_cc)); 2002 n_all_cc++; 2003 } 2004 PetscCall(PetscObjectReference((PetscObject)vertices)); 2005 sub_schurs->is_vertices = vertices; 2006 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 2007 sub_schurs->is_dir = NULL; 2008 PetscCall(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir)); 2009 2010 /* Determine if MatFactor can be used */ 2011 PetscCall(PetscStrallocpy(prefix,&sub_schurs->prefix)); 2012 #if defined(PETSC_HAVE_MUMPS) 2013 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type))); 2014 #elif defined(PETSC_HAVE_MKL_PARDISO) 2015 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type))); 2016 #else 2017 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type))); 2018 #endif 2019 #if defined(PETSC_USE_COMPLEX) 2020 sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 2021 #else 2022 sub_schurs->is_hermitian = PETSC_TRUE; 2023 #endif 2024 sub_schurs->is_posdef = PETSC_TRUE; 2025 sub_schurs->is_symmetric = PETSC_TRUE; 2026 sub_schurs->debug = PETSC_FALSE; 2027 sub_schurs->restrict_comm = PETSC_FALSE; 2028 PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC"); 2029 PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL)); 2030 PetscCall(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL)); 2031 PetscCall(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL)); 2032 PetscCall(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL)); 2033 PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL)); 2034 PetscCall(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL)); 2035 PetscOptionsEnd(); 2036 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps)); 2037 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso)); 2038 sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 2039 2040 /* for reals, symmetric and hermitian are synonims */ 2041 #if !defined(PETSC_USE_COMPLEX) 2042 sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 2043 sub_schurs->is_hermitian = sub_schurs->is_symmetric; 2044 #endif 2045 2046 PetscCall(PetscObjectReference((PetscObject)is_I)); 2047 sub_schurs->is_I = is_I; 2048 PetscCall(PetscObjectReference((PetscObject)is_B)); 2049 sub_schurs->is_B = is_B; 2050 PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 2051 sub_schurs->l2gmap = graph->l2gmap; 2052 PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 2053 sub_schurs->BtoNmap = BtoNmap; 2054 sub_schurs->n_subs = n_all_cc; 2055 sub_schurs->is_subs = all_cc; 2056 sub_schurs->S_Ej_all = NULL; 2057 sub_schurs->sum_S_Ej_all = NULL; 2058 sub_schurs->sum_S_Ej_inv_all = NULL; 2059 sub_schurs->sum_S_Ej_tilda_all = NULL; 2060 sub_schurs->is_Ej_all = NULL; 2061 PetscFunctionReturn(0); 2062 } 2063 2064 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 2065 { 2066 PCBDDCSubSchurs schurs_ctx; 2067 2068 PetscFunctionBegin; 2069 PetscCall(PetscNew(&schurs_ctx)); 2070 schurs_ctx->n_subs = 0; 2071 *sub_schurs = schurs_ctx; 2072 PetscFunctionReturn(0); 2073 } 2074 2075 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 2076 { 2077 PetscInt i; 2078 2079 PetscFunctionBegin; 2080 if (!sub_schurs) PetscFunctionReturn(0); 2081 PetscCall(PetscFree(sub_schurs->prefix)); 2082 PetscCall(MatDestroy(&sub_schurs->A)); 2083 PetscCall(MatDestroy(&sub_schurs->S)); 2084 PetscCall(ISDestroy(&sub_schurs->is_I)); 2085 PetscCall(ISDestroy(&sub_schurs->is_B)); 2086 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 2087 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 2088 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 2089 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 2090 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 2091 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 2092 PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 2093 PetscCall(ISDestroy(&sub_schurs->is_vertices)); 2094 PetscCall(ISDestroy(&sub_schurs->is_dir)); 2095 PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 2096 for (i=0;i<sub_schurs->n_subs;i++) { 2097 PetscCall(ISDestroy(&sub_schurs->is_subs[i])); 2098 } 2099 if (sub_schurs->n_subs) { 2100 PetscCall(PetscFree(sub_schurs->is_subs)); 2101 } 2102 if (sub_schurs->reuse_solver) { 2103 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2104 } 2105 PetscCall(PetscFree(sub_schurs->reuse_solver)); 2106 if (sub_schurs->change) { 2107 for (i=0;i<sub_schurs->n_subs;i++) { 2108 PetscCall(KSPDestroy(&sub_schurs->change[i])); 2109 PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 2110 } 2111 } 2112 PetscCall(PetscFree(sub_schurs->change)); 2113 PetscCall(PetscFree(sub_schurs->change_primal_sub)); 2114 sub_schurs->n_subs = 0; 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs) 2119 { 2120 PetscFunctionBegin; 2121 PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 2122 PetscCall(PetscFree(*sub_schurs)); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 2127 { 2128 PetscInt i,j,n; 2129 2130 PetscFunctionBegin; 2131 n = 0; 2132 for (i=-n_prev;i<0;i++) { 2133 PetscInt start_dof = queue_tip[i]; 2134 for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 2135 PetscInt dof = adjncy[j]; 2136 if (!PetscBTLookup(touched,dof)) { 2137 PetscCall(PetscBTSet(touched,dof)); 2138 queue_tip[n] = dof; 2139 n++; 2140 } 2141 } 2142 } 2143 *n_added = n; 2144 PetscFunctionReturn(0); 2145 } 2146