1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <../src/mat/impls/dense/seq/dense.h> 4 #include <petscblaslapack.h> 5 6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*); 8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec); 9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec); 10 11 /* if v2 is not present, correction is done in-place */ 12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13 { 14 PetscScalar *array; 15 PetscScalar *array2; 16 17 PetscFunctionBegin; 18 if (!ctx->benign_n) PetscFunctionReturn(0); 19 if (sol && full) { 20 PetscInt n_I,size_schur; 21 22 /* get sizes */ 23 CHKERRQ(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 24 CHKERRQ(VecGetSize(v,&n_I)); 25 n_I = n_I - size_schur; 26 /* get schur sol from array */ 27 CHKERRQ(VecGetArray(v,&array)); 28 CHKERRQ(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 29 CHKERRQ(VecRestoreArray(v,&array)); 30 /* apply interior sol correction */ 31 CHKERRQ(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work)); 32 CHKERRQ(VecResetArray(ctx->benign_dummy_schur_vec)); 33 CHKERRQ(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v)); 34 } 35 if (v2) { 36 PetscInt nl; 37 38 CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array)); 39 CHKERRQ(VecGetLocalSize(v2,&nl)); 40 CHKERRQ(VecGetArray(v2,&array2)); 41 CHKERRQ(PetscArraycpy(array2,array,nl)); 42 } else { 43 CHKERRQ(VecGetArray(v,&array)); 44 array2 = array; 45 } 46 if (!sol) { /* change rhs */ 47 PetscInt n; 48 for (n=0;n<ctx->benign_n;n++) { 49 PetscScalar sum = 0.; 50 const PetscInt *cols; 51 PetscInt nz,i; 52 53 CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 54 CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 55 for (i=0;i<nz-1;i++) sum += array[cols[i]]; 56 #if defined(PETSC_USE_COMPLEX) 57 sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 58 #else 59 sum = -sum/nz; 60 #endif 61 for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 62 ctx->benign_save_vals[n] = array2[cols[nz-1]]; 63 array2[cols[nz-1]] = sum; 64 CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 65 } 66 } else { 67 PetscInt n; 68 for (n=0;n<ctx->benign_n;n++) { 69 PetscScalar sum = 0.; 70 const PetscInt *cols; 71 PetscInt nz,i; 72 CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz)); 73 CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols)); 74 for (i=0;i<nz-1;i++) sum += array[cols[i]]; 75 #if defined(PETSC_USE_COMPLEX) 76 sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz)); 77 #else 78 sum = -sum/nz; 79 #endif 80 for (i=0;i<nz-1;i++) array2[cols[i]] += sum; 81 array2[cols[nz-1]] = ctx->benign_save_vals[n]; 82 CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols)); 83 } 84 } 85 if (v2) { 86 CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 87 CHKERRQ(VecRestoreArray(v2,&array2)); 88 } else { 89 CHKERRQ(VecRestoreArray(v,&array)); 90 } 91 if (!sol && full) { 92 Vec usedv; 93 PetscInt n_I,size_schur; 94 95 /* get sizes */ 96 CHKERRQ(MatGetSize(ctx->benign_csAIB,&size_schur,NULL)); 97 CHKERRQ(VecGetSize(v,&n_I)); 98 n_I = n_I - size_schur; 99 /* compute schur rhs correction */ 100 if (v2) { 101 usedv = v2; 102 } else { 103 usedv = v; 104 } 105 /* apply schur rhs correction */ 106 CHKERRQ(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work)); 107 CHKERRQ(VecGetArrayRead(usedv,(const PetscScalar**)&array)); 108 CHKERRQ(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I)); 109 CHKERRQ(VecRestoreArrayRead(usedv,(const PetscScalar**)&array)); 110 CHKERRQ(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec)); 111 CHKERRQ(VecResetArray(ctx->benign_dummy_schur_vec)); 112 } 113 PetscFunctionReturn(0); 114 } 115 116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117 { 118 PCBDDCReuseSolvers ctx; 119 PetscBool copy = PETSC_FALSE; 120 121 PetscFunctionBegin; 122 CHKERRQ(PCShellGetContext(pc,&ctx)); 123 if (full) { 124 #if defined(PETSC_HAVE_MUMPS) 125 CHKERRQ(MatMumpsSetIcntl(ctx->F,26,-1)); 126 #endif 127 #if defined(PETSC_HAVE_MKL_PARDISO) 128 CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,0)); 129 #endif 130 copy = ctx->has_vertices; 131 } else { /* interior solver */ 132 #if defined(PETSC_HAVE_MUMPS) 133 CHKERRQ(MatMumpsSetIcntl(ctx->F,26,0)); 134 #endif 135 #if defined(PETSC_HAVE_MKL_PARDISO) 136 CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,1)); 137 #endif 138 copy = PETSC_TRUE; 139 } 140 /* copy rhs into factored matrix workspace */ 141 if (copy) { 142 PetscInt n; 143 PetscScalar *array,*array_solver; 144 145 CHKERRQ(VecGetLocalSize(rhs,&n)); 146 CHKERRQ(VecGetArrayRead(rhs,(const PetscScalar**)&array)); 147 CHKERRQ(VecGetArray(ctx->rhs,&array_solver)); 148 CHKERRQ(PetscArraycpy(array_solver,array,n)); 149 CHKERRQ(VecRestoreArray(ctx->rhs,&array_solver)); 150 CHKERRQ(VecRestoreArrayRead(rhs,(const PetscScalar**)&array)); 151 152 CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full)); 153 if (transpose) { 154 CHKERRQ(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol)); 155 } else { 156 CHKERRQ(MatSolve(ctx->F,ctx->rhs,ctx->sol)); 157 } 158 CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full)); 159 160 /* get back data to caller worskpace */ 161 CHKERRQ(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 162 CHKERRQ(VecGetArray(sol,&array)); 163 CHKERRQ(PetscArraycpy(array,array_solver,n)); 164 CHKERRQ(VecRestoreArray(sol,&array)); 165 CHKERRQ(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver)); 166 } else { 167 if (ctx->benign_n) { 168 CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full)); 169 if (transpose) { 170 CHKERRQ(MatSolveTranspose(ctx->F,ctx->rhs,sol)); 171 } else { 172 CHKERRQ(MatSolve(ctx->F,ctx->rhs,sol)); 173 } 174 CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full)); 175 } else { 176 if (transpose) { 177 CHKERRQ(MatSolveTranspose(ctx->F,rhs,sol)); 178 } else { 179 CHKERRQ(MatSolve(ctx->F,rhs,sol)); 180 } 181 } 182 } 183 /* restore defaults */ 184 #if defined(PETSC_HAVE_MUMPS) 185 CHKERRQ(MatMumpsSetIcntl(ctx->F,26,-1)); 186 #endif 187 #if defined(PETSC_HAVE_MKL_PARDISO) 188 CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,0)); 189 #endif 190 PetscFunctionReturn(0); 191 } 192 193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 194 { 195 PetscFunctionBegin; 196 CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE)); 197 PetscFunctionReturn(0); 198 } 199 200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 201 { 202 PetscFunctionBegin; 203 CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE)); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 208 { 209 PetscFunctionBegin; 210 CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE)); 211 PetscFunctionReturn(0); 212 } 213 214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 215 { 216 PetscFunctionBegin; 217 CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE)); 218 PetscFunctionReturn(0); 219 } 220 221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 222 { 223 PCBDDCReuseSolvers ctx; 224 PetscBool iascii; 225 226 PetscFunctionBegin; 227 CHKERRQ(PCShellGetContext(pc,&ctx)); 228 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 229 if (iascii) { 230 CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO)); 231 } 232 CHKERRQ(MatView(ctx->F,viewer)); 233 if (iascii) { 234 CHKERRQ(PetscViewerPopFormat(viewer)); 235 } 236 PetscFunctionReturn(0); 237 } 238 239 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 240 { 241 PetscInt i; 242 243 PetscFunctionBegin; 244 CHKERRQ(MatDestroy(&reuse->F)); 245 CHKERRQ(VecDestroy(&reuse->sol)); 246 CHKERRQ(VecDestroy(&reuse->rhs)); 247 CHKERRQ(PCDestroy(&reuse->interior_solver)); 248 CHKERRQ(PCDestroy(&reuse->correction_solver)); 249 CHKERRQ(ISDestroy(&reuse->is_R)); 250 CHKERRQ(ISDestroy(&reuse->is_B)); 251 CHKERRQ(VecScatterDestroy(&reuse->correction_scatter_B)); 252 CHKERRQ(VecDestroy(&reuse->sol_B)); 253 CHKERRQ(VecDestroy(&reuse->rhs_B)); 254 for (i=0;i<reuse->benign_n;i++) { 255 CHKERRQ(ISDestroy(&reuse->benign_zerodiag_subs[i])); 256 } 257 CHKERRQ(PetscFree(reuse->benign_zerodiag_subs)); 258 CHKERRQ(PetscFree(reuse->benign_save_vals)); 259 CHKERRQ(MatDestroy(&reuse->benign_csAIB)); 260 CHKERRQ(MatDestroy(&reuse->benign_AIIm1ones)); 261 CHKERRQ(VecDestroy(&reuse->benign_corr_work)); 262 CHKERRQ(VecDestroy(&reuse->benign_dummy_schur_vec)); 263 PetscFunctionReturn(0); 264 } 265 266 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 267 { 268 Mat B, C, D, Bd, Cd, AinvBd; 269 KSP ksp; 270 PC pc; 271 PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 272 PetscReal fill = 2.0; 273 PetscInt n_I; 274 PetscMPIInt size; 275 276 PetscFunctionBegin; 277 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size)); 278 PetscCheckFalse(size != 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices"); 279 if (reuse == MAT_REUSE_MATRIX) { 280 PetscBool Sdense; 281 282 CHKERRQ(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 283 PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense"); 284 } 285 CHKERRQ(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 286 CHKERRQ(MatSchurComplementGetKSP(M, &ksp)); 287 CHKERRQ(KSPGetPC(ksp, &pc)); 288 CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU)); 289 CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU)); 290 CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL)); 291 CHKERRQ(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense)); 292 CHKERRQ(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense)); 293 CHKERRQ(MatGetSize(B,&n_I,NULL)); 294 if (n_I) { 295 if (!Bdense) { 296 CHKERRQ(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 297 } else { 298 Bd = B; 299 } 300 301 if (isLU || isILU || isCHOL) { 302 Mat fact; 303 CHKERRQ(KSPSetUp(ksp)); 304 CHKERRQ(PCFactorGetMatrix(pc, &fact)); 305 CHKERRQ(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 306 CHKERRQ(MatMatSolve(fact, Bd, AinvBd)); 307 } else { 308 PetscBool ex = PETSC_TRUE; 309 310 if (ex) { 311 Mat Ainvd; 312 313 CHKERRQ(PCComputeOperator(pc, MATDENSE, &Ainvd)); 314 CHKERRQ(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 315 CHKERRQ(MatDestroy(&Ainvd)); 316 } else { 317 Vec sol,rhs; 318 PetscScalar *arrayrhs,*arraysol; 319 PetscInt i,nrhs,n; 320 321 CHKERRQ(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 322 CHKERRQ(MatGetSize(Bd,&n,&nrhs)); 323 CHKERRQ(MatDenseGetArray(Bd,&arrayrhs)); 324 CHKERRQ(MatDenseGetArray(AinvBd,&arraysol)); 325 CHKERRQ(KSPGetSolution(ksp,&sol)); 326 CHKERRQ(KSPGetRhs(ksp,&rhs)); 327 for (i=0;i<nrhs;i++) { 328 CHKERRQ(VecPlaceArray(rhs,arrayrhs+i*n)); 329 CHKERRQ(VecPlaceArray(sol,arraysol+i*n)); 330 CHKERRQ(KSPSolve(ksp,rhs,sol)); 331 CHKERRQ(VecResetArray(rhs)); 332 CHKERRQ(VecResetArray(sol)); 333 } 334 CHKERRQ(MatDenseRestoreArray(Bd,&arrayrhs)); 335 CHKERRQ(MatDenseRestoreArray(AinvBd,&arrayrhs)); 336 } 337 } 338 if (!Bdense & !issym) { 339 CHKERRQ(MatDestroy(&Bd)); 340 } 341 342 if (!issym) { 343 if (!Cdense) { 344 CHKERRQ(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 345 } else { 346 Cd = C; 347 } 348 CHKERRQ(MatMatMult(Cd, AinvBd, reuse, fill, S)); 349 if (!Cdense) { 350 CHKERRQ(MatDestroy(&Cd)); 351 } 352 } else { 353 CHKERRQ(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 354 if (!Bdense) { 355 CHKERRQ(MatDestroy(&Bd)); 356 } 357 } 358 CHKERRQ(MatDestroy(&AinvBd)); 359 } 360 361 if (D) { 362 Mat Dd; 363 PetscBool Ddense; 364 365 CHKERRQ(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense)); 366 if (!Ddense) { 367 CHKERRQ(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 368 } else { 369 Dd = D; 370 } 371 if (n_I) { 372 CHKERRQ(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN)); 373 } else { 374 if (reuse == MAT_INITIAL_MATRIX) { 375 CHKERRQ(MatDuplicate(Dd,MAT_COPY_VALUES,S)); 376 } else { 377 CHKERRQ(MatCopy(Dd,*S,SAME_NONZERO_PATTERN)); 378 } 379 } 380 if (!Ddense) { 381 CHKERRQ(MatDestroy(&Dd)); 382 } 383 } else { 384 CHKERRQ(MatScale(*S,-1.0)); 385 } 386 PetscFunctionReturn(0); 387 } 388 389 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 390 { 391 Mat F,A_II,A_IB,A_BI,A_BB,AE_II; 392 Mat S_all; 393 Vec gstash,lstash; 394 VecScatter sstash; 395 IS is_I,is_I_layer; 396 IS all_subsets,all_subsets_mult,all_subsets_n; 397 PetscScalar *stasharray,*Bwork; 398 PetscInt *nnz,*all_local_idx_N; 399 PetscInt *auxnum1,*auxnum2; 400 PetscInt i,subset_size,max_subset_size; 401 PetscInt n_B,extra,local_size,global_size; 402 PetscInt local_stash_size; 403 PetscBLASInt B_N,B_ierr,B_lwork,*pivots; 404 MPI_Comm comm_n; 405 PetscBool deluxe = PETSC_TRUE; 406 PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 407 PetscViewer matl_dbg_viewer = NULL; 408 PetscErrorCode ierr; 409 PetscBool flg; 410 411 PetscFunctionBegin; 412 CHKERRQ(MatDestroy(&sub_schurs->A)); 413 CHKERRQ(MatDestroy(&sub_schurs->S)); 414 if (Ain) { 415 CHKERRQ(PetscObjectReference((PetscObject)Ain)); 416 sub_schurs->A = Ain; 417 } 418 419 CHKERRQ(PetscObjectReference((PetscObject)Sin)); 420 sub_schurs->S = Sin; 421 if (sub_schurs->schur_explicit) { 422 sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 423 } 424 425 /* preliminary checks */ 426 PetscCheckFalse(!sub_schurs->schur_explicit && compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 427 428 if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 429 430 /* debug (MATLAB) */ 431 if (sub_schurs->debug) { 432 PetscMPIInt size,rank; 433 PetscInt nr,*print_schurs_ranks,print_schurs = PETSC_FALSE; 434 435 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size)); 436 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 437 nr = size; 438 CHKERRQ(PetscMalloc1(nr,&print_schurs_ranks)); 439 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 440 CHKERRQ(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg)); 441 if (!flg) print_schurs = PETSC_TRUE; 442 else { 443 print_schurs = PETSC_FALSE; 444 for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; } 445 } 446 ierr = PetscOptionsEnd();CHKERRQ(ierr); 447 CHKERRQ(PetscFree(print_schurs_ranks)); 448 if (print_schurs) { 449 char filename[256]; 450 451 CHKERRQ(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank)); 452 CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer)); 453 CHKERRQ(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB)); 454 } 455 } 456 457 /* restrict work on active processes */ 458 if (sub_schurs->restrict_comm) { 459 PetscSubcomm subcomm; 460 PetscMPIInt color,rank; 461 462 color = 0; 463 if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 464 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank)); 465 CHKERRQ(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm)); 466 CHKERRQ(PetscSubcommSetNumber(subcomm,2)); 467 CHKERRQ(PetscSubcommSetTypeGeneral(subcomm,color,rank)); 468 CHKERRQ(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL)); 469 CHKERRQ(PetscSubcommDestroy(&subcomm)); 470 if (!sub_schurs->n_subs) { 471 CHKERRQ(PetscCommDestroy(&comm_n)); 472 PetscFunctionReturn(0); 473 } 474 } else { 475 CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL)); 476 } 477 478 /* get Schur complement matrices */ 479 if (!sub_schurs->schur_explicit) { 480 Mat tA_IB,tA_BI,tA_BB; 481 PetscBool isseqsbaij; 482 CHKERRQ(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB)); 483 CHKERRQ(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij)); 484 if (isseqsbaij) { 485 CHKERRQ(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB)); 486 CHKERRQ(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB)); 487 CHKERRQ(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI)); 488 } else { 489 CHKERRQ(PetscObjectReference((PetscObject)tA_BB)); 490 A_BB = tA_BB; 491 CHKERRQ(PetscObjectReference((PetscObject)tA_IB)); 492 A_IB = tA_IB; 493 CHKERRQ(PetscObjectReference((PetscObject)tA_BI)); 494 A_BI = tA_BI; 495 } 496 } else { 497 A_II = NULL; 498 A_IB = NULL; 499 A_BI = NULL; 500 A_BB = NULL; 501 } 502 S_all = NULL; 503 504 /* determine interior problems */ 505 CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&i)); 506 if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 507 PetscBT touched; 508 const PetscInt* idx_B; 509 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 510 511 PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency"); 512 /* get sizes */ 513 CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&n_I)); 514 CHKERRQ(ISGetLocalSize(sub_schurs->is_B,&n_B)); 515 516 CHKERRQ(PetscMalloc1(n_I+n_B,&local_numbering)); 517 CHKERRQ(PetscBTCreate(n_I+n_B,&touched)); 518 CHKERRQ(PetscBTMemzero(n_I+n_B,touched)); 519 520 /* all boundary dofs must be skipped when adding layers */ 521 CHKERRQ(ISGetIndices(sub_schurs->is_B,&idx_B)); 522 for (j=0;j<n_B;j++) { 523 CHKERRQ(PetscBTSet(touched,idx_B[j])); 524 } 525 CHKERRQ(PetscArraycpy(local_numbering,idx_B,n_B)); 526 CHKERRQ(ISRestoreIndices(sub_schurs->is_B,&idx_B)); 527 528 /* add prescribed number of layers of dofs */ 529 n_local_dofs = n_B; 530 n_prev_added = n_B; 531 for (layer=0;layer<nlayers;layer++) { 532 PetscInt n_added = 0; 533 if (n_local_dofs == n_I+n_B) break; 534 PetscCheckFalse(n_local_dofs > n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B); 535 CHKERRQ(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added)); 536 n_prev_added = n_added; 537 n_local_dofs += n_added; 538 if (!n_added) break; 539 } 540 CHKERRQ(PetscBTDestroy(&touched)); 541 542 /* IS for I layer dofs in original numbering */ 543 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer)); 544 CHKERRQ(PetscFree(local_numbering)); 545 CHKERRQ(ISSort(is_I_layer)); 546 /* IS for I layer dofs in I numbering */ 547 if (!sub_schurs->schur_explicit) { 548 ISLocalToGlobalMapping ItoNmap; 549 CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap)); 550 CHKERRQ(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I)); 551 CHKERRQ(ISLocalToGlobalMappingDestroy(&ItoNmap)); 552 553 /* II block */ 554 CHKERRQ(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II)); 555 } 556 } else { 557 PetscInt n_I; 558 559 /* IS for I dofs in original numbering */ 560 CHKERRQ(PetscObjectReference((PetscObject)sub_schurs->is_I)); 561 is_I_layer = sub_schurs->is_I; 562 563 /* IS for I dofs in I numbering (strided 1) */ 564 if (!sub_schurs->schur_explicit) { 565 CHKERRQ(ISGetSize(sub_schurs->is_I,&n_I)); 566 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I)); 567 568 /* II block is the same */ 569 CHKERRQ(PetscObjectReference((PetscObject)A_II)); 570 AE_II = A_II; 571 } 572 } 573 574 /* Get info on subset sizes and sum of all subsets sizes */ 575 max_subset_size = 0; 576 local_size = 0; 577 for (i=0;i<sub_schurs->n_subs;i++) { 578 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 579 max_subset_size = PetscMax(subset_size,max_subset_size); 580 local_size += subset_size; 581 } 582 583 /* Work arrays for local indices */ 584 extra = 0; 585 CHKERRQ(ISGetLocalSize(sub_schurs->is_B,&n_B)); 586 if (sub_schurs->schur_explicit && is_I_layer) { 587 CHKERRQ(ISGetLocalSize(is_I_layer,&extra)); 588 } 589 CHKERRQ(PetscMalloc1(n_B+extra,&all_local_idx_N)); 590 if (extra) { 591 const PetscInt *idxs; 592 CHKERRQ(ISGetIndices(is_I_layer,&idxs)); 593 CHKERRQ(PetscArraycpy(all_local_idx_N,idxs,extra)); 594 CHKERRQ(ISRestoreIndices(is_I_layer,&idxs)); 595 } 596 CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&auxnum1)); 597 CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&auxnum2)); 598 599 /* Get local indices in local numbering */ 600 local_size = 0; 601 local_stash_size = 0; 602 for (i=0;i<sub_schurs->n_subs;i++) { 603 const PetscInt *idxs; 604 605 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 606 CHKERRQ(ISGetIndices(sub_schurs->is_subs[i],&idxs)); 607 /* start (smallest in global ordering) and multiplicity */ 608 auxnum1[i] = idxs[0]; 609 auxnum2[i] = subset_size*subset_size; 610 /* subset indices in local numbering */ 611 CHKERRQ(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size)); 612 CHKERRQ(ISRestoreIndices(sub_schurs->is_subs[i],&idxs)); 613 local_size += subset_size; 614 local_stash_size += subset_size*subset_size; 615 } 616 617 /* allocate extra workspace needed only for GETRI or SYTRF */ 618 use_potr = use_sytr = PETSC_FALSE; 619 if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 620 use_potr = PETSC_TRUE; 621 } else if (sub_schurs->is_symmetric) { 622 use_sytr = PETSC_TRUE; 623 } 624 if (local_size && !use_potr) { 625 PetscScalar lwork,dummyscalar = 0.; 626 PetscBLASInt dummyint = 0; 627 628 B_lwork = -1; 629 CHKERRQ(PetscBLASIntCast(local_size,&B_N)); 630 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 631 if (use_sytr) { 632 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 633 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr); 634 } else { 635 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr)); 636 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr); 637 } 638 CHKERRQ(PetscFPTrapPop()); 639 CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork)); 640 CHKERRQ(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots)); 641 } else { 642 Bwork = NULL; 643 pivots = NULL; 644 } 645 646 /* prepare data for summing up properly schurs on subsets */ 647 CHKERRQ(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n)); 648 CHKERRQ(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets)); 649 CHKERRQ(ISDestroy(&all_subsets_n)); 650 CHKERRQ(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult)); 651 CHKERRQ(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n)); 652 CHKERRQ(ISDestroy(&all_subsets)); 653 CHKERRQ(ISDestroy(&all_subsets_mult)); 654 CHKERRQ(ISGetLocalSize(all_subsets_n,&i)); 655 PetscCheckFalse(i != local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_stash_size); 656 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash)); 657 CHKERRQ(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash)); 658 CHKERRQ(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash)); 659 CHKERRQ(ISDestroy(&all_subsets_n)); 660 661 /* subset indices in local boundary numbering */ 662 if (!sub_schurs->is_Ej_all) { 663 PetscInt *all_local_idx_B; 664 665 CHKERRQ(PetscMalloc1(local_size,&all_local_idx_B)); 666 CHKERRQ(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B)); 667 PetscCheckFalse(subset_size != local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size); 668 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all)); 669 } 670 671 if (change) { 672 ISLocalToGlobalMapping BtoS; 673 IS change_primal_B; 674 IS change_primal_all; 675 676 PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 677 PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 678 CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub)); 679 for (i=0;i<sub_schurs->n_subs;i++) { 680 ISLocalToGlobalMapping NtoS; 681 CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS)); 682 CHKERRQ(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i])); 683 CHKERRQ(ISLocalToGlobalMappingDestroy(&NtoS)); 684 } 685 CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B)); 686 CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS)); 687 CHKERRQ(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all)); 688 CHKERRQ(ISLocalToGlobalMappingDestroy(&BtoS)); 689 CHKERRQ(ISDestroy(&change_primal_B)); 690 CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change)); 691 for (i=0;i<sub_schurs->n_subs;i++) { 692 Mat change_sub; 693 694 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 695 CHKERRQ(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i])); 696 CHKERRQ(KSPSetType(sub_schurs->change[i],KSPPREONLY)); 697 if (!sub_schurs->change_with_qr) { 698 CHKERRQ(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub)); 699 } else { 700 Mat change_subt; 701 CHKERRQ(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt)); 702 CHKERRQ(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub)); 703 CHKERRQ(MatDestroy(&change_subt)); 704 } 705 CHKERRQ(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub)); 706 CHKERRQ(MatDestroy(&change_sub)); 707 CHKERRQ(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix)); 708 CHKERRQ(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_")); 709 } 710 CHKERRQ(ISDestroy(&change_primal_all)); 711 } 712 713 /* Local matrix of all local Schur on subsets (transposed) */ 714 if (!sub_schurs->S_Ej_all) { 715 Mat T; 716 PetscScalar *v; 717 PetscInt *ii,*jj; 718 PetscInt cum,i,j,k; 719 720 /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 721 Allocate properly a representative matrix and duplicate */ 722 CHKERRQ(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v)); 723 ii[0] = 0; 724 cum = 0; 725 for (i=0;i<sub_schurs->n_subs;i++) { 726 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 727 for (j=0;j<subset_size;j++) { 728 const PetscInt row = cum+j; 729 PetscInt col = cum; 730 731 ii[row+1] = ii[row] + subset_size; 732 for (k=ii[row];k<ii[row+1];k++) { 733 jj[k] = col; 734 col++; 735 } 736 } 737 cum += subset_size; 738 } 739 CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T)); 740 CHKERRQ(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all)); 741 CHKERRQ(MatDestroy(&T)); 742 CHKERRQ(PetscFree3(ii,jj,v)); 743 } 744 /* matrices for deluxe scaling and adaptive selection */ 745 if (compute_Stilda) { 746 if (!sub_schurs->sum_S_Ej_tilda_all) { 747 CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all)); 748 } 749 if (!sub_schurs->sum_S_Ej_inv_all && deluxe) { 750 CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all)); 751 } 752 } 753 754 /* Compute Schur complements explicitly */ 755 F = NULL; 756 if (!sub_schurs->schur_explicit) { 757 /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 758 it is not efficient, unless the economic version of the scaling is used */ 759 Mat S_Ej_expl; 760 PetscScalar *work; 761 PetscInt j,*dummy_idx; 762 PetscBool Sdense; 763 764 CHKERRQ(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work)); 765 local_size = 0; 766 for (i=0;i<sub_schurs->n_subs;i++) { 767 IS is_subset_B; 768 Mat AE_EE,AE_IE,AE_EI,S_Ej; 769 770 /* subsets in original and boundary numbering */ 771 CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B)); 772 /* EE block */ 773 CHKERRQ(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE)); 774 /* IE block */ 775 CHKERRQ(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE)); 776 /* EI block */ 777 if (sub_schurs->is_symmetric) { 778 CHKERRQ(MatCreateTranspose(AE_IE,&AE_EI)); 779 } else if (sub_schurs->is_hermitian) { 780 CHKERRQ(MatCreateHermitianTranspose(AE_IE,&AE_EI)); 781 } else { 782 CHKERRQ(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI)); 783 } 784 CHKERRQ(ISDestroy(&is_subset_B)); 785 CHKERRQ(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej)); 786 CHKERRQ(MatDestroy(&AE_EE)); 787 CHKERRQ(MatDestroy(&AE_IE)); 788 CHKERRQ(MatDestroy(&AE_EI)); 789 if (AE_II == A_II) { /* we can reuse the same ksp */ 790 KSP ksp; 791 CHKERRQ(MatSchurComplementGetKSP(sub_schurs->S,&ksp)); 792 CHKERRQ(MatSchurComplementSetKSP(S_Ej,ksp)); 793 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 794 KSP origksp,schurksp; 795 PC origpc,schurpc; 796 KSPType ksp_type; 797 PetscInt n_internal; 798 PetscBool ispcnone; 799 800 CHKERRQ(MatSchurComplementGetKSP(sub_schurs->S,&origksp)); 801 CHKERRQ(MatSchurComplementGetKSP(S_Ej,&schurksp)); 802 CHKERRQ(KSPGetType(origksp,&ksp_type)); 803 CHKERRQ(KSPSetType(schurksp,ksp_type)); 804 CHKERRQ(KSPGetPC(schurksp,&schurpc)); 805 CHKERRQ(KSPGetPC(origksp,&origpc)); 806 CHKERRQ(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone)); 807 if (!ispcnone) { 808 PCType pc_type; 809 CHKERRQ(PCGetType(origpc,&pc_type)); 810 CHKERRQ(PCSetType(schurpc,pc_type)); 811 } else { 812 CHKERRQ(PCSetType(schurpc,PCLU)); 813 } 814 CHKERRQ(ISGetSize(is_I,&n_internal)); 815 if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 816 MatSolverType solver = NULL; 817 CHKERRQ(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver)); 818 if (solver) { 819 CHKERRQ(PCFactorSetMatSolverType(schurpc,solver)); 820 } 821 } 822 CHKERRQ(KSPSetUp(schurksp)); 823 } 824 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 825 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl)); 826 CHKERRQ(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl)); 827 CHKERRQ(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense)); 828 if (Sdense) { 829 for (j=0;j<subset_size;j++) { 830 dummy_idx[j]=local_size+j; 831 } 832 CHKERRQ(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES)); 833 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices"); 834 CHKERRQ(MatDestroy(&S_Ej)); 835 CHKERRQ(MatDestroy(&S_Ej_expl)); 836 local_size += subset_size; 837 } 838 CHKERRQ(PetscFree2(dummy_idx,work)); 839 /* free */ 840 CHKERRQ(ISDestroy(&is_I)); 841 CHKERRQ(MatDestroy(&AE_II)); 842 CHKERRQ(PetscFree(all_local_idx_N)); 843 } else { 844 Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL; 845 Vec Dall = NULL; 846 IS is_A_all,*is_p_r = NULL; 847 MatType Stype; 848 PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL; 849 PetscScalar *SEj_arr = NULL,*SEjinv_arr = NULL; 850 const PetscScalar *rS_data; 851 PetscInt n,n_I,size_schur,size_active_schur,cum,cum2; 852 PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE; 853 PetscBool schur_has_vertices,factor_workaround; 854 PetscBool use_cholesky; 855 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 856 PetscBool oldpin; 857 #endif 858 859 /* get sizes */ 860 n_I = 0; 861 if (is_I_layer) { 862 CHKERRQ(ISGetLocalSize(is_I_layer,&n_I)); 863 } 864 economic = PETSC_FALSE; 865 CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&cum)); 866 if (cum != n_I) economic = PETSC_TRUE; 867 CHKERRQ(MatGetLocalSize(sub_schurs->A,&n,NULL)); 868 size_active_schur = local_size; 869 870 /* import scaling vector (wrong formulation if we have 3D edges) */ 871 if (scaling && compute_Stilda) { 872 const PetscScalar *array; 873 PetscScalar *array2; 874 const PetscInt *idxs; 875 PetscInt i; 876 877 CHKERRQ(ISGetIndices(sub_schurs->is_Ej_all,&idxs)); 878 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall)); 879 CHKERRQ(VecGetArrayRead(scaling,&array)); 880 CHKERRQ(VecGetArray(Dall,&array2)); 881 for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]]; 882 CHKERRQ(VecRestoreArray(Dall,&array2)); 883 CHKERRQ(VecRestoreArrayRead(scaling,&array)); 884 CHKERRQ(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs)); 885 deluxe = PETSC_FALSE; 886 } 887 888 /* size active schurs does not count any dirichlet or vertex dof on the interface */ 889 factor_workaround = PETSC_FALSE; 890 schur_has_vertices = PETSC_FALSE; 891 cum = n_I+size_active_schur; 892 if (sub_schurs->is_dir) { 893 const PetscInt* idxs; 894 PetscInt n_dir; 895 896 CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&n_dir)); 897 CHKERRQ(ISGetIndices(sub_schurs->is_dir,&idxs)); 898 CHKERRQ(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir)); 899 CHKERRQ(ISRestoreIndices(sub_schurs->is_dir,&idxs)); 900 cum += n_dir; 901 factor_workaround = PETSC_TRUE; 902 } 903 /* include the primal vertices in the Schur complement */ 904 if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 905 PetscInt n_v; 906 907 CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 908 if (n_v) { 909 const PetscInt* idxs; 910 911 CHKERRQ(ISGetIndices(sub_schurs->is_vertices,&idxs)); 912 CHKERRQ(PetscArraycpy(all_local_idx_N+cum,idxs,n_v)); 913 CHKERRQ(ISRestoreIndices(sub_schurs->is_vertices,&idxs)); 914 cum += n_v; 915 factor_workaround = PETSC_TRUE; 916 schur_has_vertices = PETSC_TRUE; 917 } 918 } 919 size_schur = cum - n_I; 920 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all)); 921 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 922 oldpin = sub_schurs->A->boundtocpu; 923 CHKERRQ(MatBindToCPU(sub_schurs->A,PETSC_TRUE)); 924 #endif 925 if (cum == n) { 926 CHKERRQ(ISSetPermutation(is_A_all)); 927 CHKERRQ(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A)); 928 } else { 929 CHKERRQ(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A)); 930 } 931 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 932 CHKERRQ(MatBindToCPU(sub_schurs->A,oldpin)); 933 #endif 934 CHKERRQ(MatSetOptionsPrefix(A,sub_schurs->prefix)); 935 CHKERRQ(MatAppendOptionsPrefix(A,"sub_schurs_")); 936 937 /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 938 this is a workaround */ 939 if (benign_n) { 940 Vec v,benign_AIIm1_ones; 941 ISLocalToGlobalMapping N_to_reor; 942 IS is_p0,is_p0_p; 943 PetscScalar *cs_AIB,*AIIm1_data; 944 PetscInt sizeA; 945 946 CHKERRQ(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor)); 947 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0)); 948 CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p)); 949 CHKERRQ(ISDestroy(&is_p0)); 950 CHKERRQ(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 951 CHKERRQ(VecGetSize(v,&sizeA)); 952 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat)); 953 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat)); 954 CHKERRQ(MatDenseGetArray(cs_AIB_mat,&cs_AIB)); 955 CHKERRQ(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 956 CHKERRQ(PetscMalloc1(benign_n,&is_p_r)); 957 /* compute colsum of A_IB restricted to pressures */ 958 for (i=0;i<benign_n;i++) { 959 const PetscScalar *array; 960 const PetscInt *idxs; 961 PetscInt j,nz; 962 963 CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i])); 964 CHKERRQ(ISGetLocalSize(is_p_r[i],&nz)); 965 CHKERRQ(ISGetIndices(is_p_r[i],&idxs)); 966 for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.; 967 CHKERRQ(ISRestoreIndices(is_p_r[i],&idxs)); 968 CHKERRQ(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 969 CHKERRQ(MatMult(A,benign_AIIm1_ones,v)); 970 CHKERRQ(VecResetArray(benign_AIIm1_ones)); 971 CHKERRQ(VecGetArrayRead(v,&array)); 972 for (j=0;j<size_schur;j++) { 973 #if defined(PETSC_USE_COMPLEX) 974 cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz)); 975 #else 976 cs_AIB[i*size_schur + j] = array[j+n_I]/nz; 977 #endif 978 } 979 CHKERRQ(VecRestoreArrayRead(v,&array)); 980 } 981 CHKERRQ(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB)); 982 CHKERRQ(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 983 CHKERRQ(VecDestroy(&v)); 984 CHKERRQ(VecDestroy(&benign_AIIm1_ones)); 985 CHKERRQ(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE)); 986 CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 987 CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 988 CHKERRQ(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL)); 989 CHKERRQ(ISDestroy(&is_p0_p)); 990 CHKERRQ(ISLocalToGlobalMappingDestroy(&N_to_reor)); 991 } 992 CHKERRQ(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric)); 993 CHKERRQ(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian)); 994 CHKERRQ(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef)); 995 996 /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 997 use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 998 999 /* when using the benign subspace trick, the local Schur complements are SPD */ 1000 /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 1001 Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 1002 if (benign_trick) { 1003 sub_schurs->is_posdef = PETSC_TRUE; 1004 CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg)); 1005 if (flg) use_cholesky = PETSC_FALSE; 1006 } 1007 1008 if (n_I) { 1009 IS is_schur; 1010 char stype[64]; 1011 PetscBool gpu = PETSC_FALSE; 1012 1013 if (use_cholesky) { 1014 CHKERRQ(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F)); 1015 } else { 1016 CHKERRQ(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F)); 1017 } 1018 CHKERRQ(MatSetErrorIfFailure(A,PETSC_TRUE)); 1019 #if defined(PETSC_HAVE_MKL_PARDISO) 1020 if (benign_trick) CHKERRQ(MatMkl_PardisoSetCntl(F,10,10)); 1021 #endif 1022 /* subsets ordered last */ 1023 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur)); 1024 CHKERRQ(MatFactorSetSchurIS(F,is_schur)); 1025 CHKERRQ(ISDestroy(&is_schur)); 1026 1027 /* factorization step */ 1028 if (use_cholesky) { 1029 CHKERRQ(MatCholeskyFactorSymbolic(F,A,NULL,NULL)); 1030 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1031 CHKERRQ(MatMumpsSetIcntl(F,19,2)); 1032 #endif 1033 CHKERRQ(MatCholeskyFactorNumeric(F,A,NULL)); 1034 S_lower_triangular = PETSC_TRUE; 1035 } else { 1036 CHKERRQ(MatLUFactorSymbolic(F,A,NULL,NULL,NULL)); 1037 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1038 CHKERRQ(MatMumpsSetIcntl(F,19,3)); 1039 #endif 1040 CHKERRQ(MatLUFactorNumeric(F,A,NULL)); 1041 } 1042 CHKERRQ(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view")); 1043 1044 if (matl_dbg_viewer) { 1045 Mat S; 1046 IS is; 1047 1048 CHKERRQ(PetscObjectSetName((PetscObject)A,"A")); 1049 CHKERRQ(MatView(A,matl_dbg_viewer)); 1050 CHKERRQ(MatFactorCreateSchurComplement(F,&S,NULL)); 1051 CHKERRQ(PetscObjectSetName((PetscObject)S,"S")); 1052 CHKERRQ(MatView(S,matl_dbg_viewer)); 1053 CHKERRQ(MatDestroy(&S)); 1054 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is)); 1055 CHKERRQ(PetscObjectSetName((PetscObject)is,"I")); 1056 CHKERRQ(ISView(is,matl_dbg_viewer)); 1057 CHKERRQ(ISDestroy(&is)); 1058 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is)); 1059 CHKERRQ(PetscObjectSetName((PetscObject)is,"B")); 1060 CHKERRQ(ISView(is,matl_dbg_viewer)); 1061 CHKERRQ(ISDestroy(&is)); 1062 CHKERRQ(PetscObjectSetName((PetscObject)is_A_all,"IA")); 1063 CHKERRQ(ISView(is_A_all,matl_dbg_viewer)); 1064 } 1065 1066 /* get explicit Schur Complement computed during numeric factorization */ 1067 CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL)); 1068 CHKERRQ(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype))); 1069 #if defined(PETSC_HAVE_CUDA) 1070 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"")); 1071 #endif 1072 if (gpu) { 1073 CHKERRQ(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype))); 1074 } 1075 CHKERRQ(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL)); 1076 CHKERRQ(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all)); 1077 CHKERRQ(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef)); 1078 CHKERRQ(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian)); 1079 CHKERRQ(MatGetType(S_all,&Stype)); 1080 1081 /* we can reuse the solvers if we are not using the economic version */ 1082 reuse_solvers = (PetscBool)(reuse_solvers && !economic); 1083 factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 1084 if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) 1085 reuse_solvers = factor_workaround = PETSC_FALSE; 1086 1087 solver_S = PETSC_TRUE; 1088 1089 /* update the Schur complement with the change of basis on the pressures */ 1090 if (benign_n) { 1091 const PetscScalar *cs_AIB; 1092 PetscScalar *S_data,*AIIm1_data; 1093 Mat S2 = NULL,S3 = NULL; /* dbg */ 1094 PetscScalar *S2_data,*S3_data; /* dbg */ 1095 Vec v,benign_AIIm1_ones; 1096 PetscInt sizeA; 1097 1098 CHKERRQ(MatDenseGetArray(S_all,&S_data)); 1099 CHKERRQ(MatCreateVecs(A,&v,&benign_AIIm1_ones)); 1100 CHKERRQ(VecGetSize(v,&sizeA)); 1101 #if defined(PETSC_HAVE_MUMPS) 1102 CHKERRQ(MatMumpsSetIcntl(F,26,0)); 1103 #endif 1104 #if defined(PETSC_HAVE_MKL_PARDISO) 1105 CHKERRQ(MatMkl_PardisoSetCntl(F,70,1)); 1106 #endif 1107 CHKERRQ(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB)); 1108 CHKERRQ(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data)); 1109 if (matl_dbg_viewer) { 1110 CHKERRQ(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2)); 1111 CHKERRQ(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3)); 1112 CHKERRQ(MatDenseGetArray(S2,&S2_data)); 1113 CHKERRQ(MatDenseGetArray(S3,&S3_data)); 1114 } 1115 for (i=0;i<benign_n;i++) { 1116 PetscScalar *array,sum = 0.,one = 1.,*sums; 1117 const PetscInt *idxs; 1118 PetscInt k,j,nz; 1119 PetscBLASInt B_k,B_n; 1120 1121 CHKERRQ(PetscCalloc1(benign_n,&sums)); 1122 CHKERRQ(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i)); 1123 CHKERRQ(VecCopy(benign_AIIm1_ones,v)); 1124 CHKERRQ(MatSolve(F,v,benign_AIIm1_ones)); 1125 CHKERRQ(MatMult(A,benign_AIIm1_ones,v)); 1126 CHKERRQ(VecResetArray(benign_AIIm1_ones)); 1127 /* p0 dofs (eliminated) are excluded from the sums */ 1128 for (k=0;k<benign_n;k++) { 1129 CHKERRQ(ISGetLocalSize(is_p_r[k],&nz)); 1130 CHKERRQ(ISGetIndices(is_p_r[k],&idxs)); 1131 for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i]; 1132 CHKERRQ(ISRestoreIndices(is_p_r[k],&idxs)); 1133 } 1134 CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array)); 1135 if (matl_dbg_viewer) { 1136 Vec vv; 1137 char name[16]; 1138 1139 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv)); 1140 CHKERRQ(PetscSNPrintf(name,sizeof(name),"Pvs%D",i)); 1141 CHKERRQ(PetscObjectSetName((PetscObject)vv,name)); 1142 CHKERRQ(VecView(vv,matl_dbg_viewer)); 1143 } 1144 /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 1145 /* cs_AIB already scaled by 1./nz */ 1146 B_k = 1; 1147 for (k=0;k<benign_n;k++) { 1148 sum = sums[k]; 1149 CHKERRQ(PetscBLASIntCast(size_schur,&B_n)); 1150 1151 if (PetscAbsScalar(sum) == 0.0) continue; 1152 if (k == i) { 1153 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 1154 if (matl_dbg_viewer) { 1155 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 1156 } 1157 } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 1158 sum /= 2.0; 1159 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 1160 if (matl_dbg_viewer) { 1161 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n)); 1162 } 1163 } 1164 } 1165 sum = 1.; 1166 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n)); 1167 if (matl_dbg_viewer) { 1168 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n)); 1169 } 1170 CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array)); 1171 /* set p0 entry of AIIm1_ones to zero */ 1172 CHKERRQ(ISGetLocalSize(is_p_r[i],&nz)); 1173 CHKERRQ(ISGetIndices(is_p_r[i],&idxs)); 1174 for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.; 1175 CHKERRQ(ISRestoreIndices(is_p_r[i],&idxs)); 1176 CHKERRQ(PetscFree(sums)); 1177 } 1178 CHKERRQ(VecDestroy(&benign_AIIm1_ones)); 1179 if (matl_dbg_viewer) { 1180 CHKERRQ(MatDenseRestoreArray(S2,&S2_data)); 1181 CHKERRQ(MatDenseRestoreArray(S3,&S3_data)); 1182 } 1183 if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1184 PetscInt k,j; 1185 for (k=0;k<size_schur;k++) { 1186 for (j=k;j<size_schur;j++) { 1187 S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]); 1188 } 1189 } 1190 } 1191 1192 /* restore defaults */ 1193 #if defined(PETSC_HAVE_MUMPS) 1194 CHKERRQ(MatMumpsSetIcntl(F,26,-1)); 1195 #endif 1196 #if defined(PETSC_HAVE_MKL_PARDISO) 1197 CHKERRQ(MatMkl_PardisoSetCntl(F,70,0)); 1198 #endif 1199 CHKERRQ(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB)); 1200 CHKERRQ(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data)); 1201 CHKERRQ(VecDestroy(&v)); 1202 CHKERRQ(MatDenseRestoreArray(S_all,&S_data)); 1203 if (matl_dbg_viewer) { 1204 Mat S; 1205 1206 CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1207 CHKERRQ(MatFactorCreateSchurComplement(F,&S,NULL)); 1208 CHKERRQ(PetscObjectSetName((PetscObject)S,"Sb")); 1209 CHKERRQ(MatView(S,matl_dbg_viewer)); 1210 CHKERRQ(MatDestroy(&S)); 1211 CHKERRQ(PetscObjectSetName((PetscObject)S2,"S2P")); 1212 CHKERRQ(MatView(S2,matl_dbg_viewer)); 1213 CHKERRQ(PetscObjectSetName((PetscObject)S3,"S3P")); 1214 CHKERRQ(MatView(S3,matl_dbg_viewer)); 1215 CHKERRQ(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs")); 1216 CHKERRQ(MatView(cs_AIB_mat,matl_dbg_viewer)); 1217 CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL)); 1218 } 1219 CHKERRQ(MatDestroy(&S2)); 1220 CHKERRQ(MatDestroy(&S3)); 1221 } 1222 if (!reuse_solvers) { 1223 for (i=0;i<benign_n;i++) { 1224 CHKERRQ(ISDestroy(&is_p_r[i])); 1225 } 1226 CHKERRQ(PetscFree(is_p_r)); 1227 CHKERRQ(MatDestroy(&cs_AIB_mat)); 1228 CHKERRQ(MatDestroy(&benign_AIIm1_ones_mat)); 1229 } 1230 } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 1231 CHKERRQ(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all)); 1232 CHKERRQ(MatGetType(S_all,&Stype)); 1233 reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1234 factor_workaround = PETSC_FALSE; 1235 solver_S = PETSC_FALSE; 1236 } 1237 1238 if (reuse_solvers) { 1239 Mat A_II,Afake; 1240 Vec vec1_B; 1241 PCBDDCReuseSolvers msolv_ctx; 1242 PetscInt n_R; 1243 1244 if (sub_schurs->reuse_solver) { 1245 CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1246 } else { 1247 CHKERRQ(PetscNew(&sub_schurs->reuse_solver)); 1248 } 1249 msolv_ctx = sub_schurs->reuse_solver; 1250 CHKERRQ(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL)); 1251 CHKERRQ(PetscObjectReference((PetscObject)F)); 1252 msolv_ctx->F = F; 1253 CHKERRQ(MatCreateVecs(F,&msolv_ctx->sol,NULL)); 1254 /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1255 { 1256 PetscScalar *array; 1257 PetscInt n; 1258 1259 CHKERRQ(VecGetLocalSize(msolv_ctx->sol,&n)); 1260 CHKERRQ(VecGetArray(msolv_ctx->sol,&array)); 1261 CHKERRQ(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs)); 1262 CHKERRQ(VecRestoreArray(msolv_ctx->sol,&array)); 1263 } 1264 msolv_ctx->has_vertices = schur_has_vertices; 1265 1266 /* interior solver */ 1267 CHKERRQ(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver)); 1268 CHKERRQ(PCSetOperators(msolv_ctx->interior_solver,A_II,A_II)); 1269 CHKERRQ(PCSetType(msolv_ctx->interior_solver,PCSHELL)); 1270 CHKERRQ(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)")); 1271 CHKERRQ(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx)); 1272 CHKERRQ(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 1273 CHKERRQ(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior)); 1274 CHKERRQ(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose)); 1275 1276 /* correction solver */ 1277 CHKERRQ(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver)); 1278 CHKERRQ(PCSetType(msolv_ctx->correction_solver,PCSHELL)); 1279 CHKERRQ(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)")); 1280 CHKERRQ(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx)); 1281 CHKERRQ(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View)); 1282 CHKERRQ(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction)); 1283 CHKERRQ(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose)); 1284 1285 /* scatter and vecs for Schur complement solver */ 1286 CHKERRQ(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B)); 1287 CHKERRQ(MatCreateVecs(sub_schurs->S,&vec1_B,NULL)); 1288 if (!schur_has_vertices) { 1289 CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B)); 1290 CHKERRQ(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B)); 1291 CHKERRQ(PetscObjectReference((PetscObject)is_A_all)); 1292 msolv_ctx->is_R = is_A_all; 1293 } else { 1294 IS is_B_all; 1295 const PetscInt* idxs; 1296 PetscInt dual,n_v,n; 1297 1298 CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_v)); 1299 dual = size_schur - n_v; 1300 CHKERRQ(ISGetLocalSize(is_A_all,&n)); 1301 CHKERRQ(ISGetIndices(is_A_all,&idxs)); 1302 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all)); 1303 CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B)); 1304 CHKERRQ(ISDestroy(&is_B_all)); 1305 CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all)); 1306 CHKERRQ(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B)); 1307 CHKERRQ(ISDestroy(&is_B_all)); 1308 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R)); 1309 CHKERRQ(ISRestoreIndices(is_A_all,&idxs)); 1310 } 1311 CHKERRQ(ISGetLocalSize(msolv_ctx->is_R,&n_R)); 1312 CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake)); 1313 CHKERRQ(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY)); 1314 CHKERRQ(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY)); 1315 CHKERRQ(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake)); 1316 CHKERRQ(MatDestroy(&Afake)); 1317 CHKERRQ(VecDestroy(&vec1_B)); 1318 1319 /* communicate benign info to solver context */ 1320 if (benign_n) { 1321 PetscScalar *array; 1322 1323 msolv_ctx->benign_n = benign_n; 1324 msolv_ctx->benign_zerodiag_subs = is_p_r; 1325 CHKERRQ(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals)); 1326 msolv_ctx->benign_csAIB = cs_AIB_mat; 1327 CHKERRQ(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL)); 1328 CHKERRQ(VecGetArray(msolv_ctx->benign_corr_work,&array)); 1329 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec)); 1330 CHKERRQ(VecRestoreArray(msolv_ctx->benign_corr_work,&array)); 1331 msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1332 } 1333 } else { 1334 if (sub_schurs->reuse_solver) { 1335 CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1336 } 1337 CHKERRQ(PetscFree(sub_schurs->reuse_solver)); 1338 } 1339 CHKERRQ(MatDestroy(&A)); 1340 CHKERRQ(ISDestroy(&is_A_all)); 1341 1342 /* Work arrays */ 1343 CHKERRQ(PetscMalloc1(max_subset_size*max_subset_size,&work)); 1344 1345 /* S_Ej_all */ 1346 cum = cum2 = 0; 1347 CHKERRQ(MatDenseGetArrayRead(S_all,&rS_data)); 1348 CHKERRQ(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr)); 1349 if (compute_Stilda) { 1350 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 1351 } 1352 for (i=0;i<sub_schurs->n_subs;i++) { 1353 PetscInt j; 1354 1355 /* get S_E */ 1356 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1357 if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1358 PetscInt k; 1359 for (k=0;k<subset_size;k++) { 1360 for (j=k;j<subset_size;j++) { 1361 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1362 work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]); 1363 } 1364 } 1365 } else { /* just copy to workspace */ 1366 PetscInt k; 1367 for (k=0;k<subset_size;k++) { 1368 for (j=0;j<subset_size;j++) { 1369 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1370 } 1371 } 1372 } 1373 /* insert S_E values */ 1374 if (sub_schurs->change) { 1375 Mat change_sub,SEj,T; 1376 1377 /* change basis */ 1378 CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 1379 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 1380 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1381 Mat T2; 1382 CHKERRQ(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 1383 CHKERRQ(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1384 CHKERRQ(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 1385 CHKERRQ(MatDestroy(&T2)); 1386 } else { 1387 CHKERRQ(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1388 } 1389 CHKERRQ(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 1390 CHKERRQ(MatDestroy(&T)); 1391 CHKERRQ(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL)); 1392 CHKERRQ(MatDestroy(&SEj)); 1393 } 1394 if (deluxe) { 1395 CHKERRQ(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 1396 /* if adaptivity is requested, invert S_E blocks */ 1397 if (compute_Stilda) { 1398 Mat M; 1399 const PetscScalar *vals; 1400 PetscBool isdense,isdensecuda; 1401 1402 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M)); 1403 CHKERRQ(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef)); 1404 CHKERRQ(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian)); 1405 if (!PetscBTLookup(sub_schurs->is_edge,i)) { 1406 CHKERRQ(MatSetType(M,Stype)); 1407 } 1408 CHKERRQ(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense)); 1409 CHKERRQ(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda)); 1410 if (use_cholesky) { 1411 CHKERRQ(MatCholeskyFactor(M,NULL,NULL)); 1412 } else { 1413 CHKERRQ(MatLUFactor(M,NULL,NULL,NULL)); 1414 } 1415 if (isdense) { 1416 CHKERRQ(MatSeqDenseInvertFactors_Private(M)); 1417 #if defined(PETSC_HAVE_CUDA) 1418 } else if (isdensecuda) { 1419 CHKERRQ(MatSeqDenseCUDAInvertFactors_Private(M)); 1420 #endif 1421 } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype); 1422 CHKERRQ(MatDenseGetArrayRead(M,&vals)); 1423 CHKERRQ(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size)); 1424 CHKERRQ(MatDenseRestoreArrayRead(M,&vals)); 1425 CHKERRQ(MatDestroy(&M)); 1426 } 1427 } else if (compute_Stilda) { /* not using deluxe */ 1428 Mat SEj; 1429 Vec D; 1430 PetscScalar *array; 1431 1432 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 1433 CHKERRQ(VecGetArray(Dall,&array)); 1434 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D)); 1435 CHKERRQ(VecRestoreArray(Dall,&array)); 1436 CHKERRQ(VecShift(D,-1.)); 1437 CHKERRQ(MatDiagonalScale(SEj,D,D)); 1438 CHKERRQ(MatDestroy(&SEj)); 1439 CHKERRQ(VecDestroy(&D)); 1440 CHKERRQ(PetscArraycpy(SEj_arr,work,subset_size*subset_size)); 1441 } 1442 cum += subset_size; 1443 cum2 += subset_size*(size_schur + 1); 1444 SEj_arr += subset_size*subset_size; 1445 if (SEjinv_arr) SEjinv_arr += subset_size*subset_size; 1446 } 1447 CHKERRQ(MatDenseRestoreArrayRead(S_all,&rS_data)); 1448 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr)); 1449 if (compute_Stilda) { 1450 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr)); 1451 } 1452 if (solver_S) { 1453 CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1454 } 1455 1456 /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 1457 however, preliminary tests indicate using GPUs is still faster in the solve phase */ 1458 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 1459 if (reuse_solvers) { 1460 Mat St; 1461 MatFactorSchurStatus st; 1462 1463 flg = PETSC_FALSE; 1464 CHKERRQ(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL)); 1465 CHKERRQ(MatFactorGetSchurComplement(F,&St,&st)); 1466 CHKERRQ(MatBindToCPU(St,flg)); 1467 CHKERRQ(MatFactorRestoreSchurComplement(F,&St,st)); 1468 } 1469 #endif 1470 1471 schur_factor = NULL; 1472 if (compute_Stilda && size_active_schur) { 1473 1474 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1475 if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 1476 CHKERRQ(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur)); 1477 } else { 1478 Mat S_all_inv=NULL; 1479 1480 if (solver_S) { 1481 /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1482 The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 1483 if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1484 PetscScalar *data; 1485 PetscInt nd = 0; 1486 1487 if (!use_potr) { 1488 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1489 } 1490 CHKERRQ(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1491 CHKERRQ(MatDenseGetArray(S_all_inv,&data)); 1492 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1493 CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1494 } 1495 1496 /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 1497 if (schur_has_vertices) { 1498 Mat M; 1499 PetscScalar *tdata; 1500 PetscInt nv = 0, news; 1501 1502 CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&nv)); 1503 news = size_active_schur + nv; 1504 CHKERRQ(PetscCalloc1(news*news,&tdata)); 1505 for (i=0;i<size_active_schur;i++) { 1506 CHKERRQ(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i)); 1507 CHKERRQ(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv)); 1508 } 1509 for (i=0;i<nv;i++) { 1510 PetscInt k = i+size_active_schur; 1511 CHKERRQ(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i)); 1512 } 1513 1514 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M)); 1515 CHKERRQ(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 1516 CHKERRQ(MatCholeskyFactor(M,NULL,NULL)); 1517 /* save the factors */ 1518 cum = 0; 1519 CHKERRQ(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor)); 1520 for (i=0;i<size_active_schur;i++) { 1521 CHKERRQ(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i)); 1522 cum += size_active_schur - i; 1523 } 1524 for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)])); 1525 CHKERRQ(MatSeqDenseInvertFactors_Private(M)); 1526 /* move back just the active dofs to the Schur complement */ 1527 for (i=0;i<size_active_schur;i++) { 1528 CHKERRQ(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur)); 1529 } 1530 CHKERRQ(PetscFree(tdata)); 1531 CHKERRQ(MatDestroy(&M)); 1532 } else { /* we can factorize and invert just the activedofs */ 1533 Mat M; 1534 PetscScalar *aux; 1535 1536 CHKERRQ(PetscMalloc1(nd,&aux)); 1537 for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)]; 1538 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M)); 1539 CHKERRQ(MatDenseSetLDA(M,size_schur)); 1540 CHKERRQ(MatSetOption(M,MAT_SPD,PETSC_TRUE)); 1541 CHKERRQ(MatCholeskyFactor(M,NULL,NULL)); 1542 CHKERRQ(MatSeqDenseInvertFactors_Private(M)); 1543 CHKERRQ(MatDestroy(&M)); 1544 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M)); 1545 CHKERRQ(MatZeroEntries(M)); 1546 CHKERRQ(MatDestroy(&M)); 1547 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M)); 1548 CHKERRQ(MatDenseSetLDA(M,size_schur)); 1549 CHKERRQ(MatZeroEntries(M)); 1550 CHKERRQ(MatDestroy(&M)); 1551 for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i]; 1552 CHKERRQ(PetscFree(aux)); 1553 } 1554 CHKERRQ(MatDenseRestoreArray(S_all_inv,&data)); 1555 } else { /* use MatFactor calls to invert S */ 1556 CHKERRQ(MatFactorInvertSchurComplement(F)); 1557 CHKERRQ(MatFactorGetSchurComplement(F,&S_all_inv,NULL)); 1558 } 1559 } else { /* we need to invert explicitly since we are not using MatFactor for S */ 1560 CHKERRQ(PetscObjectReference((PetscObject)S_all)); 1561 S_all_inv = S_all; 1562 CHKERRQ(MatDenseGetArray(S_all_inv,&S_data)); 1563 CHKERRQ(PetscBLASIntCast(size_schur,&B_N)); 1564 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1565 if (use_potr) { 1566 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr)); 1567 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 1568 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr)); 1569 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1570 } else if (use_sytr) { 1571 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1572 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1573 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr)); 1574 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1575 } else { 1576 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr)); 1577 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1578 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1579 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1580 } 1581 CHKERRQ(PetscLogFlops(1.0*size_schur*size_schur*size_schur)); 1582 CHKERRQ(PetscFPTrapPop()); 1583 CHKERRQ(MatDenseRestoreArray(S_all_inv,&S_data)); 1584 } 1585 /* S_Ej_tilda_all */ 1586 cum = cum2 = 0; 1587 CHKERRQ(MatDenseGetArrayRead(S_all_inv,&rS_data)); 1588 for (i=0;i<sub_schurs->n_subs;i++) { 1589 PetscInt j; 1590 1591 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1592 /* get (St^-1)_E */ 1593 /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 1594 will be properly accessed later during adaptive selection */ 1595 if (S_lower_triangular) { 1596 PetscInt k; 1597 if (sub_schurs->change) { 1598 for (k=0;k<subset_size;k++) { 1599 for (j=k;j<subset_size;j++) { 1600 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1601 work[j*subset_size+k] = work[k*subset_size+j]; 1602 } 1603 } 1604 } else { 1605 for (k=0;k<subset_size;k++) { 1606 for (j=k;j<subset_size;j++) { 1607 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1608 } 1609 } 1610 } 1611 } else { 1612 PetscInt k; 1613 for (k=0;k<subset_size;k++) { 1614 for (j=0;j<subset_size;j++) { 1615 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j]; 1616 } 1617 } 1618 } 1619 if (sub_schurs->change) { 1620 Mat change_sub,SEj,T; 1621 1622 /* change basis */ 1623 CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL)); 1624 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj)); 1625 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1626 Mat T2; 1627 CHKERRQ(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2)); 1628 CHKERRQ(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1629 CHKERRQ(MatDestroy(&T2)); 1630 CHKERRQ(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T)); 1631 } else { 1632 CHKERRQ(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T)); 1633 } 1634 CHKERRQ(MatCopy(T,SEj,SAME_NONZERO_PATTERN)); 1635 CHKERRQ(MatDestroy(&T)); 1636 /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */ 1637 CHKERRQ(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL)); 1638 CHKERRQ(MatDestroy(&SEj)); 1639 } 1640 CHKERRQ(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size)); 1641 cum += subset_size; 1642 cum2 += subset_size*(size_schur + 1); 1643 SEjinv_arr += subset_size*subset_size; 1644 } 1645 CHKERRQ(MatDenseRestoreArrayRead(S_all_inv,&rS_data)); 1646 if (solver_S) { 1647 if (schur_has_vertices) { 1648 CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED)); 1649 } else { 1650 CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED)); 1651 } 1652 } 1653 CHKERRQ(MatDestroy(&S_all_inv)); 1654 } 1655 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr)); 1656 1657 /* move back factors if needed */ 1658 if (schur_has_vertices) { 1659 Mat S_tmp; 1660 PetscInt nd = 0; 1661 1662 PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1663 CHKERRQ(MatFactorGetSchurComplement(F,&S_tmp,NULL)); 1664 if (use_potr) { 1665 PetscScalar *data; 1666 1667 CHKERRQ(MatDenseGetArray(S_tmp,&data)); 1668 CHKERRQ(PetscArrayzero(data,size_schur*size_schur)); 1669 1670 if (S_lower_triangular) { 1671 cum = 0; 1672 for (i=0;i<size_active_schur;i++) { 1673 CHKERRQ(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i)); 1674 cum += size_active_schur-i; 1675 } 1676 } else { 1677 CHKERRQ(PetscArraycpy(data,schur_factor,size_schur*size_schur)); 1678 } 1679 if (sub_schurs->is_dir) { 1680 CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1681 for (i=0;i<nd;i++) { 1682 data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i]; 1683 } 1684 } 1685 /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1686 set the diagonal entry of the Schur factor to a very large value */ 1687 for (i=size_active_schur+nd;i<size_schur;i++) { 1688 data[i*(size_schur+1)] = infty; 1689 } 1690 CHKERRQ(MatDenseRestoreArray(S_tmp,&data)); 1691 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices"); 1692 CHKERRQ(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED)); 1693 } 1694 } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */ 1695 PetscScalar *data; 1696 PetscInt nd = 0; 1697 1698 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1699 CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd)); 1700 } 1701 CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL)); 1702 CHKERRQ(MatDenseGetArray(S_all,&data)); 1703 for (i=0;i<size_active_schur;i++) { 1704 CHKERRQ(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 1705 } 1706 for (i=size_active_schur+nd;i<size_schur;i++) { 1707 CHKERRQ(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur)); 1708 data[i*(size_schur+1)] = infty; 1709 } 1710 CHKERRQ(MatDenseRestoreArray(S_all,&data)); 1711 CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED)); 1712 } 1713 CHKERRQ(PetscFree(work)); 1714 CHKERRQ(PetscFree(schur_factor)); 1715 CHKERRQ(VecDestroy(&Dall)); 1716 } 1717 CHKERRQ(ISDestroy(&is_I_layer)); 1718 CHKERRQ(MatDestroy(&S_all)); 1719 CHKERRQ(MatDestroy(&A_BB)); 1720 CHKERRQ(MatDestroy(&A_IB)); 1721 CHKERRQ(MatDestroy(&A_BI)); 1722 CHKERRQ(MatDestroy(&F)); 1723 1724 CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&nnz)); 1725 for (i=0;i<sub_schurs->n_subs;i++) { 1726 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i])); 1727 } 1728 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer)); 1729 CHKERRQ(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz)); 1730 CHKERRQ(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 1731 CHKERRQ(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY)); 1732 if (compute_Stilda) { 1733 CHKERRQ(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz)); 1734 CHKERRQ(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 1735 CHKERRQ(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY)); 1736 if (deluxe) { 1737 CHKERRQ(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz)); 1738 CHKERRQ(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 1739 CHKERRQ(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY)); 1740 } 1741 } 1742 CHKERRQ(ISDestroy(&is_I_layer)); 1743 1744 /* Get local part of (\sum_j S_Ej) */ 1745 if (!sub_schurs->sum_S_Ej_all) { 1746 CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all)); 1747 } 1748 CHKERRQ(VecSet(gstash,0.0)); 1749 CHKERRQ(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray)); 1750 CHKERRQ(VecPlaceArray(lstash,stasharray)); 1751 CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1752 CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1753 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray)); 1754 CHKERRQ(VecResetArray(lstash)); 1755 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray)); 1756 CHKERRQ(VecPlaceArray(lstash,stasharray)); 1757 CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1758 CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1759 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray)); 1760 CHKERRQ(VecResetArray(lstash)); 1761 1762 /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 1763 if (compute_Stilda) { 1764 CHKERRQ(VecSet(gstash,0.0)); 1765 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 1766 CHKERRQ(VecPlaceArray(lstash,stasharray)); 1767 CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1768 CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1769 CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1770 CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1771 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray)); 1772 CHKERRQ(VecResetArray(lstash)); 1773 if (deluxe) { 1774 CHKERRQ(VecSet(gstash,0.0)); 1775 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 1776 CHKERRQ(VecPlaceArray(lstash,stasharray)); 1777 CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1778 CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD)); 1779 CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1780 CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE)); 1781 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray)); 1782 CHKERRQ(VecResetArray(lstash)); 1783 } else { 1784 PetscScalar *array; 1785 PetscInt cum; 1786 1787 CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 1788 cum = 0; 1789 for (i=0;i<sub_schurs->n_subs;i++) { 1790 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1791 CHKERRQ(PetscBLASIntCast(subset_size,&B_N)); 1792 CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1793 if (use_potr) { 1794 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr)); 1795 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr); 1796 PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr)); 1797 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr); 1798 } else if (use_sytr) { 1799 PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1800 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr); 1801 PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr)); 1802 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr); 1803 } else { 1804 PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr)); 1805 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr); 1806 PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr)); 1807 PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr); 1808 } 1809 CHKERRQ(PetscLogFlops(1.0*subset_size*subset_size*subset_size)); 1810 CHKERRQ(PetscFPTrapPop()); 1811 cum += subset_size*subset_size; 1812 } 1813 CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array)); 1814 CHKERRQ(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 1815 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 1816 sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 1817 } 1818 } 1819 CHKERRQ(VecDestroy(&lstash)); 1820 CHKERRQ(VecDestroy(&gstash)); 1821 CHKERRQ(VecScatterDestroy(&sstash)); 1822 1823 if (matl_dbg_viewer) { 1824 PetscInt cum; 1825 1826 if (sub_schurs->S_Ej_all) { 1827 CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE")); 1828 CHKERRQ(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer)); 1829 } 1830 if (sub_schurs->sum_S_Ej_all) { 1831 CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE")); 1832 CHKERRQ(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer)); 1833 } 1834 if (sub_schurs->sum_S_Ej_inv_all) { 1835 CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm")); 1836 CHKERRQ(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer)); 1837 } 1838 if (sub_schurs->sum_S_Ej_tilda_all) { 1839 CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt")); 1840 CHKERRQ(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer)); 1841 } 1842 for (i=0,cum=0;i<sub_schurs->n_subs;i++) { 1843 IS is; 1844 char name[16]; 1845 1846 CHKERRQ(PetscSNPrintf(name,sizeof(name),"IE%D",i)); 1847 CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size)); 1848 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is)); 1849 CHKERRQ(PetscObjectSetName((PetscObject)is,name)); 1850 CHKERRQ(ISView(is,matl_dbg_viewer)); 1851 CHKERRQ(ISDestroy(&is)); 1852 cum += subset_size; 1853 } 1854 } 1855 1856 /* free workspace */ 1857 CHKERRQ(PetscViewerDestroy(&matl_dbg_viewer)); 1858 CHKERRQ(PetscFree2(Bwork,pivots)); 1859 CHKERRQ(PetscCommDestroy(&comm_n)); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc) 1864 { 1865 IS *faces,*edges,*all_cc,vertices; 1866 PetscInt i,n_faces,n_edges,n_all_cc; 1867 PetscBool is_sorted,ispardiso,ismumps; 1868 PetscErrorCode ierr; 1869 1870 PetscFunctionBegin; 1871 CHKERRQ(ISSorted(is_I,&is_sorted)); 1872 PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 1873 CHKERRQ(ISSorted(is_B,&is_sorted)); 1874 PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 1875 1876 /* reset any previous data */ 1877 CHKERRQ(PCBDDCSubSchursReset(sub_schurs)); 1878 1879 /* get index sets for faces and edges (already sorted by global ordering) */ 1880 CHKERRQ(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 1881 n_all_cc = n_faces+n_edges; 1882 CHKERRQ(PetscBTCreate(n_all_cc,&sub_schurs->is_edge)); 1883 CHKERRQ(PetscMalloc1(n_all_cc,&all_cc)); 1884 for (i=0;i<n_faces;i++) { 1885 if (copycc) { 1886 CHKERRQ(ISDuplicate(faces[i],&all_cc[i])); 1887 } else { 1888 CHKERRQ(PetscObjectReference((PetscObject)faces[i])); 1889 all_cc[i] = faces[i]; 1890 } 1891 } 1892 for (i=0;i<n_edges;i++) { 1893 if (copycc) { 1894 CHKERRQ(ISDuplicate(edges[i],&all_cc[n_faces+i])); 1895 } else { 1896 CHKERRQ(PetscObjectReference((PetscObject)edges[i])); 1897 all_cc[n_faces+i] = edges[i]; 1898 } 1899 CHKERRQ(PetscBTSet(sub_schurs->is_edge,n_faces+i)); 1900 } 1901 CHKERRQ(PetscObjectReference((PetscObject)vertices)); 1902 sub_schurs->is_vertices = vertices; 1903 CHKERRQ(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices)); 1904 sub_schurs->is_dir = NULL; 1905 CHKERRQ(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir)); 1906 1907 /* Determine if MatFactor can be used */ 1908 CHKERRQ(PetscStrallocpy(prefix,&sub_schurs->prefix)); 1909 #if defined(PETSC_HAVE_MUMPS) 1910 CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type))); 1911 #elif defined(PETSC_HAVE_MKL_PARDISO) 1912 CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type))); 1913 #else 1914 CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type))); 1915 #endif 1916 #if defined(PETSC_USE_COMPLEX) 1917 sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 1918 #else 1919 sub_schurs->is_hermitian = PETSC_TRUE; 1920 #endif 1921 sub_schurs->is_posdef = PETSC_TRUE; 1922 sub_schurs->is_symmetric = PETSC_TRUE; 1923 sub_schurs->debug = PETSC_FALSE; 1924 sub_schurs->restrict_comm = PETSC_FALSE; 1925 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 1926 CHKERRQ(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL)); 1927 CHKERRQ(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL)); 1928 CHKERRQ(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL)); 1929 CHKERRQ(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL)); 1930 CHKERRQ(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL)); 1931 CHKERRQ(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL)); 1932 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1933 CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps)); 1934 CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso)); 1935 sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 1936 1937 /* for reals, symmetric and hermitian are synonims */ 1938 #if !defined(PETSC_USE_COMPLEX) 1939 sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 1940 sub_schurs->is_hermitian = sub_schurs->is_symmetric; 1941 #endif 1942 1943 CHKERRQ(PetscObjectReference((PetscObject)is_I)); 1944 sub_schurs->is_I = is_I; 1945 CHKERRQ(PetscObjectReference((PetscObject)is_B)); 1946 sub_schurs->is_B = is_B; 1947 CHKERRQ(PetscObjectReference((PetscObject)graph->l2gmap)); 1948 sub_schurs->l2gmap = graph->l2gmap; 1949 CHKERRQ(PetscObjectReference((PetscObject)BtoNmap)); 1950 sub_schurs->BtoNmap = BtoNmap; 1951 sub_schurs->n_subs = n_all_cc; 1952 sub_schurs->is_subs = all_cc; 1953 sub_schurs->S_Ej_all = NULL; 1954 sub_schurs->sum_S_Ej_all = NULL; 1955 sub_schurs->sum_S_Ej_inv_all = NULL; 1956 sub_schurs->sum_S_Ej_tilda_all = NULL; 1957 sub_schurs->is_Ej_all = NULL; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 1962 { 1963 PCBDDCSubSchurs schurs_ctx; 1964 1965 PetscFunctionBegin; 1966 CHKERRQ(PetscNew(&schurs_ctx)); 1967 schurs_ctx->n_subs = 0; 1968 *sub_schurs = schurs_ctx; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 1973 { 1974 PetscInt i; 1975 1976 PetscFunctionBegin; 1977 if (!sub_schurs) PetscFunctionReturn(0); 1978 CHKERRQ(PetscFree(sub_schurs->prefix)); 1979 CHKERRQ(MatDestroy(&sub_schurs->A)); 1980 CHKERRQ(MatDestroy(&sub_schurs->S)); 1981 CHKERRQ(ISDestroy(&sub_schurs->is_I)); 1982 CHKERRQ(ISDestroy(&sub_schurs->is_B)); 1983 CHKERRQ(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 1984 CHKERRQ(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 1985 CHKERRQ(MatDestroy(&sub_schurs->S_Ej_all)); 1986 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_all)); 1987 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 1988 CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 1989 CHKERRQ(ISDestroy(&sub_schurs->is_Ej_all)); 1990 CHKERRQ(ISDestroy(&sub_schurs->is_vertices)); 1991 CHKERRQ(ISDestroy(&sub_schurs->is_dir)); 1992 CHKERRQ(PetscBTDestroy(&sub_schurs->is_edge)); 1993 for (i=0;i<sub_schurs->n_subs;i++) { 1994 CHKERRQ(ISDestroy(&sub_schurs->is_subs[i])); 1995 } 1996 if (sub_schurs->n_subs) { 1997 CHKERRQ(PetscFree(sub_schurs->is_subs)); 1998 } 1999 if (sub_schurs->reuse_solver) { 2000 CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2001 } 2002 CHKERRQ(PetscFree(sub_schurs->reuse_solver)); 2003 if (sub_schurs->change) { 2004 for (i=0;i<sub_schurs->n_subs;i++) { 2005 CHKERRQ(KSPDestroy(&sub_schurs->change[i])); 2006 CHKERRQ(ISDestroy(&sub_schurs->change_primal_sub[i])); 2007 } 2008 } 2009 CHKERRQ(PetscFree(sub_schurs->change)); 2010 CHKERRQ(PetscFree(sub_schurs->change_primal_sub)); 2011 sub_schurs->n_subs = 0; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs) 2016 { 2017 PetscFunctionBegin; 2018 CHKERRQ(PCBDDCSubSchursReset(*sub_schurs)); 2019 CHKERRQ(PetscFree(*sub_schurs)); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 2024 { 2025 PetscInt i,j,n; 2026 2027 PetscFunctionBegin; 2028 n = 0; 2029 for (i=-n_prev;i<0;i++) { 2030 PetscInt start_dof = queue_tip[i]; 2031 for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 2032 PetscInt dof = adjncy[j]; 2033 if (!PetscBTLookup(touched,dof)) { 2034 CHKERRQ(PetscBTSet(touched,dof)); 2035 queue_tip[n] = dof; 2036 n++; 2037 } 2038 } 2039 } 2040 *n_added = n; 2041 PetscFunctionReturn(0); 2042 } 2043