1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 10 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 11 { 12 Mat loc_divudotp; 13 Vec p,v,vins,quad_vec,*quad_vecs; 14 ISLocalToGlobalMapping rmap; 15 IS *faces,*edges; 16 PetscScalar *vals; 17 const PetscScalar *array; 18 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 19 PetscMPIInt rank; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 24 if (graph->twodim) { 25 lmaxneighs = 2; 26 } else { 27 lmaxneighs = 1; 28 for (i=0;i<ne;i++) { 29 const PetscInt *idxs; 30 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 31 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 32 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 33 } 34 lmaxneighs++; /* graph count does not include self */ 35 } 36 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 37 maxsize = 0; 38 for (i=0;i<ne;i++) { 39 PetscInt nn; 40 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 41 maxsize = PetscMax(maxsize,nn); 42 } 43 for (i=0;i<nf;i++) { 44 PetscInt nn; 45 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 46 maxsize = PetscMax(maxsize,nn); 47 } 48 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 49 /* create vectors to hold quadrature weights */ 50 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 51 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 52 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 53 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 54 for (i=0;i<maxneighs;i++) { 55 PetscInt first,last; 56 57 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 58 /* the near-null space fo BDDC carries information on quadrature weights, 59 and these can be collinear -> so cheat with MatNullSpaceCreate 60 and create a suitable set of basis vectors first */ 61 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 62 if (i>=first && i < last) { 63 PetscScalar *data; 64 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 65 data[i-first] = 1.; 66 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 67 } 68 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 69 } 70 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 71 for (i=0;i<maxneighs;i++) { /* reset vectors */ 72 PetscInt first,last; 73 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 74 if (i>=first && i < last) { 75 PetscScalar *data; 76 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 77 data[i-first] = 0.; 78 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 79 } 80 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 81 } 82 /* compute local quad vec */ 83 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 84 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 85 ierr = VecSet(p,1.);CHKERRQ(ierr); 86 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 87 if (vl2l) { 88 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 89 } else { 90 vins = v; 91 } 92 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 93 ierr = VecDestroy(&p);CHKERRQ(ierr); 94 /* insert in global quadrature vecs */ 95 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 96 for (i=0;i<nf;i++) { 97 const PetscInt *idxs; 98 PetscInt idx,nn,j; 99 100 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 101 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 102 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 103 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 104 idx = -(idx+1); 105 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 106 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 107 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 108 } 109 for (i=0;i<ne;i++) { 110 const PetscInt *idxs; 111 PetscInt idx,nn,j; 112 113 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 114 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 115 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 116 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 117 idx = -(idx+1); 118 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 119 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 120 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 121 } 122 ierr = PetscFree(edges);CHKERRQ(ierr); 123 ierr = PetscFree(faces);CHKERRQ(ierr); 124 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 125 if (vl2l) { 126 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 127 } 128 ierr = VecDestroy(&v);CHKERRQ(ierr); 129 ierr = PetscFree(vals);CHKERRQ(ierr); 130 131 /* assemble near null space */ 132 for (i=0;i<maxneighs;i++) { 133 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 134 } 135 for (i=0;i<maxneighs;i++) { 136 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 137 } 138 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 145 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 146 { 147 PetscErrorCode ierr; 148 Vec local,global; 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 151 152 PetscFunctionBegin; 153 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 154 /* need to convert from global to local topology information and remove references to information in global ordering */ 155 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 156 if (pcbddc->user_provided_isfordofs) { 157 if (pcbddc->n_ISForDofs) { 158 PetscInt i; 159 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->n_ISForDofs;i++) { 161 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 162 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 163 } 164 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 165 pcbddc->n_ISForDofs = 0; 166 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 167 } 168 } else { 169 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 170 PetscInt i, n = matis->A->rmap->n; 171 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 172 if (i > 1) { 173 pcbddc->n_ISForDofsLocal = i; 174 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 175 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 176 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 177 } 178 } 179 } 180 } 181 182 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 183 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 184 } 185 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 186 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 187 } 188 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 189 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 190 } 191 ierr = VecDestroy(&global);CHKERRQ(ierr); 192 ierr = VecDestroy(&local);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 198 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 199 { 200 PC_IS *pcis = (PC_IS*)(pc->data); 201 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 202 PetscErrorCode ierr; 203 204 PetscFunctionBegin; 205 if (!pcbddc->benign_have_null) { 206 PetscFunctionReturn(0); 207 } 208 if (pcbddc->ChangeOfBasisMatrix) { 209 Vec swap; 210 211 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 212 swap = pcbddc->work_change; 213 pcbddc->work_change = r; 214 r = swap; 215 } 216 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 217 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 218 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 219 ierr = VecSet(z,0.);CHKERRQ(ierr); 220 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 221 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 222 if (pcbddc->ChangeOfBasisMatrix) { 223 Vec swap; 224 225 swap = r; 226 r = pcbddc->work_change; 227 pcbddc->work_change = swap; 228 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 229 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 230 } 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 236 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 237 { 238 PCBDDCBenignMatMult_ctx ctx; 239 PetscErrorCode ierr; 240 PetscBool apply_right,apply_left,reset_x; 241 242 PetscFunctionBegin; 243 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 244 if (transpose) { 245 apply_right = ctx->apply_left; 246 apply_left = ctx->apply_right; 247 } else { 248 apply_right = ctx->apply_right; 249 apply_left = ctx->apply_left; 250 } 251 reset_x = PETSC_FALSE; 252 if (apply_right) { 253 const PetscScalar *ax; 254 PetscInt nl,i; 255 256 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 257 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 258 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 259 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 260 for (i=0;i<ctx->benign_n;i++) { 261 PetscScalar sum,val; 262 const PetscInt *idxs; 263 PetscInt nz,j; 264 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 265 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 266 sum = 0.; 267 if (ctx->apply_p0) { 268 val = ctx->work[idxs[nz-1]]; 269 for (j=0;j<nz-1;j++) { 270 sum += ctx->work[idxs[j]]; 271 ctx->work[idxs[j]] += val; 272 } 273 } else { 274 for (j=0;j<nz-1;j++) { 275 sum += ctx->work[idxs[j]]; 276 } 277 } 278 ctx->work[idxs[nz-1]] -= sum; 279 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 280 } 281 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 282 reset_x = PETSC_TRUE; 283 } 284 if (transpose) { 285 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 286 } else { 287 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 288 } 289 if (reset_x) { 290 ierr = VecResetArray(x);CHKERRQ(ierr); 291 } 292 if (apply_left) { 293 PetscScalar *ay; 294 PetscInt i; 295 296 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 297 for (i=0;i<ctx->benign_n;i++) { 298 PetscScalar sum,val; 299 const PetscInt *idxs; 300 PetscInt nz,j; 301 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 302 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 303 val = -ay[idxs[nz-1]]; 304 if (ctx->apply_p0) { 305 sum = 0.; 306 for (j=0;j<nz-1;j++) { 307 sum += ay[idxs[j]]; 308 ay[idxs[j]] += val; 309 } 310 ay[idxs[nz-1]] += sum; 311 } else { 312 for (j=0;j<nz-1;j++) { 313 ay[idxs[j]] += val; 314 } 315 ay[idxs[nz-1]] = 0.; 316 } 317 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 318 } 319 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 326 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 327 { 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 337 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 338 { 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCBenignShellMat" 348 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 349 { 350 PC_IS *pcis = (PC_IS*)pc->data; 351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 352 PCBDDCBenignMatMult_ctx ctx; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 if (!restore) { 357 Mat A_IB,A_BI; 358 PetscScalar *work; 359 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 360 361 if (pcbddc->benign_original_mat) { 362 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 363 } 364 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 365 PetscFunctionReturn(0); 366 } 367 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 368 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 369 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 370 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 371 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 372 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 373 ierr = PetscNew(&ctx);CHKERRQ(ierr); 374 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 375 ctx->apply_left = PETSC_TRUE; 376 ctx->apply_right = PETSC_FALSE; 377 ctx->apply_p0 = PETSC_FALSE; 378 ctx->benign_n = pcbddc->benign_n; 379 if (reuse) { 380 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 381 ctx->free = PETSC_FALSE; 382 } else { /* TODO: could be optimized for successive solves */ 383 ISLocalToGlobalMapping N_to_D; 384 PetscInt i; 385 386 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 387 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 388 for (i=0;i<pcbddc->benign_n;i++) { 389 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 390 } 391 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 392 ctx->free = PETSC_TRUE; 393 } 394 ctx->A = pcis->A_IB; 395 ctx->work = work; 396 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 397 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 399 pcis->A_IB = A_IB; 400 401 /* A_BI as A_IB^T */ 402 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 403 pcbddc->benign_original_mat = pcis->A_BI; 404 pcis->A_BI = A_BI; 405 } else { 406 if (!pcbddc->benign_original_mat) { 407 PetscFunctionReturn(0); 408 } 409 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 411 pcis->A_IB = ctx->A; 412 ctx->A = NULL; 413 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 414 pcis->A_BI = pcbddc->benign_original_mat; 415 pcbddc->benign_original_mat = NULL; 416 if (ctx->free) { 417 PetscInt i; 418 for (i=0;i<ctx->benign_n;i++) { 419 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 420 } 421 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 422 } 423 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 424 ierr = PetscFree(ctx);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* used just in bddc debug mode */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "PCBDDCBenignProject" 432 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 433 { 434 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 435 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 436 Mat An; 437 PetscErrorCode ierr; 438 439 PetscFunctionBegin; 440 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 441 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 442 if (is1) { 443 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 444 ierr = MatDestroy(&An);CHKERRQ(ierr); 445 } else { 446 *B = An; 447 } 448 PetscFunctionReturn(0); 449 } 450 451 /* TODO: add reuse flag */ 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSeqAIJCompress" 454 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 455 { 456 Mat Bt; 457 PetscScalar *a,*bdata; 458 const PetscInt *ii,*ij; 459 PetscInt m,n,i,nnz,*bii,*bij; 460 PetscBool flg_row; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 465 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 466 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 467 nnz = n; 468 for (i=0;i<ii[n];i++) { 469 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 470 } 471 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 472 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 473 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 474 nnz = 0; 475 bii[0] = 0; 476 for (i=0;i<n;i++) { 477 PetscInt j; 478 for (j=ii[i];j<ii[i+1];j++) { 479 PetscScalar entry = a[j]; 480 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 481 bij[nnz] = ij[j]; 482 bdata[nnz] = entry; 483 nnz++; 484 } 485 } 486 bii[i+1] = nnz; 487 } 488 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 489 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 490 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 491 { 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 493 b->free_a = PETSC_TRUE; 494 b->free_ij = PETSC_TRUE; 495 } 496 *B = Bt; 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatDetectDisconnectedComponents" 502 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 503 { 504 Mat B; 505 IS is_dummy,*cc_n; 506 ISLocalToGlobalMapping l2gmap_dummy; 507 PCBDDCGraph graph; 508 PetscInt i,n; 509 PetscInt *xadj,*adjncy; 510 PetscInt *xadj_filtered,*adjncy_filtered; 511 PetscBool flg_row,isseqaij; 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 if (!A->rmap->N || !A->cmap->N) { 516 *ncc = 0; 517 *cc = NULL; 518 PetscFunctionReturn(0); 519 } 520 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 521 if (!isseqaij && filter) { 522 PetscBool isseqdense; 523 524 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 525 if (!isseqdense) { 526 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 527 } else { /* TODO: rectangular case and LDA */ 528 PetscScalar *array; 529 PetscReal chop=1.e-6; 530 531 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 532 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 533 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 534 for (i=0;i<n;i++) { 535 PetscInt j; 536 for (j=i+1;j<n;j++) { 537 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 538 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 539 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 540 } 541 } 542 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 543 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 544 } 545 } else { 546 B = A; 547 } 548 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 549 550 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 551 if (filter) { 552 PetscScalar *data; 553 PetscInt j,cum; 554 555 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 556 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 557 cum = 0; 558 for (i=0;i<n;i++) { 559 PetscInt t; 560 561 for (j=xadj[i];j<xadj[i+1];j++) { 562 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 563 continue; 564 } 565 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 566 } 567 t = xadj_filtered[i]; 568 xadj_filtered[i] = cum; 569 cum += t; 570 } 571 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 572 } else { 573 xadj_filtered = NULL; 574 adjncy_filtered = NULL; 575 } 576 577 /* compute local connected components using PCBDDCGraph */ 578 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 579 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 580 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 581 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 582 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 583 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 584 if (xadj_filtered) { 585 graph->xadj = xadj_filtered; 586 graph->adjncy = adjncy_filtered; 587 } else { 588 graph->xadj = xadj; 589 graph->adjncy = adjncy; 590 } 591 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 592 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 593 /* partial clean up */ 594 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 595 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 596 if (A != B) { 597 ierr = MatDestroy(&B);CHKERRQ(ierr); 598 } 599 600 /* get back data */ 601 if (ncc) *ncc = graph->ncc; 602 if (cc) { 603 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 604 for (i=0;i<graph->ncc;i++) { 605 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 606 } 607 *cc = cc_n; 608 } 609 /* clean up graph */ 610 graph->xadj = 0; 611 graph->adjncy = 0; 612 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "PCBDDCBenignCheck" 618 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 619 { 620 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 621 PC_IS* pcis = (PC_IS*)(pc->data); 622 IS dirIS = NULL; 623 PetscInt i; 624 PetscErrorCode ierr; 625 626 PetscFunctionBegin; 627 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 628 if (zerodiag) { 629 Mat A; 630 Vec vec3_N; 631 PetscScalar *vals; 632 const PetscInt *idxs; 633 PetscInt nz,*count; 634 635 /* p0 */ 636 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 637 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 638 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 639 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 640 for (i=0;i<nz;i++) vals[i] = 1.; 641 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 643 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 644 /* v_I */ 645 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 646 for (i=0;i<nz;i++) vals[i] = 0.; 647 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 649 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 650 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 651 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 652 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 653 if (dirIS) { 654 PetscInt n; 655 656 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 657 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 658 for (i=0;i<n;i++) vals[i] = 0.; 659 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 661 } 662 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 663 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 664 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 665 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 666 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 667 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 668 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 669 if (PetscAbsScalar(vals[0]) > 1.e-1) { 670 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 671 } 672 ierr = PetscFree(vals);CHKERRQ(ierr); 673 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 674 675 /* there should not be any pressure dofs lying on the interface */ 676 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 677 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 678 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 679 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 680 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 681 for (i=0;i<nz;i++) { 682 if (count[idxs[i]]) { 683 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 684 } 685 } 686 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 687 ierr = PetscFree(count);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 690 691 /* check PCBDDCBenignGetOrSetP0 */ 692 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 693 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 694 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 695 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 696 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 697 for (i=0;i<pcbddc->benign_n;i++) { 698 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 699 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 700 } 701 } 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 707 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 708 { 709 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 710 IS pressures,zerodiag,*zerodiag_subs; 711 PetscInt nz,n; 712 PetscInt *interior_dofs,n_interior_dofs; 713 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 719 for (n=0;n<pcbddc->benign_n;n++) { 720 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 721 } 722 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 723 pcbddc->benign_n = 0; 724 /* if a local info on dofs is present, assumes that the last field represents "pressures" 725 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 726 Checks if all the pressure dofs in each subdomain have a zero diagonal 727 If not, a change of basis on pressures is not needed 728 since the local Schur complements are already SPD 729 */ 730 has_null_pressures = PETSC_TRUE; 731 have_null = PETSC_TRUE; 732 if (pcbddc->n_ISForDofsLocal) { 733 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 734 735 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 736 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 737 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 738 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 740 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 741 if (!sorted) { 742 ierr = ISSort(pressures);CHKERRQ(ierr); 743 } 744 } else { 745 pressures = NULL; 746 } 747 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 748 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 749 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 750 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 751 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 752 if (!sorted) { 753 ierr = ISSort(zerodiag);CHKERRQ(ierr); 754 } 755 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 756 if (!nz) { 757 if (n) have_null = PETSC_FALSE; 758 has_null_pressures = PETSC_FALSE; 759 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 760 } 761 recompute_zerodiag = PETSC_FALSE; 762 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 763 zerodiag_subs = NULL; 764 pcbddc->benign_n = 0; 765 n_interior_dofs = 0; 766 interior_dofs = NULL; 767 if (pcbddc->current_level) { /* need to compute interior nodes */ 768 PetscInt n,i,j; 769 PetscInt n_neigh,*neigh,*n_shared,**shared; 770 PetscInt *iwork; 771 772 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 773 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 774 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 775 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 776 for (i=0;i<n_neigh;i++) 777 for (j=0;j<n_shared[i];j++) 778 iwork[shared[i][j]] += 1; 779 for (i=0;i<n;i++) 780 if (!iwork[i]) 781 interior_dofs[n_interior_dofs++] = i; 782 ierr = PetscFree(iwork);CHKERRQ(ierr); 783 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 784 } 785 if (has_null_pressures) { 786 IS *subs; 787 PetscInt nsubs,i,j,nl; 788 const PetscInt *idxs; 789 PetscScalar *array; 790 Vec *work; 791 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 792 793 subs = pcbddc->local_subs; 794 nsubs = pcbddc->n_local_subs; 795 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 796 if (pcbddc->current_level) { 797 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 798 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 799 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 800 /* work[0] = 1_p */ 801 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 802 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 803 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 804 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 805 /* work[0] = 1_v */ 806 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 807 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 808 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 809 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 810 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 811 } 812 if (nsubs > 1) { 813 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 814 for (i=0;i<nsubs;i++) { 815 ISLocalToGlobalMapping l2g; 816 IS t_zerodiag_subs; 817 PetscInt nl; 818 819 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 820 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 821 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 822 if (nl) { 823 PetscBool valid = PETSC_TRUE; 824 825 if (pcbddc->current_level) { 826 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 827 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 828 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 829 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 830 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 831 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 832 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 833 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 834 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 835 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 836 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 837 for (j=0;j<n_interior_dofs;j++) { 838 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 839 valid = PETSC_FALSE; 840 break; 841 } 842 } 843 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 844 } 845 if (valid && pcbddc->NeumannBoundariesLocal) { 846 IS t_bc; 847 PetscInt nzb; 848 849 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 850 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 851 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 852 if (nzb) valid = PETSC_FALSE; 853 } 854 if (valid && pressures) { 855 IS t_pressure_subs; 856 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 857 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 858 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 859 } 860 if (valid) { 861 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 862 pcbddc->benign_n++; 863 } else { 864 recompute_zerodiag = PETSC_TRUE; 865 } 866 } 867 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 868 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 869 } 870 } else { /* there's just one subdomain (or zero if they have not been detected */ 871 PetscBool valid = PETSC_TRUE; 872 873 if (pcbddc->NeumannBoundariesLocal) { 874 PetscInt nzb; 875 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 876 if (nzb) valid = PETSC_FALSE; 877 } 878 if (valid && pressures) { 879 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 880 } 881 if (valid && pcbddc->current_level) { 882 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 883 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 884 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 885 for (j=0;j<n_interior_dofs;j++) { 886 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 887 valid = PETSC_FALSE; 888 break; 889 } 890 } 891 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 892 } 893 if (valid) { 894 pcbddc->benign_n = 1; 895 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 896 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 897 zerodiag_subs[0] = zerodiag; 898 } 899 } 900 if (pcbddc->current_level) { 901 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 902 } 903 } 904 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 905 906 if (!pcbddc->benign_n) { 907 PetscInt n; 908 909 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 910 recompute_zerodiag = PETSC_FALSE; 911 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 912 if (n) { 913 has_null_pressures = PETSC_FALSE; 914 have_null = PETSC_FALSE; 915 } 916 } 917 918 /* final check for null pressures */ 919 if (zerodiag && pressures) { 920 PetscInt nz,np; 921 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 922 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 923 if (nz != np) have_null = PETSC_FALSE; 924 } 925 926 if (recompute_zerodiag) { 927 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 928 if (pcbddc->benign_n == 1) { 929 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 930 zerodiag = zerodiag_subs[0]; 931 } else { 932 PetscInt i,nzn,*new_idxs; 933 934 nzn = 0; 935 for (i=0;i<pcbddc->benign_n;i++) { 936 PetscInt ns; 937 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 938 nzn += ns; 939 } 940 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 941 nzn = 0; 942 for (i=0;i<pcbddc->benign_n;i++) { 943 PetscInt ns,*idxs; 944 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 945 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 946 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 947 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 948 nzn += ns; 949 } 950 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 951 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 952 } 953 have_null = PETSC_FALSE; 954 } 955 956 /* Prepare matrix to compute no-net-flux */ 957 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 958 Mat A,loc_divudotp; 959 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 960 IS row,col,isused = NULL; 961 PetscInt M,N,n,st,n_isused; 962 963 if (pressures) { 964 isused = pressures; 965 } else { 966 isused = zerodiag; 967 } 968 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 969 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 970 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 971 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 972 n_isused = 0; 973 if (isused) { 974 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 975 } 976 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 977 st = st-n_isused; 978 if (n) { 979 const PetscInt *gidxs; 980 981 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 982 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 983 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 984 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 985 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 986 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 987 } else { 988 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 989 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 990 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 991 } 992 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 993 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 994 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 995 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 996 ierr = ISDestroy(&row);CHKERRQ(ierr); 997 ierr = ISDestroy(&col);CHKERRQ(ierr); 998 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 999 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1000 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1001 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1002 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1003 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1004 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1005 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1006 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1007 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1008 } 1009 1010 /* change of basis and p0 dofs */ 1011 if (has_null_pressures) { 1012 IS zerodiagc; 1013 const PetscInt *idxs,*idxsc; 1014 PetscInt i,s,*nnz; 1015 1016 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1017 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1018 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1019 /* local change of basis for pressures */ 1020 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1021 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1022 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1023 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1024 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1025 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1026 for (i=0;i<pcbddc->benign_n;i++) { 1027 PetscInt nzs,j; 1028 1029 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1030 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1031 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1032 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1033 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1034 } 1035 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1036 ierr = PetscFree(nnz);CHKERRQ(ierr); 1037 /* set identity on velocities */ 1038 for (i=0;i<n-nz;i++) { 1039 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1040 } 1041 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1042 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1043 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1044 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1045 /* set change on pressures */ 1046 for (s=0;s<pcbddc->benign_n;s++) { 1047 PetscScalar *array; 1048 PetscInt nzs; 1049 1050 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1051 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1052 for (i=0;i<nzs-1;i++) { 1053 PetscScalar vals[2]; 1054 PetscInt cols[2]; 1055 1056 cols[0] = idxs[i]; 1057 cols[1] = idxs[nzs-1]; 1058 vals[0] = 1.; 1059 vals[1] = 1.; 1060 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1061 } 1062 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1063 for (i=0;i<nzs-1;i++) array[i] = -1.; 1064 array[nzs-1] = 1.; 1065 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1066 /* store local idxs for p0 */ 1067 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1068 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1069 ierr = PetscFree(array);CHKERRQ(ierr); 1070 } 1071 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1072 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1073 /* project if needed */ 1074 if (pcbddc->benign_change_explicit) { 1075 Mat M; 1076 1077 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1078 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1079 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1080 ierr = MatDestroy(&M);CHKERRQ(ierr); 1081 } 1082 /* store global idxs for p0 */ 1083 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1084 } 1085 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1086 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1087 1088 /* determines if the coarse solver will be singular or not */ 1089 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1090 /* determines if the problem has subdomains with 0 pressure block */ 1091 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1092 *zerodiaglocal = zerodiag; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1098 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1099 { 1100 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1101 PetscScalar *array; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 if (!pcbddc->benign_sf) { 1106 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1107 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1108 } 1109 if (get) { 1110 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1111 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1112 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1113 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1114 } else { 1115 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1116 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1117 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1118 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1125 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1126 { 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 /* TODO: add error checking 1132 - avoid nested pop (or push) calls. 1133 - cannot push before pop. 1134 - cannot call this if pcbddc->local_mat is NULL 1135 */ 1136 if (!pcbddc->benign_n) { 1137 PetscFunctionReturn(0); 1138 } 1139 if (pop) { 1140 if (pcbddc->benign_change_explicit) { 1141 IS is_p0; 1142 MatReuse reuse; 1143 1144 /* extract B_0 */ 1145 reuse = MAT_INITIAL_MATRIX; 1146 if (pcbddc->benign_B0) { 1147 reuse = MAT_REUSE_MATRIX; 1148 } 1149 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1150 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1151 /* remove rows and cols from local problem */ 1152 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1153 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1154 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1155 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1156 } else { 1157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1158 PetscScalar *vals; 1159 PetscInt i,n,*idxs_ins; 1160 1161 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1162 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1163 if (!pcbddc->benign_B0) { 1164 PetscInt *nnz; 1165 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1166 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1167 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1168 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1169 for (i=0;i<pcbddc->benign_n;i++) { 1170 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1171 nnz[i] = n - nnz[i]; 1172 } 1173 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1174 ierr = PetscFree(nnz);CHKERRQ(ierr); 1175 } 1176 1177 for (i=0;i<pcbddc->benign_n;i++) { 1178 PetscScalar *array; 1179 PetscInt *idxs,j,nz,cum; 1180 1181 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1182 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1183 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1184 for (j=0;j<nz;j++) vals[j] = 1.; 1185 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1186 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1187 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1188 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1189 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1190 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1191 cum = 0; 1192 for (j=0;j<n;j++) { 1193 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1194 vals[cum] = array[j]; 1195 idxs_ins[cum] = j; 1196 cum++; 1197 } 1198 } 1199 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1201 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1202 } 1203 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1204 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1205 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1206 } 1207 } else { /* push */ 1208 if (pcbddc->benign_change_explicit) { 1209 PetscInt i; 1210 1211 for (i=0;i<pcbddc->benign_n;i++) { 1212 PetscScalar *B0_vals; 1213 PetscInt *B0_cols,B0_ncol; 1214 1215 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1216 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1217 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1218 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1219 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1220 } 1221 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1222 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1223 } else { 1224 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1225 } 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1232 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1233 { 1234 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1235 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1236 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1237 PetscBLASInt *B_iwork,*B_ifail; 1238 PetscScalar *work,lwork; 1239 PetscScalar *St,*S,*eigv; 1240 PetscScalar *Sarray,*Starray; 1241 PetscReal *eigs,thresh; 1242 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1243 PetscBool allocated_S_St; 1244 #if defined(PETSC_USE_COMPLEX) 1245 PetscReal *rwork; 1246 #endif 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1251 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1252 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1253 1254 if (pcbddc->dbg_flag) { 1255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1256 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1258 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1259 } 1260 1261 if (pcbddc->dbg_flag) { 1262 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1263 } 1264 1265 /* max size of subsets */ 1266 mss = 0; 1267 for (i=0;i<sub_schurs->n_subs;i++) { 1268 PetscInt subset_size; 1269 1270 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1271 mss = PetscMax(mss,subset_size); 1272 } 1273 1274 /* min/max and threshold */ 1275 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1276 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1277 nmax = PetscMax(nmin,nmax); 1278 allocated_S_St = PETSC_FALSE; 1279 if (nmin) { 1280 allocated_S_St = PETSC_TRUE; 1281 } 1282 1283 /* allocate lapack workspace */ 1284 cum = cum2 = 0; 1285 maxneigs = 0; 1286 for (i=0;i<sub_schurs->n_subs;i++) { 1287 PetscInt n,subset_size; 1288 1289 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1290 n = PetscMin(subset_size,nmax); 1291 cum += subset_size; 1292 cum2 += subset_size*n; 1293 maxneigs = PetscMax(maxneigs,n); 1294 } 1295 if (mss) { 1296 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1297 PetscBLASInt B_itype = 1; 1298 PetscBLASInt B_N = mss; 1299 PetscReal zero = 0.0; 1300 PetscReal eps = 0.0; /* dlamch? */ 1301 1302 B_lwork = -1; 1303 S = NULL; 1304 St = NULL; 1305 eigs = NULL; 1306 eigv = NULL; 1307 B_iwork = NULL; 1308 B_ifail = NULL; 1309 #if defined(PETSC_USE_COMPLEX) 1310 rwork = NULL; 1311 #endif 1312 thresh = 1.0; 1313 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1314 #if defined(PETSC_USE_COMPLEX) 1315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1316 #else 1317 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1318 #endif 1319 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1321 } else { 1322 /* TODO */ 1323 } 1324 } else { 1325 lwork = 0; 1326 } 1327 1328 nv = 0; 1329 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1330 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1331 } 1332 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1333 if (allocated_S_St) { 1334 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1335 } 1336 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1337 #if defined(PETSC_USE_COMPLEX) 1338 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1339 #endif 1340 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1341 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1342 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1343 nv+cum,&pcbddc->adaptive_constraints_idxs, 1344 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1345 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1346 1347 maxneigs = 0; 1348 cum = cumarray = 0; 1349 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1350 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1351 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1352 const PetscInt *idxs; 1353 1354 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1355 for (cum=0;cum<nv;cum++) { 1356 pcbddc->adaptive_constraints_n[cum] = 1; 1357 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1358 pcbddc->adaptive_constraints_data[cum] = 1.0; 1359 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1360 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1361 } 1362 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1363 } 1364 1365 if (mss) { /* multilevel */ 1366 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1367 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1368 } 1369 1370 thresh = pcbddc->adaptive_threshold; 1371 for (i=0;i<sub_schurs->n_subs;i++) { 1372 const PetscInt *idxs; 1373 PetscReal upper,lower; 1374 PetscInt j,subset_size,eigs_start = 0; 1375 PetscBLASInt B_N; 1376 PetscBool same_data = PETSC_FALSE; 1377 1378 if (pcbddc->use_deluxe_scaling) { 1379 upper = PETSC_MAX_REAL; 1380 lower = thresh; 1381 } else { 1382 upper = 1./thresh; 1383 lower = 0.; 1384 } 1385 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1386 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1387 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1388 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1389 if (sub_schurs->is_hermitian) { 1390 PetscInt j,k; 1391 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1392 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1393 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 } 1395 for (j=0;j<subset_size;j++) { 1396 for (k=j;k<subset_size;k++) { 1397 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1398 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1399 } 1400 } 1401 } else { 1402 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1403 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1404 } 1405 } else { 1406 S = Sarray + cumarray; 1407 St = Starray + cumarray; 1408 } 1409 /* see if we can save some work */ 1410 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1411 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1412 } 1413 1414 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1415 B_neigs = 0; 1416 } else { 1417 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1418 PetscBLASInt B_itype = 1; 1419 PetscBLASInt B_IL, B_IU; 1420 PetscReal eps = -1.0; /* dlamch? */ 1421 PetscInt nmin_s; 1422 PetscBool compute_range = PETSC_FALSE; 1423 1424 if (pcbddc->dbg_flag) { 1425 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1426 } 1427 1428 compute_range = PETSC_FALSE; 1429 if (thresh > 1.+PETSC_SMALL && !same_data) { 1430 compute_range = PETSC_TRUE; 1431 } 1432 1433 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1434 if (compute_range) { 1435 1436 /* ask for eigenvalues larger than thresh */ 1437 #if defined(PETSC_USE_COMPLEX) 1438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1439 #else 1440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1441 #endif 1442 } else if (!same_data) { 1443 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1444 B_IL = 1; 1445 #if defined(PETSC_USE_COMPLEX) 1446 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1447 #else 1448 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1449 #endif 1450 } else { /* same_data is true, so get the adaptive function requested by the user */ 1451 PetscInt k; 1452 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1453 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1454 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1455 nmin = nmax; 1456 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1457 for (k=0;k<nmax;k++) { 1458 eigs[k] = 1./PETSC_SMALL; 1459 eigv[k*(subset_size+1)] = 1.0; 1460 } 1461 } 1462 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1463 if (B_ierr) { 1464 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1465 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1466 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1467 } 1468 1469 if (B_neigs > nmax) { 1470 if (pcbddc->dbg_flag) { 1471 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1472 } 1473 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1474 B_neigs = nmax; 1475 } 1476 1477 nmin_s = PetscMin(nmin,B_N); 1478 if (B_neigs < nmin_s) { 1479 PetscBLASInt B_neigs2; 1480 1481 if (pcbddc->use_deluxe_scaling) { 1482 B_IL = B_N - nmin_s + 1; 1483 B_IU = B_N - B_neigs; 1484 } else { 1485 B_IL = B_neigs + 1; 1486 B_IU = nmin_s; 1487 } 1488 if (pcbddc->dbg_flag) { 1489 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1490 } 1491 if (sub_schurs->is_hermitian) { 1492 PetscInt j,k; 1493 for (j=0;j<subset_size;j++) { 1494 for (k=j;k<subset_size;k++) { 1495 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1496 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1497 } 1498 } 1499 } else { 1500 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1501 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1502 } 1503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1504 #if defined(PETSC_USE_COMPLEX) 1505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1506 #else 1507 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1508 #endif 1509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1510 B_neigs += B_neigs2; 1511 } 1512 if (B_ierr) { 1513 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1514 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1515 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1516 } 1517 if (pcbddc->dbg_flag) { 1518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1519 for (j=0;j<B_neigs;j++) { 1520 if (eigs[j] == 0.0) { 1521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1522 } else { 1523 if (pcbddc->use_deluxe_scaling) { 1524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1525 } else { 1526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1527 } 1528 } 1529 } 1530 } 1531 } else { 1532 /* TODO */ 1533 } 1534 } 1535 /* change the basis back to the original one */ 1536 if (sub_schurs->change) { 1537 Mat change,phi,phit; 1538 1539 if (pcbddc->dbg_flag > 1) { 1540 PetscInt ii; 1541 for (ii=0;ii<B_neigs;ii++) { 1542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1543 for (j=0;j<B_N;j++) { 1544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1545 } 1546 } 1547 } 1548 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1550 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1551 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1552 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1553 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1554 } 1555 maxneigs = PetscMax(B_neigs,maxneigs); 1556 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1557 if (B_neigs) { 1558 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1559 1560 if (pcbddc->dbg_flag > 1) { 1561 PetscInt ii; 1562 for (ii=0;ii<B_neigs;ii++) { 1563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1564 for (j=0;j<B_N;j++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1567 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1569 #else 1570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1571 #endif 1572 } 1573 } 1574 } 1575 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1576 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1577 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1578 cum++; 1579 } 1580 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1581 /* shift for next computation */ 1582 cumarray += subset_size*subset_size; 1583 } 1584 if (pcbddc->dbg_flag) { 1585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1586 } 1587 1588 if (mss) { 1589 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1590 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1591 /* destroy matrices (junk) */ 1592 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1593 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1594 } 1595 if (allocated_S_St) { 1596 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1597 } 1598 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1599 #if defined(PETSC_USE_COMPLEX) 1600 ierr = PetscFree(rwork);CHKERRQ(ierr); 1601 #endif 1602 if (pcbddc->dbg_flag) { 1603 PetscInt maxneigs_r; 1604 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1606 } 1607 PetscFunctionReturn(0); 1608 } 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "PCBDDCSetUpSolvers" 1612 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1613 { 1614 PetscScalar *coarse_submat_vals; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1619 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1620 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1621 1622 /* Setup local neumann solver ksp_R */ 1623 /* PCBDDCSetUpLocalScatters should be called first! */ 1624 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1625 1626 /* 1627 Setup local correction and local part of coarse basis. 1628 Gives back the dense local part of the coarse matrix in column major ordering 1629 */ 1630 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1631 1632 /* Compute total number of coarse nodes and setup coarse solver */ 1633 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1634 1635 /* free */ 1636 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "PCBDDCResetCustomization" 1642 PetscErrorCode PCBDDCResetCustomization(PC pc) 1643 { 1644 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1649 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1650 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1651 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1652 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1653 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1654 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1655 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1656 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1657 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1658 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "PCBDDCResetTopography" 1664 PetscErrorCode PCBDDCResetTopography(PC pc) 1665 { 1666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1667 PetscInt i; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1672 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1673 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1674 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1675 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1676 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1677 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1678 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1679 for (i=0;i<pcbddc->n_local_subs;i++) { 1680 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1681 } 1682 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1683 if (pcbddc->sub_schurs) { 1684 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCResetSolvers" 1691 PetscErrorCode PCBDDCResetSolvers(PC pc) 1692 { 1693 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1698 if (pcbddc->coarse_phi_B) { 1699 PetscScalar *array; 1700 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1701 ierr = PetscFree(array);CHKERRQ(ierr); 1702 } 1703 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1704 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1705 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1707 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1708 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1709 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1710 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1711 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1712 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1713 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1714 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1715 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1716 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1717 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1718 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1719 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1720 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1721 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1722 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1723 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1724 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1725 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1726 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1727 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1728 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1729 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1730 if (pcbddc->benign_zerodiag_subs) { 1731 PetscInt i; 1732 for (i=0;i<pcbddc->benign_n;i++) { 1733 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1734 } 1735 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1736 } 1737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1743 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1744 { 1745 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1746 PC_IS *pcis = (PC_IS*)pc->data; 1747 VecType impVecType; 1748 PetscInt n_constraints,n_R,old_size; 1749 PetscErrorCode ierr; 1750 1751 PetscFunctionBegin; 1752 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1753 /* get sizes */ 1754 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1755 n_R = pcis->n - pcbddc->n_vertices; 1756 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1757 /* local work vectors (try to avoid unneeded work)*/ 1758 /* R nodes */ 1759 old_size = -1; 1760 if (pcbddc->vec1_R) { 1761 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1762 } 1763 if (n_R != old_size) { 1764 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1765 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1766 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1767 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1768 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1769 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1770 } 1771 /* local primal dofs */ 1772 old_size = -1; 1773 if (pcbddc->vec1_P) { 1774 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1775 } 1776 if (pcbddc->local_primal_size != old_size) { 1777 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1778 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1779 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1780 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1781 } 1782 /* local explicit constraints */ 1783 old_size = -1; 1784 if (pcbddc->vec1_C) { 1785 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1786 } 1787 if (n_constraints && n_constraints != old_size) { 1788 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1789 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1790 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1791 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1792 } 1793 PetscFunctionReturn(0); 1794 } 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "PCBDDCSetUpCorrection" 1798 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1799 { 1800 PetscErrorCode ierr; 1801 /* pointers to pcis and pcbddc */ 1802 PC_IS* pcis = (PC_IS*)pc->data; 1803 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1804 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1805 /* submatrices of local problem */ 1806 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1807 /* submatrices of local coarse problem */ 1808 Mat S_VV,S_CV,S_VC,S_CC; 1809 /* working matrices */ 1810 Mat C_CR; 1811 /* additional working stuff */ 1812 PC pc_R; 1813 Mat F; 1814 Vec dummy_vec; 1815 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1816 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1817 PetscScalar *work; 1818 PetscInt *idx_V_B; 1819 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1820 PetscInt i,n_R,n_D,n_B; 1821 1822 /* some shortcuts to scalars */ 1823 PetscScalar one=1.0,m_one=-1.0; 1824 1825 PetscFunctionBegin; 1826 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1827 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1828 } 1829 1830 /* Set Non-overlapping dimensions */ 1831 n_vertices = pcbddc->n_vertices; 1832 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1833 n_B = pcis->n_B; 1834 n_D = pcis->n - n_B; 1835 n_R = pcis->n - n_vertices; 1836 1837 /* vertices in boundary numbering */ 1838 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1839 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1840 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1841 1842 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1843 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1845 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1847 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1849 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1851 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1852 1853 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1854 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1855 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1856 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1857 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1858 lda_rhs = n_R; 1859 need_benign_correction = PETSC_FALSE; 1860 if (isLU || isILU || isCHOL) { 1861 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1862 } else if (sub_schurs && sub_schurs->reuse_solver) { 1863 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1864 MatFactorType type; 1865 1866 F = reuse_solver->F; 1867 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1868 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1869 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1870 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1871 } else { 1872 F = NULL; 1873 } 1874 1875 /* allocate workspace */ 1876 n = 0; 1877 if (n_constraints) { 1878 n += lda_rhs*n_constraints; 1879 } 1880 if (n_vertices) { 1881 n = PetscMax(2*lda_rhs*n_vertices,n); 1882 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1883 } 1884 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1885 1886 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1887 dummy_vec = NULL; 1888 if (need_benign_correction && lda_rhs != n_R && F) { 1889 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1890 } 1891 1892 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1893 if (n_constraints) { 1894 Mat M1,M2,M3,C_B; 1895 IS is_aux; 1896 PetscScalar *array,*array2; 1897 1898 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1900 1901 /* Extract constraints on R nodes: C_{CR} */ 1902 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1903 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1904 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1905 1906 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1907 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1908 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1909 for (i=0;i<n_constraints;i++) { 1910 const PetscScalar *row_cmat_values; 1911 const PetscInt *row_cmat_indices; 1912 PetscInt size_of_constraint,j; 1913 1914 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1915 for (j=0;j<size_of_constraint;j++) { 1916 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1917 } 1918 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1919 } 1920 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1921 if (F) { 1922 Mat B; 1923 1924 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1925 if (need_benign_correction) { 1926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1927 1928 /* rhs is already zero on interior dofs, no need to change the rhs */ 1929 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1930 } 1931 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1932 if (need_benign_correction) { 1933 PetscScalar *marr; 1934 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1935 1936 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1937 if (lda_rhs != n_R) { 1938 for (i=0;i<n_constraints;i++) { 1939 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1940 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1941 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1942 } 1943 } else { 1944 for (i=0;i<n_constraints;i++) { 1945 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1947 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1948 } 1949 } 1950 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1951 } 1952 ierr = MatDestroy(&B);CHKERRQ(ierr); 1953 } else { 1954 PetscScalar *marr; 1955 1956 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1957 for (i=0;i<n_constraints;i++) { 1958 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1959 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1960 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1962 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1963 } 1964 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1965 } 1966 if (!pcbddc->switch_static) { 1967 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1968 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1969 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1970 for (i=0;i<n_constraints;i++) { 1971 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1972 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1973 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1976 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1977 } 1978 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1979 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1980 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1981 } else { 1982 if (lda_rhs != n_R) { 1983 IS dummy; 1984 1985 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1986 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1987 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1988 } else { 1989 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1990 pcbddc->local_auxmat2 = local_auxmat2_R; 1991 } 1992 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1993 } 1994 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1995 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1996 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1997 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1998 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1999 if (isCHOL) { 2000 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2003 } 2004 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2005 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2006 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2007 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2008 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2009 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2010 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2011 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2012 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2013 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2014 } 2015 2016 /* Get submatrices from subdomain matrix */ 2017 if (n_vertices) { 2018 IS is_aux; 2019 2020 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2021 IS tis; 2022 2023 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2024 ierr = ISSort(tis);CHKERRQ(ierr); 2025 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2026 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2027 } else { 2028 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2029 } 2030 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2031 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2032 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2033 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2034 } 2035 2036 /* Matrix of coarse basis functions (local) */ 2037 if (pcbddc->coarse_phi_B) { 2038 PetscInt on_B,on_primal,on_D=n_D; 2039 if (pcbddc->coarse_phi_D) { 2040 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2041 } 2042 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2043 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2044 PetscScalar *marray; 2045 2046 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2047 ierr = PetscFree(marray);CHKERRQ(ierr); 2048 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2049 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2050 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2052 } 2053 } 2054 2055 if (!pcbddc->coarse_phi_B) { 2056 PetscScalar *marray; 2057 2058 n = n_B*pcbddc->local_primal_size; 2059 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2060 n += n_D*pcbddc->local_primal_size; 2061 } 2062 if (!pcbddc->symmetric_primal) { 2063 n *= 2; 2064 } 2065 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2067 n = n_B*pcbddc->local_primal_size; 2068 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2070 n += n_D*pcbddc->local_primal_size; 2071 } 2072 if (!pcbddc->symmetric_primal) { 2073 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2075 n = n_B*pcbddc->local_primal_size; 2076 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2077 } 2078 } else { 2079 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2080 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2081 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2082 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2083 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2084 } 2085 } 2086 } 2087 2088 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2089 p0_lidx_I = NULL; 2090 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2091 const PetscInt *idxs; 2092 2093 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2094 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2095 for (i=0;i<pcbddc->benign_n;i++) { 2096 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2097 } 2098 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2099 } 2100 2101 /* vertices */ 2102 if (n_vertices) { 2103 2104 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2105 2106 if (n_R) { 2107 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2108 PetscBLASInt B_N,B_one = 1; 2109 PetscScalar *x,*y; 2110 PetscBool isseqaij; 2111 2112 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2113 if (need_benign_correction) { 2114 ISLocalToGlobalMapping RtoN; 2115 IS is_p0; 2116 PetscInt *idxs_p0,n; 2117 2118 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2119 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2120 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2121 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2122 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2123 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2124 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2125 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2126 } 2127 2128 if (lda_rhs == n_R) { 2129 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2130 } else { 2131 PetscScalar *av,*array; 2132 const PetscInt *xadj,*adjncy; 2133 PetscInt n; 2134 PetscBool flg_row; 2135 2136 array = work+lda_rhs*n_vertices; 2137 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2138 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2139 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2140 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2144 } 2145 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2146 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2147 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2148 } 2149 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2150 if (need_benign_correction) { 2151 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2152 PetscScalar *marr; 2153 2154 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2155 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2156 2157 | 0 0 0 | (V) 2158 L = | 0 0 -1 | (P-p0) 2159 | 0 0 -1 | (p0) 2160 2161 */ 2162 for (i=0;i<reuse_solver->benign_n;i++) { 2163 const PetscScalar *vals; 2164 const PetscInt *idxs,*idxs_zero; 2165 PetscInt n,j,nz; 2166 2167 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2168 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2169 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2170 for (j=0;j<n;j++) { 2171 PetscScalar val = vals[j]; 2172 PetscInt k,col = idxs[j]; 2173 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2174 } 2175 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2176 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2177 } 2178 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2179 } 2180 if (F) { 2181 /* need to correct the rhs */ 2182 if (need_benign_correction) { 2183 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2184 PetscScalar *marr; 2185 2186 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2187 if (lda_rhs != n_R) { 2188 for (i=0;i<n_vertices;i++) { 2189 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2190 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2191 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2192 } 2193 } else { 2194 for (i=0;i<n_vertices;i++) { 2195 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2196 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2198 } 2199 } 2200 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2201 } 2202 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2203 /* need to correct the solution */ 2204 if (need_benign_correction) { 2205 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2206 PetscScalar *marr; 2207 2208 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2209 if (lda_rhs != n_R) { 2210 for (i=0;i<n_vertices;i++) { 2211 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2212 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2213 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2214 } 2215 } else { 2216 for (i=0;i<n_vertices;i++) { 2217 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2218 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2219 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2220 } 2221 } 2222 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2223 } 2224 } else { 2225 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2226 for (i=0;i<n_vertices;i++) { 2227 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2228 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2229 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2230 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2231 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2232 } 2233 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2234 } 2235 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2236 /* S_VV and S_CV */ 2237 if (n_constraints) { 2238 Mat B; 2239 2240 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2241 for (i=0;i<n_vertices;i++) { 2242 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2243 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2244 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2245 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2246 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2247 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2248 } 2249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2250 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2251 ierr = MatDestroy(&B);CHKERRQ(ierr); 2252 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2253 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2254 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2255 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2256 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2257 ierr = MatDestroy(&B);CHKERRQ(ierr); 2258 } 2259 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2260 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2261 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2262 } 2263 if (lda_rhs != n_R) { 2264 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2265 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2266 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2267 } 2268 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2269 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2270 if (need_benign_correction) { 2271 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2272 PetscScalar *marr,*sums; 2273 2274 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2275 ierr = MatDenseGetArray(S_VVt,&marr); 2276 for (i=0;i<reuse_solver->benign_n;i++) { 2277 const PetscScalar *vals; 2278 const PetscInt *idxs,*idxs_zero; 2279 PetscInt n,j,nz; 2280 2281 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2282 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2283 for (j=0;j<n_vertices;j++) { 2284 PetscInt k; 2285 sums[j] = 0.; 2286 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2287 } 2288 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2289 for (j=0;j<n;j++) { 2290 PetscScalar val = vals[j]; 2291 PetscInt k; 2292 for (k=0;k<n_vertices;k++) { 2293 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2294 } 2295 } 2296 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2297 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2298 } 2299 ierr = PetscFree(sums);CHKERRQ(ierr); 2300 ierr = MatDenseRestoreArray(S_VVt,&marr); 2301 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2302 } 2303 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2304 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2306 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2307 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2308 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2309 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2310 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2311 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2312 } else { 2313 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2314 } 2315 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2316 2317 /* coarse basis functions */ 2318 for (i=0;i<n_vertices;i++) { 2319 PetscScalar *y; 2320 2321 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2322 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2323 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2324 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2325 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2326 y[n_B*i+idx_V_B[i]] = 1.0; 2327 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2328 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2329 2330 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2331 PetscInt j; 2332 2333 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2334 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2335 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2336 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2338 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2339 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2340 } 2341 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2342 } 2343 /* if n_R == 0 the object is not destroyed */ 2344 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2345 } 2346 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2347 2348 if (n_constraints) { 2349 Mat B; 2350 2351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2352 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2353 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2354 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2355 if (n_vertices) { 2356 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2357 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2358 } else { 2359 Mat S_VCt; 2360 2361 if (lda_rhs != n_R) { 2362 ierr = MatDestroy(&B);CHKERRQ(ierr); 2363 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2364 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2365 } 2366 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2367 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2368 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2369 } 2370 } 2371 ierr = MatDestroy(&B);CHKERRQ(ierr); 2372 /* coarse basis functions */ 2373 for (i=0;i<n_constraints;i++) { 2374 PetscScalar *y; 2375 2376 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2377 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2378 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2379 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2380 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2381 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2382 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2383 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2384 PetscInt j; 2385 2386 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2387 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2388 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2389 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2390 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2391 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2392 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2393 } 2394 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2395 } 2396 } 2397 if (n_constraints) { 2398 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2401 2402 /* coarse matrix entries relative to B_0 */ 2403 if (pcbddc->benign_n) { 2404 Mat B0_B,B0_BPHI; 2405 IS is_dummy; 2406 PetscScalar *data; 2407 PetscInt j; 2408 2409 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2410 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2411 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2412 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2413 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2414 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2415 for (j=0;j<pcbddc->benign_n;j++) { 2416 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2417 for (i=0;i<pcbddc->local_primal_size;i++) { 2418 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2419 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2420 } 2421 } 2422 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2423 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2424 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2425 } 2426 2427 /* compute other basis functions for non-symmetric problems */ 2428 if (!pcbddc->symmetric_primal) { 2429 Mat B_V=NULL,B_C=NULL; 2430 PetscScalar *marray; 2431 2432 if (n_constraints) { 2433 Mat S_CCT,C_CRT; 2434 2435 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2436 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2437 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2438 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2439 if (n_vertices) { 2440 Mat S_VCT; 2441 2442 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2443 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2444 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2445 } 2446 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2447 } else { 2448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2449 } 2450 if (n_vertices && n_R) { 2451 PetscScalar *av,*marray; 2452 const PetscInt *xadj,*adjncy; 2453 PetscInt n; 2454 PetscBool flg_row; 2455 2456 /* B_V = B_V - A_VR^T */ 2457 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2458 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2459 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2460 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) { 2462 PetscInt j; 2463 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2464 } 2465 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2466 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2467 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2468 } 2469 2470 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2471 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2472 for (i=0;i<n_vertices;i++) { 2473 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2474 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2475 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2476 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2477 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2478 } 2479 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2480 if (B_C) { 2481 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2482 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2483 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2484 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2485 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2486 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2487 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2488 } 2489 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2490 } 2491 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2492 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2493 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2494 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2495 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2496 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2497 } 2498 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2499 /* coarse basis functions */ 2500 for (i=0;i<pcbddc->local_primal_size;i++) { 2501 PetscScalar *y; 2502 2503 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2504 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2505 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2506 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 if (i<n_vertices) { 2509 y[n_B*i+idx_V_B[i]] = 1.0; 2510 } 2511 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2512 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2513 2514 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2515 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2516 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2519 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2520 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2521 } 2522 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2523 } 2524 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2525 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2526 } 2527 /* free memory */ 2528 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2529 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2530 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2531 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2532 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2533 ierr = PetscFree(work);CHKERRQ(ierr); 2534 if (n_vertices) { 2535 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2536 } 2537 if (n_constraints) { 2538 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2539 } 2540 /* Checking coarse_sub_mat and coarse basis functios */ 2541 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2542 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2543 if (pcbddc->dbg_flag) { 2544 Mat coarse_sub_mat; 2545 Mat AUXMAT,TM1,TM2,TM3,TM4; 2546 Mat coarse_phi_D,coarse_phi_B; 2547 Mat coarse_psi_D,coarse_psi_B; 2548 Mat A_II,A_BB,A_IB,A_BI; 2549 Mat C_B,CPHI; 2550 IS is_dummy; 2551 Vec mones; 2552 MatType checkmattype=MATSEQAIJ; 2553 PetscReal real_value; 2554 2555 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2556 Mat A; 2557 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2558 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2559 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2560 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2561 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2562 ierr = MatDestroy(&A);CHKERRQ(ierr); 2563 } else { 2564 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2565 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2566 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2567 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2568 } 2569 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2570 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2571 if (!pcbddc->symmetric_primal) { 2572 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2573 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2574 } 2575 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2576 2577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2580 if (!pcbddc->symmetric_primal) { 2581 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2582 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2583 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2584 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2585 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2586 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2587 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2588 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2589 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2590 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2591 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2592 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2593 } else { 2594 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2595 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2596 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2597 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2598 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2599 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2600 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2601 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2602 } 2603 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2604 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2605 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2606 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2607 if (pcbddc->benign_n) { 2608 Mat B0_B,B0_BPHI; 2609 PetscScalar *data,*data2; 2610 PetscInt j; 2611 2612 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2613 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2614 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2615 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2616 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2617 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2618 for (j=0;j<pcbddc->benign_n;j++) { 2619 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2620 for (i=0;i<pcbddc->local_primal_size;i++) { 2621 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2622 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2623 } 2624 } 2625 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2626 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2627 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2628 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2629 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2630 } 2631 #if 0 2632 { 2633 PetscViewer viewer; 2634 char filename[256]; 2635 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2636 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2637 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2638 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2639 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2640 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2641 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2642 if (save_change) { 2643 Mat phi_B; 2644 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2645 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2646 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2647 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2648 } else { 2649 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2650 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2651 } 2652 if (pcbddc->coarse_phi_D) { 2653 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2654 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2655 } 2656 if (pcbddc->coarse_psi_B) { 2657 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2658 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2659 } 2660 if (pcbddc->coarse_psi_D) { 2661 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2662 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2663 } 2664 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2665 } 2666 #endif 2667 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2668 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2669 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2671 2672 /* check constraints */ 2673 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2674 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2675 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2676 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2677 } else { 2678 PetscScalar *data; 2679 Mat tmat; 2680 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2681 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2682 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2683 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2684 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2685 } 2686 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2687 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2688 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2689 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2691 if (!pcbddc->symmetric_primal) { 2692 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2693 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2694 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2695 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2697 } 2698 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2699 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2700 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2701 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2702 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2703 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2704 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2705 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2706 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2707 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2708 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2709 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2710 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2711 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2712 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2713 if (!pcbddc->symmetric_primal) { 2714 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2715 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2716 } 2717 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2718 } 2719 /* get back data */ 2720 *coarse_submat_vals_n = coarse_submat_vals; 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2726 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2727 { 2728 Mat *work_mat; 2729 IS isrow_s,iscol_s; 2730 PetscBool rsorted,csorted; 2731 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2736 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2737 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2738 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2739 2740 if (!rsorted) { 2741 const PetscInt *idxs; 2742 PetscInt *idxs_sorted,i; 2743 2744 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2745 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2746 for (i=0;i<rsize;i++) { 2747 idxs_perm_r[i] = i; 2748 } 2749 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2750 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2751 for (i=0;i<rsize;i++) { 2752 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2753 } 2754 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2755 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2756 } else { 2757 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2758 isrow_s = isrow; 2759 } 2760 2761 if (!csorted) { 2762 if (isrow == iscol) { 2763 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2764 iscol_s = isrow_s; 2765 } else { 2766 const PetscInt *idxs; 2767 PetscInt *idxs_sorted,i; 2768 2769 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2770 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2771 for (i=0;i<csize;i++) { 2772 idxs_perm_c[i] = i; 2773 } 2774 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2775 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2776 for (i=0;i<csize;i++) { 2777 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2778 } 2779 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2780 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2781 } 2782 } else { 2783 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2784 iscol_s = iscol; 2785 } 2786 2787 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2788 2789 if (!rsorted || !csorted) { 2790 Mat new_mat; 2791 IS is_perm_r,is_perm_c; 2792 2793 if (!rsorted) { 2794 PetscInt *idxs_r,i; 2795 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2796 for (i=0;i<rsize;i++) { 2797 idxs_r[idxs_perm_r[i]] = i; 2798 } 2799 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2800 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2801 } else { 2802 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2803 } 2804 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2805 2806 if (!csorted) { 2807 if (isrow_s == iscol_s) { 2808 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2809 is_perm_c = is_perm_r; 2810 } else { 2811 PetscInt *idxs_c,i; 2812 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2813 for (i=0;i<csize;i++) { 2814 idxs_c[idxs_perm_c[i]] = i; 2815 } 2816 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2817 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2818 } 2819 } else { 2820 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2821 } 2822 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2823 2824 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2825 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2826 work_mat[0] = new_mat; 2827 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2828 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2829 } 2830 2831 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2832 *B = work_mat[0]; 2833 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2834 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2835 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2836 PetscFunctionReturn(0); 2837 } 2838 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2841 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2842 { 2843 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2844 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2845 Mat new_mat; 2846 IS is_local,is_global; 2847 PetscInt local_size; 2848 PetscBool isseqaij; 2849 PetscErrorCode ierr; 2850 2851 PetscFunctionBegin; 2852 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2853 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2854 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2855 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2856 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2857 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2858 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2859 2860 /* check */ 2861 if (pcbddc->dbg_flag) { 2862 Vec x,x_change; 2863 PetscReal error; 2864 2865 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2866 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2867 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2868 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2869 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2870 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2871 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2872 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2873 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2874 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2875 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2876 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2877 ierr = VecDestroy(&x);CHKERRQ(ierr); 2878 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2879 } 2880 2881 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2882 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2883 if (isseqaij) { 2884 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2885 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2886 } else { 2887 Mat work_mat; 2888 2889 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2890 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2891 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2892 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2893 } 2894 if (matis->A->symmetric_set) { 2895 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2896 #if !defined(PETSC_USE_COMPLEX) 2897 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2898 #endif 2899 } 2900 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2901 PetscFunctionReturn(0); 2902 } 2903 2904 #undef __FUNCT__ 2905 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2906 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2907 { 2908 PC_IS* pcis = (PC_IS*)(pc->data); 2909 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2910 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2911 PetscInt *idx_R_local=NULL; 2912 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2913 PetscInt vbs,bs; 2914 PetscBT bitmask=NULL; 2915 PetscErrorCode ierr; 2916 2917 PetscFunctionBegin; 2918 /* 2919 No need to setup local scatters if 2920 - primal space is unchanged 2921 AND 2922 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2923 AND 2924 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2925 */ 2926 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2927 PetscFunctionReturn(0); 2928 } 2929 /* destroy old objects */ 2930 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2931 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2932 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2933 /* Set Non-overlapping dimensions */ 2934 n_B = pcis->n_B; 2935 n_D = pcis->n - n_B; 2936 n_vertices = pcbddc->n_vertices; 2937 2938 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2939 2940 /* create auxiliary bitmask and allocate workspace */ 2941 if (!sub_schurs || !sub_schurs->reuse_solver) { 2942 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2943 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2944 for (i=0;i<n_vertices;i++) { 2945 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2946 } 2947 2948 for (i=0, n_R=0; i<pcis->n; i++) { 2949 if (!PetscBTLookup(bitmask,i)) { 2950 idx_R_local[n_R++] = i; 2951 } 2952 } 2953 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2954 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2955 2956 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2957 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2958 } 2959 2960 /* Block code */ 2961 vbs = 1; 2962 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2963 if (bs>1 && !(n_vertices%bs)) { 2964 PetscBool is_blocked = PETSC_TRUE; 2965 PetscInt *vary; 2966 if (!sub_schurs || !sub_schurs->reuse_solver) { 2967 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2968 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2969 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2970 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2971 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2972 for (i=0; i<pcis->n/bs; i++) { 2973 if (vary[i]!=0 && vary[i]!=bs) { 2974 is_blocked = PETSC_FALSE; 2975 break; 2976 } 2977 } 2978 ierr = PetscFree(vary);CHKERRQ(ierr); 2979 } else { 2980 /* Verify directly the R set */ 2981 for (i=0; i<n_R/bs; i++) { 2982 PetscInt j,node=idx_R_local[bs*i]; 2983 for (j=1; j<bs; j++) { 2984 if (node != idx_R_local[bs*i+j]-j) { 2985 is_blocked = PETSC_FALSE; 2986 break; 2987 } 2988 } 2989 } 2990 } 2991 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2992 vbs = bs; 2993 for (i=0;i<n_R/vbs;i++) { 2994 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2995 } 2996 } 2997 } 2998 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2999 if (sub_schurs && sub_schurs->reuse_solver) { 3000 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3001 3002 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3003 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3004 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3005 reuse_solver->is_R = pcbddc->is_R_local; 3006 } else { 3007 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3008 } 3009 3010 /* print some info if requested */ 3011 if (pcbddc->dbg_flag) { 3012 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3013 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3014 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3017 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3018 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3019 } 3020 3021 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3022 if (!sub_schurs || !sub_schurs->reuse_solver) { 3023 IS is_aux1,is_aux2; 3024 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3025 3026 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3027 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3028 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3029 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3030 for (i=0; i<n_D; i++) { 3031 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3032 } 3033 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3034 for (i=0, j=0; i<n_R; i++) { 3035 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3036 aux_array1[j++] = i; 3037 } 3038 } 3039 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3040 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3041 for (i=0, j=0; i<n_B; i++) { 3042 if (!PetscBTLookup(bitmask,is_indices[i])) { 3043 aux_array2[j++] = i; 3044 } 3045 } 3046 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3047 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3048 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3049 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3050 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3051 3052 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3053 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3054 for (i=0, j=0; i<n_R; i++) { 3055 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3056 aux_array1[j++] = i; 3057 } 3058 } 3059 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3060 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3061 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3062 } 3063 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3064 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3065 } else { 3066 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3067 IS tis; 3068 PetscInt schur_size; 3069 3070 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3071 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3072 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3073 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3075 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3076 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3077 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3078 } 3079 } 3080 PetscFunctionReturn(0); 3081 } 3082 3083 3084 #undef __FUNCT__ 3085 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3086 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3087 { 3088 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3089 PC_IS *pcis = (PC_IS*)pc->data; 3090 PC pc_temp; 3091 Mat A_RR; 3092 MatReuse reuse; 3093 PetscScalar m_one = -1.0; 3094 PetscReal value; 3095 PetscInt n_D,n_R; 3096 PetscBool check_corr[2],issbaij; 3097 PetscErrorCode ierr; 3098 /* prefixes stuff */ 3099 char dir_prefix[256],neu_prefix[256],str_level[16]; 3100 size_t len; 3101 3102 PetscFunctionBegin; 3103 3104 /* compute prefixes */ 3105 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3106 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3107 if (!pcbddc->current_level) { 3108 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3109 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3110 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3111 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3112 } else { 3113 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3114 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3115 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3116 len -= 15; /* remove "pc_bddc_coarse_" */ 3117 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3118 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3119 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3120 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3121 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3122 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3123 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3124 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3125 } 3126 3127 /* DIRICHLET PROBLEM */ 3128 if (dirichlet) { 3129 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3130 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3131 if (!sub_schurs || !sub_schurs->reuse_solver) { 3132 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3133 } 3134 if (pcbddc->dbg_flag) { 3135 Mat A_IIn; 3136 3137 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3138 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3139 pcis->A_II = A_IIn; 3140 } 3141 } 3142 if (pcbddc->local_mat->symmetric_set) { 3143 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3144 } 3145 /* Matrix for Dirichlet problem is pcis->A_II */ 3146 n_D = pcis->n - pcis->n_B; 3147 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3148 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3149 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3150 /* default */ 3151 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3152 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3153 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3154 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3155 if (issbaij) { 3156 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3157 } else { 3158 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3159 } 3160 /* Allow user's customization */ 3161 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3162 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3163 } 3164 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3165 if (sub_schurs && sub_schurs->reuse_solver) { 3166 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3167 3168 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3169 } 3170 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3171 if (!n_D) { 3172 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3173 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3174 } 3175 /* Set Up KSP for Dirichlet problem of BDDC */ 3176 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3177 /* set ksp_D into pcis data */ 3178 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3179 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3180 pcis->ksp_D = pcbddc->ksp_D; 3181 } 3182 3183 /* NEUMANN PROBLEM */ 3184 A_RR = 0; 3185 if (neumann) { 3186 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3187 PetscInt ibs,mbs; 3188 PetscBool issbaij; 3189 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3190 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3191 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3192 if (pcbddc->ksp_R) { /* already created ksp */ 3193 PetscInt nn_R; 3194 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3195 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3196 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3197 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3198 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3199 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3200 reuse = MAT_INITIAL_MATRIX; 3201 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3202 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3203 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3204 reuse = MAT_INITIAL_MATRIX; 3205 } else { /* safe to reuse the matrix */ 3206 reuse = MAT_REUSE_MATRIX; 3207 } 3208 } 3209 /* last check */ 3210 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3211 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3212 reuse = MAT_INITIAL_MATRIX; 3213 } 3214 } else { /* first time, so we need to create the matrix */ 3215 reuse = MAT_INITIAL_MATRIX; 3216 } 3217 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3218 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3219 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3220 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3221 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3222 if (matis->A == pcbddc->local_mat) { 3223 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3224 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3225 } else { 3226 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3227 } 3228 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3229 if (matis->A == pcbddc->local_mat) { 3230 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3231 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3232 } else { 3233 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3234 } 3235 } 3236 /* extract A_RR */ 3237 if (sub_schurs && sub_schurs->reuse_solver) { 3238 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3239 3240 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3241 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3242 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3243 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3244 } else { 3245 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3246 } 3247 } else { 3248 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3249 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3250 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3251 } 3252 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3253 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3254 } 3255 if (pcbddc->local_mat->symmetric_set) { 3256 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3257 } 3258 if (!pcbddc->ksp_R) { /* create object if not present */ 3259 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3260 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3261 /* default */ 3262 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3263 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3264 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3265 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3266 if (issbaij) { 3267 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3268 } else { 3269 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3270 } 3271 /* Allow user's customization */ 3272 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3273 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3274 } 3275 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3276 if (!n_R) { 3277 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3278 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3279 } 3280 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3281 /* Reuse solver if it is present */ 3282 if (sub_schurs && sub_schurs->reuse_solver) { 3283 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3284 3285 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3286 } 3287 /* Set Up KSP for Neumann problem of BDDC */ 3288 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3289 } 3290 3291 if (pcbddc->dbg_flag) { 3292 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3293 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3295 } 3296 3297 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3298 check_corr[0] = check_corr[1] = PETSC_FALSE; 3299 if (pcbddc->NullSpace_corr[0]) { 3300 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3301 } 3302 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3303 check_corr[0] = PETSC_TRUE; 3304 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3305 } 3306 if (neumann && pcbddc->NullSpace_corr[2]) { 3307 check_corr[1] = PETSC_TRUE; 3308 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3309 } 3310 3311 /* check Dirichlet and Neumann solvers */ 3312 if (pcbddc->dbg_flag) { 3313 if (dirichlet) { /* Dirichlet */ 3314 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3315 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3316 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3317 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3318 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3319 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3320 if (check_corr[0]) { 3321 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3322 } 3323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3324 } 3325 if (neumann) { /* Neumann */ 3326 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3327 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3328 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3329 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3330 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3331 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3332 if (check_corr[1]) { 3333 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3334 } 3335 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3336 } 3337 } 3338 /* free Neumann problem's matrix */ 3339 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3340 PetscFunctionReturn(0); 3341 } 3342 3343 #undef __FUNCT__ 3344 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3345 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3346 { 3347 PetscErrorCode ierr; 3348 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3349 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3350 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3351 3352 PetscFunctionBegin; 3353 if (!reuse_solver) { 3354 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3355 } 3356 if (!pcbddc->switch_static) { 3357 if (applytranspose && pcbddc->local_auxmat1) { 3358 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3359 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3360 } 3361 if (!reuse_solver) { 3362 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3363 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 } else { 3365 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3366 3367 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3368 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3369 } 3370 } else { 3371 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3372 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3373 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3374 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3375 if (applytranspose && pcbddc->local_auxmat1) { 3376 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3377 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3378 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3379 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3380 } 3381 } 3382 if (!reuse_solver || pcbddc->switch_static) { 3383 if (applytranspose) { 3384 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3385 } else { 3386 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3387 } 3388 } else { 3389 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3390 3391 if (applytranspose) { 3392 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3393 } else { 3394 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3395 } 3396 } 3397 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3398 if (!pcbddc->switch_static) { 3399 if (!reuse_solver) { 3400 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3401 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3402 } else { 3403 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3404 3405 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3406 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3407 } 3408 if (!applytranspose && pcbddc->local_auxmat1) { 3409 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3410 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3411 } 3412 } else { 3413 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3417 if (!applytranspose && pcbddc->local_auxmat1) { 3418 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3419 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3420 } 3421 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3423 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3424 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3425 } 3426 PetscFunctionReturn(0); 3427 } 3428 3429 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3432 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3433 { 3434 PetscErrorCode ierr; 3435 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3436 PC_IS* pcis = (PC_IS*) (pc->data); 3437 const PetscScalar zero = 0.0; 3438 3439 PetscFunctionBegin; 3440 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3441 if (!pcbddc->benign_apply_coarse_only) { 3442 if (applytranspose) { 3443 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3444 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3445 } else { 3446 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3447 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3448 } 3449 } else { 3450 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3451 } 3452 3453 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3454 if (pcbddc->benign_n) { 3455 PetscScalar *array; 3456 PetscInt j; 3457 3458 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3459 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3460 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3461 } 3462 3463 /* start communications from local primal nodes to rhs of coarse solver */ 3464 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3465 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3466 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 3468 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3469 if (pcbddc->coarse_ksp) { 3470 Mat coarse_mat; 3471 Vec rhs,sol; 3472 MatNullSpace nullsp; 3473 PetscBool isbddc = PETSC_FALSE; 3474 3475 if (pcbddc->benign_have_null) { 3476 PC coarse_pc; 3477 3478 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3479 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3480 /* we need to propagate to coarser levels the need for a possible benign correction */ 3481 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3482 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3483 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3484 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3485 } 3486 } 3487 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3488 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3489 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3490 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3491 if (nullsp) { 3492 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3493 } 3494 if (applytranspose) { 3495 if (pcbddc->benign_apply_coarse_only) { 3496 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3497 } else { 3498 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3499 } 3500 } else { 3501 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3502 PC coarse_pc; 3503 3504 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3505 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3506 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3507 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3508 } else { 3509 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3510 } 3511 } 3512 /* we don't need the benign correction at coarser levels anymore */ 3513 if (pcbddc->benign_have_null && isbddc) { 3514 PC coarse_pc; 3515 PC_BDDC* coarsepcbddc; 3516 3517 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3518 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3519 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3520 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3521 } 3522 if (nullsp) { 3523 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3524 } 3525 } 3526 3527 /* Local solution on R nodes */ 3528 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3529 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3530 } 3531 /* communications from coarse sol to local primal nodes */ 3532 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3533 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3534 3535 /* Sum contributions from the two levels */ 3536 if (!pcbddc->benign_apply_coarse_only) { 3537 if (applytranspose) { 3538 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3539 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3540 } else { 3541 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3542 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3543 } 3544 /* store p0 */ 3545 if (pcbddc->benign_n) { 3546 PetscScalar *array; 3547 PetscInt j; 3548 3549 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3550 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3551 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3552 } 3553 } else { /* expand the coarse solution */ 3554 if (applytranspose) { 3555 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3556 } else { 3557 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3558 } 3559 } 3560 PetscFunctionReturn(0); 3561 } 3562 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3565 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3566 { 3567 PetscErrorCode ierr; 3568 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3569 PetscScalar *array; 3570 Vec from,to; 3571 3572 PetscFunctionBegin; 3573 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3574 from = pcbddc->coarse_vec; 3575 to = pcbddc->vec1_P; 3576 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3577 Vec tvec; 3578 3579 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3580 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3581 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3582 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3583 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3584 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3585 } 3586 } else { /* from local to global -> put data in coarse right hand side */ 3587 from = pcbddc->vec1_P; 3588 to = pcbddc->coarse_vec; 3589 } 3590 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3591 PetscFunctionReturn(0); 3592 } 3593 3594 #undef __FUNCT__ 3595 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3596 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3597 { 3598 PetscErrorCode ierr; 3599 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3600 PetscScalar *array; 3601 Vec from,to; 3602 3603 PetscFunctionBegin; 3604 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3605 from = pcbddc->coarse_vec; 3606 to = pcbddc->vec1_P; 3607 } else { /* from local to global -> put data in coarse right hand side */ 3608 from = pcbddc->vec1_P; 3609 to = pcbddc->coarse_vec; 3610 } 3611 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3612 if (smode == SCATTER_FORWARD) { 3613 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3614 Vec tvec; 3615 3616 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3617 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3618 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3619 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3620 } 3621 } else { 3622 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3623 ierr = VecResetArray(from);CHKERRQ(ierr); 3624 } 3625 } 3626 PetscFunctionReturn(0); 3627 } 3628 3629 /* uncomment for testing purposes */ 3630 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3631 #undef __FUNCT__ 3632 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3633 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3634 { 3635 PetscErrorCode ierr; 3636 PC_IS* pcis = (PC_IS*)(pc->data); 3637 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3638 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3639 /* one and zero */ 3640 PetscScalar one=1.0,zero=0.0; 3641 /* space to store constraints and their local indices */ 3642 PetscScalar *constraints_data; 3643 PetscInt *constraints_idxs,*constraints_idxs_B; 3644 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3645 PetscInt *constraints_n; 3646 /* iterators */ 3647 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3648 /* BLAS integers */ 3649 PetscBLASInt lwork,lierr; 3650 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3651 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3652 /* reuse */ 3653 PetscInt olocal_primal_size,olocal_primal_size_cc; 3654 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3655 /* change of basis */ 3656 PetscBool qr_needed; 3657 PetscBT change_basis,qr_needed_idx; 3658 /* auxiliary stuff */ 3659 PetscInt *nnz,*is_indices; 3660 PetscInt ncc; 3661 /* some quantities */ 3662 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3663 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3664 3665 PetscFunctionBegin; 3666 /* Destroy Mat objects computed previously */ 3667 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3668 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3669 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3670 /* save info on constraints from previous setup (if any) */ 3671 olocal_primal_size = pcbddc->local_primal_size; 3672 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3673 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3674 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3675 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3676 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3677 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3678 3679 if (!pcbddc->adaptive_selection) { 3680 IS ISForVertices,*ISForFaces,*ISForEdges; 3681 MatNullSpace nearnullsp; 3682 const Vec *nearnullvecs; 3683 Vec *localnearnullsp; 3684 PetscScalar *array; 3685 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3686 PetscBool nnsp_has_cnst; 3687 /* LAPACK working arrays for SVD or POD */ 3688 PetscBool skip_lapack,boolforchange; 3689 PetscScalar *work; 3690 PetscReal *singular_vals; 3691 #if defined(PETSC_USE_COMPLEX) 3692 PetscReal *rwork; 3693 #endif 3694 #if defined(PETSC_MISSING_LAPACK_GESVD) 3695 PetscScalar *temp_basis,*correlation_mat; 3696 #else 3697 PetscBLASInt dummy_int=1; 3698 PetscScalar dummy_scalar=1.; 3699 #endif 3700 3701 /* Get index sets for faces, edges and vertices from graph */ 3702 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3703 /* print some info */ 3704 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3705 PetscInt nv; 3706 3707 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3708 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3709 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3715 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3716 } 3717 3718 /* free unneeded index sets */ 3719 if (!pcbddc->use_vertices) { 3720 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3721 } 3722 if (!pcbddc->use_edges) { 3723 for (i=0;i<n_ISForEdges;i++) { 3724 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3725 } 3726 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3727 n_ISForEdges = 0; 3728 } 3729 if (!pcbddc->use_faces) { 3730 for (i=0;i<n_ISForFaces;i++) { 3731 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3732 } 3733 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3734 n_ISForFaces = 0; 3735 } 3736 3737 /* check if near null space is attached to global mat */ 3738 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3739 if (nearnullsp) { 3740 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3741 /* remove any stored info */ 3742 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3743 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3744 /* store information for BDDC solver reuse */ 3745 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3746 pcbddc->onearnullspace = nearnullsp; 3747 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3748 for (i=0;i<nnsp_size;i++) { 3749 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3750 } 3751 } else { /* if near null space is not provided BDDC uses constants by default */ 3752 nnsp_size = 0; 3753 nnsp_has_cnst = PETSC_TRUE; 3754 } 3755 /* get max number of constraints on a single cc */ 3756 max_constraints = nnsp_size; 3757 if (nnsp_has_cnst) max_constraints++; 3758 3759 /* 3760 Evaluate maximum storage size needed by the procedure 3761 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3762 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3763 There can be multiple constraints per connected component 3764 */ 3765 n_vertices = 0; 3766 if (ISForVertices) { 3767 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3768 } 3769 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3770 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3771 3772 total_counts = n_ISForFaces+n_ISForEdges; 3773 total_counts *= max_constraints; 3774 total_counts += n_vertices; 3775 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3776 3777 total_counts = 0; 3778 max_size_of_constraint = 0; 3779 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3780 IS used_is; 3781 if (i<n_ISForEdges) { 3782 used_is = ISForEdges[i]; 3783 } else { 3784 used_is = ISForFaces[i-n_ISForEdges]; 3785 } 3786 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3787 total_counts += j; 3788 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3789 } 3790 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3791 3792 /* get local part of global near null space vectors */ 3793 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3794 for (k=0;k<nnsp_size;k++) { 3795 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3796 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3797 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3798 } 3799 3800 /* whether or not to skip lapack calls */ 3801 skip_lapack = PETSC_TRUE; 3802 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3803 3804 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3805 if (!skip_lapack) { 3806 PetscScalar temp_work; 3807 3808 #if defined(PETSC_MISSING_LAPACK_GESVD) 3809 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3810 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3811 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3812 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3813 #if defined(PETSC_USE_COMPLEX) 3814 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3815 #endif 3816 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3817 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3818 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3819 lwork = -1; 3820 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3821 #if !defined(PETSC_USE_COMPLEX) 3822 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3823 #else 3824 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3825 #endif 3826 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3827 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3828 #else /* on missing GESVD */ 3829 /* SVD */ 3830 PetscInt max_n,min_n; 3831 max_n = max_size_of_constraint; 3832 min_n = max_constraints; 3833 if (max_size_of_constraint < max_constraints) { 3834 min_n = max_size_of_constraint; 3835 max_n = max_constraints; 3836 } 3837 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3838 #if defined(PETSC_USE_COMPLEX) 3839 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3840 #endif 3841 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3842 lwork = -1; 3843 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3844 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3845 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3846 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3847 #if !defined(PETSC_USE_COMPLEX) 3848 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3849 #else 3850 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3851 #endif 3852 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3853 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3854 #endif /* on missing GESVD */ 3855 /* Allocate optimal workspace */ 3856 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3857 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3858 } 3859 /* Now we can loop on constraining sets */ 3860 total_counts = 0; 3861 constraints_idxs_ptr[0] = 0; 3862 constraints_data_ptr[0] = 0; 3863 /* vertices */ 3864 if (n_vertices) { 3865 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3866 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3867 for (i=0;i<n_vertices;i++) { 3868 constraints_n[total_counts] = 1; 3869 constraints_data[total_counts] = 1.0; 3870 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3871 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3872 total_counts++; 3873 } 3874 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3875 n_vertices = total_counts; 3876 } 3877 3878 /* edges and faces */ 3879 total_counts_cc = total_counts; 3880 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3881 IS used_is; 3882 PetscBool idxs_copied = PETSC_FALSE; 3883 3884 if (ncc<n_ISForEdges) { 3885 used_is = ISForEdges[ncc]; 3886 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3887 } else { 3888 used_is = ISForFaces[ncc-n_ISForEdges]; 3889 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3890 } 3891 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3892 3893 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3894 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3895 /* change of basis should not be performed on local periodic nodes */ 3896 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3897 if (nnsp_has_cnst) { 3898 PetscScalar quad_value; 3899 3900 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3901 idxs_copied = PETSC_TRUE; 3902 3903 if (!pcbddc->use_nnsp_true) { 3904 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3905 } else { 3906 quad_value = 1.0; 3907 } 3908 for (j=0;j<size_of_constraint;j++) { 3909 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3910 } 3911 temp_constraints++; 3912 total_counts++; 3913 } 3914 for (k=0;k<nnsp_size;k++) { 3915 PetscReal real_value; 3916 PetscScalar *ptr_to_data; 3917 3918 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3919 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3920 for (j=0;j<size_of_constraint;j++) { 3921 ptr_to_data[j] = array[is_indices[j]]; 3922 } 3923 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3924 /* check if array is null on the connected component */ 3925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3926 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3927 if (real_value > 0.0) { /* keep indices and values */ 3928 temp_constraints++; 3929 total_counts++; 3930 if (!idxs_copied) { 3931 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3932 idxs_copied = PETSC_TRUE; 3933 } 3934 } 3935 } 3936 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3937 valid_constraints = temp_constraints; 3938 if (!pcbddc->use_nnsp_true && temp_constraints) { 3939 if (temp_constraints == 1) { /* just normalize the constraint */ 3940 PetscScalar norm,*ptr_to_data; 3941 3942 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3944 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3945 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3946 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3947 } else { /* perform SVD */ 3948 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3949 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3950 3951 #if defined(PETSC_MISSING_LAPACK_GESVD) 3952 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3953 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3954 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3955 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3956 from that computed using LAPACKgesvd 3957 -> This is due to a different computation of eigenvectors in LAPACKheev 3958 -> The quality of the POD-computed basis will be the same */ 3959 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3960 /* Store upper triangular part of correlation matrix */ 3961 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3962 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3963 for (j=0;j<temp_constraints;j++) { 3964 for (k=0;k<j+1;k++) { 3965 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3966 } 3967 } 3968 /* compute eigenvalues and eigenvectors of correlation matrix */ 3969 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3970 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3971 #if !defined(PETSC_USE_COMPLEX) 3972 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3973 #else 3974 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3975 #endif 3976 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3977 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3978 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3979 j = 0; 3980 while (j < temp_constraints && singular_vals[j] < tol) j++; 3981 total_counts = total_counts-j; 3982 valid_constraints = temp_constraints-j; 3983 /* scale and copy POD basis into used quadrature memory */ 3984 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3985 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3986 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3988 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3990 if (j<temp_constraints) { 3991 PetscInt ii; 3992 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3993 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3994 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3995 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3996 for (k=0;k<temp_constraints-j;k++) { 3997 for (ii=0;ii<size_of_constraint;ii++) { 3998 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3999 } 4000 } 4001 } 4002 #else /* on missing GESVD */ 4003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4006 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4007 #if !defined(PETSC_USE_COMPLEX) 4008 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4009 #else 4010 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4011 #endif 4012 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4013 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4014 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4015 k = temp_constraints; 4016 if (k > size_of_constraint) k = size_of_constraint; 4017 j = 0; 4018 while (j < k && singular_vals[k-j-1] < tol) j++; 4019 valid_constraints = k-j; 4020 total_counts = total_counts-temp_constraints+valid_constraints; 4021 #endif /* on missing GESVD */ 4022 } 4023 } 4024 /* update pointers information */ 4025 if (valid_constraints) { 4026 constraints_n[total_counts_cc] = valid_constraints; 4027 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4028 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4029 /* set change_of_basis flag */ 4030 if (boolforchange) { 4031 PetscBTSet(change_basis,total_counts_cc); 4032 } 4033 total_counts_cc++; 4034 } 4035 } 4036 /* free workspace */ 4037 if (!skip_lapack) { 4038 ierr = PetscFree(work);CHKERRQ(ierr); 4039 #if defined(PETSC_USE_COMPLEX) 4040 ierr = PetscFree(rwork);CHKERRQ(ierr); 4041 #endif 4042 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4043 #if defined(PETSC_MISSING_LAPACK_GESVD) 4044 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4045 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4046 #endif 4047 } 4048 for (k=0;k<nnsp_size;k++) { 4049 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4050 } 4051 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4052 /* free index sets of faces, edges and vertices */ 4053 for (i=0;i<n_ISForFaces;i++) { 4054 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4055 } 4056 if (n_ISForFaces) { 4057 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4058 } 4059 for (i=0;i<n_ISForEdges;i++) { 4060 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4061 } 4062 if (n_ISForEdges) { 4063 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4064 } 4065 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4066 } else { 4067 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4068 4069 total_counts = 0; 4070 n_vertices = 0; 4071 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4072 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4073 } 4074 max_constraints = 0; 4075 total_counts_cc = 0; 4076 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4077 total_counts += pcbddc->adaptive_constraints_n[i]; 4078 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4079 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4080 } 4081 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4082 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4083 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4084 constraints_data = pcbddc->adaptive_constraints_data; 4085 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4086 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4087 total_counts_cc = 0; 4088 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4089 if (pcbddc->adaptive_constraints_n[i]) { 4090 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4091 } 4092 } 4093 #if 0 4094 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4095 for (i=0;i<total_counts_cc;i++) { 4096 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4097 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4098 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4099 printf(" %d",constraints_idxs[j]); 4100 } 4101 printf("\n"); 4102 printf("number of cc: %d\n",constraints_n[i]); 4103 } 4104 for (i=0;i<n_vertices;i++) { 4105 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4106 } 4107 for (i=0;i<sub_schurs->n_subs;i++) { 4108 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4109 } 4110 #endif 4111 4112 max_size_of_constraint = 0; 4113 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4114 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4115 /* Change of basis */ 4116 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4117 if (pcbddc->use_change_of_basis) { 4118 for (i=0;i<sub_schurs->n_subs;i++) { 4119 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4120 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4121 } 4122 } 4123 } 4124 } 4125 pcbddc->local_primal_size = total_counts; 4126 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4127 4128 /* map constraints_idxs in boundary numbering */ 4129 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4130 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4131 4132 /* Create constraint matrix */ 4133 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4134 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4135 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4136 4137 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4138 /* determine if a QR strategy is needed for change of basis */ 4139 qr_needed = PETSC_FALSE; 4140 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4141 total_primal_vertices=0; 4142 pcbddc->local_primal_size_cc = 0; 4143 for (i=0;i<total_counts_cc;i++) { 4144 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4145 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4146 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4147 pcbddc->local_primal_size_cc += 1; 4148 } else if (PetscBTLookup(change_basis,i)) { 4149 for (k=0;k<constraints_n[i];k++) { 4150 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4151 } 4152 pcbddc->local_primal_size_cc += constraints_n[i]; 4153 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4154 PetscBTSet(qr_needed_idx,i); 4155 qr_needed = PETSC_TRUE; 4156 } 4157 } else { 4158 pcbddc->local_primal_size_cc += 1; 4159 } 4160 } 4161 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4162 pcbddc->n_vertices = total_primal_vertices; 4163 /* permute indices in order to have a sorted set of vertices */ 4164 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4165 4166 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4167 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4168 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4169 4170 /* nonzero structure of constraint matrix */ 4171 /* and get reference dof for local constraints */ 4172 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4173 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4174 4175 j = total_primal_vertices; 4176 total_counts = total_primal_vertices; 4177 cum = total_primal_vertices; 4178 for (i=n_vertices;i<total_counts_cc;i++) { 4179 if (!PetscBTLookup(change_basis,i)) { 4180 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4181 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4182 cum++; 4183 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4184 for (k=0;k<constraints_n[i];k++) { 4185 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4186 nnz[j+k] = size_of_constraint; 4187 } 4188 j += constraints_n[i]; 4189 } 4190 } 4191 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4192 ierr = PetscFree(nnz);CHKERRQ(ierr); 4193 4194 /* set values in constraint matrix */ 4195 for (i=0;i<total_primal_vertices;i++) { 4196 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4197 } 4198 total_counts = total_primal_vertices; 4199 for (i=n_vertices;i<total_counts_cc;i++) { 4200 if (!PetscBTLookup(change_basis,i)) { 4201 PetscInt *cols; 4202 4203 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4204 cols = constraints_idxs+constraints_idxs_ptr[i]; 4205 for (k=0;k<constraints_n[i];k++) { 4206 PetscInt row = total_counts+k; 4207 PetscScalar *vals; 4208 4209 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4210 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4211 } 4212 total_counts += constraints_n[i]; 4213 } 4214 } 4215 /* assembling */ 4216 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4217 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4218 4219 /* 4220 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4221 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4222 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4223 */ 4224 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4225 if (pcbddc->use_change_of_basis) { 4226 /* dual and primal dofs on a single cc */ 4227 PetscInt dual_dofs,primal_dofs; 4228 /* working stuff for GEQRF */ 4229 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4230 PetscBLASInt lqr_work; 4231 /* working stuff for UNGQR */ 4232 PetscScalar *gqr_work,lgqr_work_t; 4233 PetscBLASInt lgqr_work; 4234 /* working stuff for TRTRS */ 4235 PetscScalar *trs_rhs; 4236 PetscBLASInt Blas_NRHS; 4237 /* pointers for values insertion into change of basis matrix */ 4238 PetscInt *start_rows,*start_cols; 4239 PetscScalar *start_vals; 4240 /* working stuff for values insertion */ 4241 PetscBT is_primal; 4242 PetscInt *aux_primal_numbering_B; 4243 /* matrix sizes */ 4244 PetscInt global_size,local_size; 4245 /* temporary change of basis */ 4246 Mat localChangeOfBasisMatrix; 4247 /* extra space for debugging */ 4248 PetscScalar *dbg_work; 4249 4250 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4251 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4252 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4253 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4254 /* nonzeros for local mat */ 4255 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4256 if (!pcbddc->benign_change || pcbddc->fake_change) { 4257 for (i=0;i<pcis->n;i++) nnz[i]=1; 4258 } else { 4259 const PetscInt *ii; 4260 PetscInt n; 4261 PetscBool flg_row; 4262 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4263 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4264 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4265 } 4266 for (i=n_vertices;i<total_counts_cc;i++) { 4267 if (PetscBTLookup(change_basis,i)) { 4268 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4269 if (PetscBTLookup(qr_needed_idx,i)) { 4270 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4271 } else { 4272 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4273 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4274 } 4275 } 4276 } 4277 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4278 ierr = PetscFree(nnz);CHKERRQ(ierr); 4279 /* Set interior change in the matrix */ 4280 if (!pcbddc->benign_change || pcbddc->fake_change) { 4281 for (i=0;i<pcis->n;i++) { 4282 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4283 } 4284 } else { 4285 const PetscInt *ii,*jj; 4286 PetscScalar *aa; 4287 PetscInt n; 4288 PetscBool flg_row; 4289 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4290 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4291 for (i=0;i<n;i++) { 4292 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4293 } 4294 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4295 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4296 } 4297 4298 if (pcbddc->dbg_flag) { 4299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4301 } 4302 4303 4304 /* Now we loop on the constraints which need a change of basis */ 4305 /* 4306 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4307 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4308 4309 Basic blocks of change of basis matrix T computed by 4310 4311 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4312 4313 | 1 0 ... 0 s_1/S | 4314 | 0 1 ... 0 s_2/S | 4315 | ... | 4316 | 0 ... 1 s_{n-1}/S | 4317 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4318 4319 with S = \sum_{i=1}^n s_i^2 4320 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4321 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4322 4323 - QR decomposition of constraints otherwise 4324 */ 4325 if (qr_needed) { 4326 /* space to store Q */ 4327 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4328 /* first we issue queries for optimal work */ 4329 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4330 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4332 lqr_work = -1; 4333 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4334 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4335 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4336 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4337 lgqr_work = -1; 4338 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4339 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4340 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4341 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4342 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4343 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4344 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4345 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4346 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4347 /* array to store scaling factors for reflectors */ 4348 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4349 /* array to store rhs and solution of triangular solver */ 4350 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4351 /* allocating workspace for check */ 4352 if (pcbddc->dbg_flag) { 4353 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4354 } 4355 } 4356 /* array to store whether a node is primal or not */ 4357 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4358 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4359 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4360 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4361 for (i=0;i<total_primal_vertices;i++) { 4362 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4363 } 4364 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4365 4366 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4367 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4368 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4369 if (PetscBTLookup(change_basis,total_counts)) { 4370 /* get constraint info */ 4371 primal_dofs = constraints_n[total_counts]; 4372 dual_dofs = size_of_constraint-primal_dofs; 4373 4374 if (pcbddc->dbg_flag) { 4375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4376 } 4377 4378 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4379 4380 /* copy quadrature constraints for change of basis check */ 4381 if (pcbddc->dbg_flag) { 4382 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4383 } 4384 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4385 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4386 4387 /* compute QR decomposition of constraints */ 4388 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4389 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4390 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4391 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4392 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4393 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4394 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4395 4396 /* explictly compute R^-T */ 4397 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4398 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4399 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4400 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4401 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4402 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4403 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4404 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4405 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4406 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4407 4408 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4410 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4411 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4412 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4413 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4414 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4415 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4416 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4417 4418 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4419 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4420 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4421 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4422 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4423 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4424 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4425 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4427 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4428 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4429 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4430 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4431 4432 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4433 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4434 /* insert cols for primal dofs */ 4435 for (j=0;j<primal_dofs;j++) { 4436 start_vals = &qr_basis[j*size_of_constraint]; 4437 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4438 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4439 } 4440 /* insert cols for dual dofs */ 4441 for (j=0,k=0;j<dual_dofs;k++) { 4442 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4443 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4444 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4445 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4446 j++; 4447 } 4448 } 4449 4450 /* check change of basis */ 4451 if (pcbddc->dbg_flag) { 4452 PetscInt ii,jj; 4453 PetscBool valid_qr=PETSC_TRUE; 4454 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4455 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4456 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4457 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4458 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4459 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4460 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4461 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4462 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4463 for (jj=0;jj<size_of_constraint;jj++) { 4464 for (ii=0;ii<primal_dofs;ii++) { 4465 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4466 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4467 } 4468 } 4469 if (!valid_qr) { 4470 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4471 for (jj=0;jj<size_of_constraint;jj++) { 4472 for (ii=0;ii<primal_dofs;ii++) { 4473 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4474 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4475 } 4476 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4477 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4478 } 4479 } 4480 } 4481 } else { 4482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4483 } 4484 } 4485 } else { /* simple transformation block */ 4486 PetscInt row,col; 4487 PetscScalar val,norm; 4488 4489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4490 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4491 for (j=0;j<size_of_constraint;j++) { 4492 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4493 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4494 if (!PetscBTLookup(is_primal,row_B)) { 4495 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4496 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4497 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4498 } else { 4499 for (k=0;k<size_of_constraint;k++) { 4500 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4501 if (row != col) { 4502 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4503 } else { 4504 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4505 } 4506 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4507 } 4508 } 4509 } 4510 if (pcbddc->dbg_flag) { 4511 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4512 } 4513 } 4514 } else { 4515 if (pcbddc->dbg_flag) { 4516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4517 } 4518 } 4519 } 4520 4521 /* free workspace */ 4522 if (qr_needed) { 4523 if (pcbddc->dbg_flag) { 4524 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4525 } 4526 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4527 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4528 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4529 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4530 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4531 } 4532 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4533 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4534 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4535 4536 /* assembling of global change of variable */ 4537 if (!pcbddc->fake_change) { 4538 Mat tmat; 4539 PetscInt bs; 4540 4541 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4542 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4543 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4544 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4545 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4546 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4547 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4548 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4549 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4550 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4551 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4552 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4553 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4554 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4555 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4556 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4557 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4558 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4559 4560 /* check */ 4561 if (pcbddc->dbg_flag) { 4562 PetscReal error; 4563 Vec x,x_change; 4564 4565 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4566 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4567 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4568 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4569 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4570 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4571 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4572 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4573 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4574 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4575 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4576 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4579 ierr = VecDestroy(&x);CHKERRQ(ierr); 4580 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4581 } 4582 /* adapt sub_schurs computed (if any) */ 4583 if (pcbddc->use_deluxe_scaling) { 4584 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4585 4586 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4587 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4588 } 4589 if (sub_schurs && sub_schurs->S_Ej_all) { 4590 Mat S_new,tmat; 4591 IS is_all_N,is_V_Sall = NULL; 4592 4593 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4594 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4595 if (pcbddc->deluxe_zerorows) { 4596 ISLocalToGlobalMapping NtoSall; 4597 IS is_V; 4598 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4599 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4600 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4601 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4602 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4603 } 4604 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4605 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4606 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4607 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4608 if (pcbddc->deluxe_zerorows) { 4609 const PetscScalar *array; 4610 const PetscInt *idxs_V,*idxs_all; 4611 PetscInt i,n_V; 4612 4613 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4614 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4615 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4616 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4617 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4618 for (i=0;i<n_V;i++) { 4619 PetscScalar val; 4620 PetscInt idx; 4621 4622 idx = idxs_V[i]; 4623 val = array[idxs_all[idxs_V[i]]]; 4624 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4625 } 4626 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4627 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4628 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4629 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4630 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4631 } 4632 sub_schurs->S_Ej_all = S_new; 4633 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4634 if (sub_schurs->sum_S_Ej_all) { 4635 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4636 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4637 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4638 if (pcbddc->deluxe_zerorows) { 4639 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4640 } 4641 sub_schurs->sum_S_Ej_all = S_new; 4642 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4643 } 4644 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4645 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4646 } 4647 /* destroy any change of basis context in sub_schurs */ 4648 if (sub_schurs && sub_schurs->change) { 4649 PetscInt i; 4650 4651 for (i=0;i<sub_schurs->n_subs;i++) { 4652 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4653 } 4654 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4655 } 4656 } 4657 if (pcbddc->switch_static) { /* need to save the local change */ 4658 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4659 } else { 4660 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4661 } 4662 /* determine if any process has changed the pressures locally */ 4663 pcbddc->change_interior = pcbddc->benign_have_null; 4664 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4665 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4666 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4667 pcbddc->use_qr_single = qr_needed; 4668 } 4669 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4670 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4671 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4672 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4673 } else { 4674 Mat benign_global = NULL; 4675 if (pcbddc->benign_have_null) { 4676 Mat tmat; 4677 4678 pcbddc->change_interior = PETSC_TRUE; 4679 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4680 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4681 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4682 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4683 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4684 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4685 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4686 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4687 if (pcbddc->benign_change) { 4688 Mat M; 4689 4690 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4691 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4692 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4693 ierr = MatDestroy(&M);CHKERRQ(ierr); 4694 } else { 4695 Mat eye; 4696 PetscScalar *array; 4697 4698 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4699 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4700 for (i=0;i<pcis->n;i++) { 4701 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4702 } 4703 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4704 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4705 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4706 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4707 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4708 } 4709 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4710 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4711 } 4712 if (pcbddc->user_ChangeOfBasisMatrix) { 4713 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4714 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4715 } else if (pcbddc->benign_have_null) { 4716 pcbddc->ChangeOfBasisMatrix = benign_global; 4717 } 4718 } 4719 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4720 IS is_global; 4721 const PetscInt *gidxs; 4722 4723 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4724 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4725 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4726 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4727 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4728 } 4729 } 4730 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4731 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4732 } 4733 4734 if (!pcbddc->fake_change) { 4735 /* add pressure dofs to set of primal nodes for numbering purposes */ 4736 for (i=0;i<pcbddc->benign_n;i++) { 4737 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4738 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4739 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4740 pcbddc->local_primal_size_cc++; 4741 pcbddc->local_primal_size++; 4742 } 4743 4744 /* check if a new primal space has been introduced (also take into account benign trick) */ 4745 pcbddc->new_primal_space_local = PETSC_TRUE; 4746 if (olocal_primal_size == pcbddc->local_primal_size) { 4747 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4748 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4749 if (!pcbddc->new_primal_space_local) { 4750 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4751 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4752 } 4753 } 4754 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4755 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4756 } 4757 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4758 4759 /* flush dbg viewer */ 4760 if (pcbddc->dbg_flag) { 4761 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4762 } 4763 4764 /* free workspace */ 4765 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4766 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4767 if (!pcbddc->adaptive_selection) { 4768 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4769 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4770 } else { 4771 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4772 pcbddc->adaptive_constraints_idxs_ptr, 4773 pcbddc->adaptive_constraints_data_ptr, 4774 pcbddc->adaptive_constraints_idxs, 4775 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4776 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4777 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4778 } 4779 PetscFunctionReturn(0); 4780 } 4781 4782 #undef __FUNCT__ 4783 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4784 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4785 { 4786 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4787 PC_IS *pcis = (PC_IS*)pc->data; 4788 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4789 PetscInt ierr,i,N; 4790 4791 PetscFunctionBegin; 4792 /* Reset previously computed graph */ 4793 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4794 /* Init local Graph struct */ 4795 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4796 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4797 4798 /* Check validity of the csr graph passed in by the user */ 4799 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4800 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4801 } 4802 4803 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4804 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4805 PetscInt *xadj,*adjncy; 4806 PetscInt nvtxs; 4807 PetscBool flg_row=PETSC_FALSE; 4808 4809 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4810 if (flg_row) { 4811 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4812 pcbddc->computed_rowadj = PETSC_TRUE; 4813 } 4814 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4815 } 4816 if (pcbddc->dbg_flag) { 4817 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4818 } 4819 4820 /* Setup of Graph */ 4821 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4822 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4823 4824 /* attach info on disconnected subdomains if present */ 4825 if (pcbddc->n_local_subs) { 4826 PetscInt *local_subs; 4827 4828 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4829 for (i=0;i<pcbddc->n_local_subs;i++) { 4830 const PetscInt *idxs; 4831 PetscInt nl,j; 4832 4833 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4834 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4835 for (j=0;j<nl;j++) { 4836 local_subs[idxs[j]] = i; 4837 } 4838 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4839 } 4840 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4841 pcbddc->mat_graph->local_subs = local_subs; 4842 } 4843 4844 /* Graph's connected components analysis */ 4845 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4846 PetscFunctionReturn(0); 4847 } 4848 4849 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 4850 #undef __FUNCT__ 4851 #define __FUNCT__ "PCBDDCSubsetNumbering" 4852 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 4853 { 4854 PetscSF sf; 4855 PetscLayout map; 4856 const PetscInt *idxs; 4857 PetscInt *leaf_data,*root_data,*gidxs; 4858 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4859 PetscInt n_n,nlocals,start,first_index; 4860 PetscMPIInt commsize; 4861 PetscBool first_found; 4862 PetscErrorCode ierr; 4863 4864 PetscFunctionBegin; 4865 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4866 if (subset_mult) { 4867 PetscCheckSameComm(subset,1,subset_mult,2); 4868 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4869 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4870 } 4871 /* create workspace layout for computing global indices of subset */ 4872 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4873 lbounds[0] = lbounds[1] = 0; 4874 for (i=0;i<n;i++) { 4875 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4876 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4877 } 4878 lbounds[0] = -lbounds[0]; 4879 ierr = MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4880 gbounds[0] = -gbounds[0]; 4881 N = gbounds[1] - gbounds[0] + 1; 4882 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4883 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4884 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4885 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4886 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4887 4888 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4889 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4890 if (subset_mult) { 4891 const PetscInt* idxs_mult; 4892 4893 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4894 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4895 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4896 } else { 4897 for (i=0;i<n;i++) leaf_data[i] = 1; 4898 } 4899 /* local size of new subset */ 4900 n_n = 0; 4901 for (i=0;i<n;i++) n_n += leaf_data[i]; 4902 4903 /* global indexes in layout */ 4904 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4905 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4906 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4907 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4908 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4909 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4910 4911 /* reduce from leaves to roots */ 4912 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4913 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4914 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4915 4916 /* count indexes in local part of layout */ 4917 nlocals = 0; 4918 first_index = -1; 4919 first_found = PETSC_FALSE; 4920 for (i=0;i<Nl;i++) { 4921 if (!first_found && root_data[i]) { 4922 first_found = PETSC_TRUE; 4923 first_index = i; 4924 } 4925 nlocals += root_data[i]; 4926 } 4927 4928 /* cumulative of number of indexes and size of subset without holes */ 4929 #if defined(PETSC_HAVE_MPI_EXSCAN) 4930 start = 0; 4931 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4932 #else 4933 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4934 start = start-nlocals; 4935 #endif 4936 4937 if (N_n) { /* compute total size of new subset if requested */ 4938 *N_n = start + nlocals; 4939 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4940 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4941 } 4942 4943 /* adapt root data with cumulative */ 4944 if (first_found) { 4945 PetscInt old_index; 4946 4947 root_data[first_index] += start; 4948 old_index = first_index; 4949 for (i=first_index+1;i<Nl;i++) { 4950 if (root_data[i]) { 4951 root_data[i] += root_data[old_index]; 4952 old_index = i; 4953 } 4954 } 4955 } 4956 4957 /* from roots to leaves */ 4958 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4959 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4960 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4961 4962 /* create new IS with global indexes without holes */ 4963 if (subset_mult) { 4964 const PetscInt* idxs_mult; 4965 PetscInt cum; 4966 4967 cum = 0; 4968 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4969 for (i=0;i<n;i++) { 4970 PetscInt j; 4971 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4972 } 4973 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4974 } else { 4975 for (i=0;i<n;i++) { 4976 gidxs[i] = leaf_data[i]-1; 4977 } 4978 } 4979 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4980 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4981 PetscFunctionReturn(0); 4982 } 4983 4984 /* this implements stabilized Gram-Schmidt */ 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4987 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4988 { 4989 PetscInt i,j; 4990 PetscScalar *alphas; 4991 PetscErrorCode ierr; 4992 4993 PetscFunctionBegin; 4994 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4995 for (i=0;i<n;i++) { 4996 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4997 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4998 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4999 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5000 } 5001 ierr = PetscFree(alphas);CHKERRQ(ierr); 5002 PetscFunctionReturn(0); 5003 } 5004 5005 #undef __FUNCT__ 5006 #define __FUNCT__ "MatISGetSubassemblingPattern" 5007 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5008 { 5009 Mat A; 5010 PetscInt n_neighs,*neighs,*n_shared,**shared; 5011 PetscMPIInt size,rank,color; 5012 PetscInt *xadj,*adjncy; 5013 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5014 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5015 PetscInt void_procs,*procs_candidates = NULL; 5016 PetscInt xadj_count, *count; 5017 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5018 PetscSubcomm psubcomm; 5019 MPI_Comm subcomm; 5020 PetscErrorCode ierr; 5021 5022 PetscFunctionBegin; 5023 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5024 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5025 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5026 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5027 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5028 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5029 5030 if (have_void) *have_void = PETSC_FALSE; 5031 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5032 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5033 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5034 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5035 im_active = !!(n); 5036 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5037 void_procs = size - active_procs; 5038 /* get ranks of of non-active processes in mat communicator */ 5039 if (void_procs) { 5040 PetscInt ncand; 5041 5042 if (have_void) *have_void = PETSC_TRUE; 5043 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5044 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5045 for (i=0,ncand=0;i<size;i++) { 5046 if (!procs_candidates[i]) { 5047 procs_candidates[ncand++] = i; 5048 } 5049 } 5050 /* force n_subdomains to be not greater that the number of non-active processes */ 5051 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5052 } 5053 5054 /* number of subdomains requested greater than active processes -> just shift the matrix */ 5055 if (active_procs < *n_subdomains) { 5056 PetscInt issize,isidx; 5057 if (im_active) { 5058 issize = 1; 5059 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5060 isidx = procs_candidates[rank]; 5061 } else { 5062 isidx = rank; 5063 } 5064 } else { 5065 issize = 0; 5066 isidx = -1; 5067 } 5068 *n_subdomains = active_procs; 5069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5070 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5071 PetscFunctionReturn(0); 5072 } 5073 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5074 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5075 threshold = PetscMax(threshold,2); 5076 5077 /* Get info on mapping */ 5078 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5079 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5080 5081 /* build local CSR graph of subdomains' connectivity */ 5082 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5083 xadj[0] = 0; 5084 xadj[1] = PetscMax(n_neighs-1,0); 5085 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5086 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5087 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5088 for (i=1;i<n_neighs;i++) 5089 for (j=0;j<n_shared[i];j++) 5090 count[shared[i][j]] += 1; 5091 5092 xadj_count = 0; 5093 for (i=1;i<n_neighs;i++) { 5094 for (j=0;j<n_shared[i];j++) { 5095 if (count[shared[i][j]] < threshold) { 5096 adjncy[xadj_count] = neighs[i]; 5097 adjncy_wgt[xadj_count] = n_shared[i]; 5098 xadj_count++; 5099 break; 5100 } 5101 } 5102 } 5103 xadj[1] = xadj_count; 5104 ierr = PetscFree(count);CHKERRQ(ierr); 5105 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5106 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5107 5108 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5109 5110 /* Restrict work on active processes only */ 5111 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5112 if (void_procs) { 5113 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5114 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5115 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5116 subcomm = PetscSubcommChild(psubcomm); 5117 } else { 5118 psubcomm = NULL; 5119 subcomm = PetscObjectComm((PetscObject)mat); 5120 } 5121 5122 v_wgt = NULL; 5123 if (!color) { 5124 ierr = PetscFree(xadj);CHKERRQ(ierr); 5125 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5126 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5127 } else { 5128 Mat subdomain_adj; 5129 IS new_ranks,new_ranks_contig; 5130 MatPartitioning partitioner; 5131 PetscInt rstart=0,rend=0; 5132 PetscInt *is_indices,*oldranks; 5133 PetscMPIInt size; 5134 PetscBool aggregate; 5135 5136 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5137 if (void_procs) { 5138 PetscInt prank = rank; 5139 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5140 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5141 for (i=0;i<xadj[1];i++) { 5142 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5143 } 5144 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5145 } else { 5146 oldranks = NULL; 5147 } 5148 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5149 if (aggregate) { /* TODO: all this part could be made more efficient */ 5150 PetscInt lrows,row,ncols,*cols; 5151 PetscMPIInt nrank; 5152 PetscScalar *vals; 5153 5154 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5155 lrows = 0; 5156 if (nrank<redprocs) { 5157 lrows = size/redprocs; 5158 if (nrank<size%redprocs) lrows++; 5159 } 5160 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5161 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5162 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5163 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5164 row = nrank; 5165 ncols = xadj[1]-xadj[0]; 5166 cols = adjncy; 5167 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5168 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5169 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5170 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5171 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5172 ierr = PetscFree(xadj);CHKERRQ(ierr); 5173 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5174 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5175 ierr = PetscFree(vals);CHKERRQ(ierr); 5176 if (use_vwgt) { 5177 Vec v; 5178 const PetscScalar *array; 5179 PetscInt nl; 5180 5181 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5182 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5183 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5184 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5185 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5186 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5187 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5188 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5189 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5190 ierr = VecDestroy(&v);CHKERRQ(ierr); 5191 } 5192 } else { 5193 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5194 if (use_vwgt) { 5195 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5196 v_wgt[0] = local_size; 5197 } 5198 } 5199 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5200 5201 /* Partition */ 5202 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5203 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5204 if (v_wgt) { 5205 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5206 } 5207 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5208 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5209 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5210 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5211 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5212 5213 /* renumber new_ranks to avoid "holes" in new set of processors */ 5214 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5215 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5216 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5217 if (!aggregate) { 5218 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5219 #if defined(PETSC_USE_DEBUG) 5220 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5221 #endif 5222 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5223 } else if (oldranks) { 5224 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5225 } else { 5226 ranks_send_to_idx[0] = is_indices[0]; 5227 } 5228 } else { 5229 PetscInt idxs[1]; 5230 PetscMPIInt tag; 5231 MPI_Request *reqs; 5232 5233 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5234 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5235 for (i=rstart;i<rend;i++) { 5236 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5237 } 5238 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5239 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5240 ierr = PetscFree(reqs);CHKERRQ(ierr); 5241 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5242 #if defined(PETSC_USE_DEBUG) 5243 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5244 #endif 5245 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5246 } else if (oldranks) { 5247 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5248 } else { 5249 ranks_send_to_idx[0] = idxs[0]; 5250 } 5251 } 5252 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5253 /* clean up */ 5254 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5255 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5256 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5257 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5258 } 5259 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5260 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5261 5262 /* assemble parallel IS for sends */ 5263 i = 1; 5264 if (!color) i=0; 5265 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5266 PetscFunctionReturn(0); 5267 } 5268 5269 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5270 5271 #undef __FUNCT__ 5272 #define __FUNCT__ "MatISSubassemble" 5273 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5274 { 5275 Mat local_mat; 5276 IS is_sends_internal; 5277 PetscInt rows,cols,new_local_rows; 5278 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5279 PetscBool ismatis,isdense,newisdense,destroy_mat; 5280 ISLocalToGlobalMapping l2gmap; 5281 PetscInt* l2gmap_indices; 5282 const PetscInt* is_indices; 5283 MatType new_local_type; 5284 /* buffers */ 5285 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5286 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5287 PetscInt *recv_buffer_idxs_local; 5288 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5289 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5290 /* MPI */ 5291 MPI_Comm comm,comm_n; 5292 PetscSubcomm subcomm; 5293 PetscMPIInt n_sends,n_recvs,commsize; 5294 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5295 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5296 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5297 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5298 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5299 PetscErrorCode ierr; 5300 5301 PetscFunctionBegin; 5302 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5303 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5304 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5305 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5306 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5307 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5308 PetscValidLogicalCollectiveBool(mat,reuse,6); 5309 PetscValidLogicalCollectiveInt(mat,nis,8); 5310 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5311 if (nvecs) { 5312 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5313 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5314 } 5315 /* further checks */ 5316 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5317 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5318 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5319 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5320 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5321 if (reuse && *mat_n) { 5322 PetscInt mrows,mcols,mnrows,mncols; 5323 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5324 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5325 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5326 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5327 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5328 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5329 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5330 } 5331 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5332 PetscValidLogicalCollectiveInt(mat,bs,0); 5333 5334 /* prepare IS for sending if not provided */ 5335 if (!is_sends) { 5336 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5337 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5338 } else { 5339 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5340 is_sends_internal = is_sends; 5341 } 5342 5343 /* get comm */ 5344 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5345 5346 /* compute number of sends */ 5347 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5348 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5349 5350 /* compute number of receives */ 5351 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5352 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5353 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5354 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5355 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5356 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5357 ierr = PetscFree(iflags);CHKERRQ(ierr); 5358 5359 /* restrict comm if requested */ 5360 subcomm = 0; 5361 destroy_mat = PETSC_FALSE; 5362 if (restrict_comm) { 5363 PetscMPIInt color,subcommsize; 5364 5365 color = 0; 5366 if (restrict_full) { 5367 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5368 } else { 5369 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5370 } 5371 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5372 subcommsize = commsize - subcommsize; 5373 /* check if reuse has been requested */ 5374 if (reuse) { 5375 if (*mat_n) { 5376 PetscMPIInt subcommsize2; 5377 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5378 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5379 comm_n = PetscObjectComm((PetscObject)*mat_n); 5380 } else { 5381 comm_n = PETSC_COMM_SELF; 5382 } 5383 } else { /* MAT_INITIAL_MATRIX */ 5384 PetscMPIInt rank; 5385 5386 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5387 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5388 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5389 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5390 comm_n = PetscSubcommChild(subcomm); 5391 } 5392 /* flag to destroy *mat_n if not significative */ 5393 if (color) destroy_mat = PETSC_TRUE; 5394 } else { 5395 comm_n = comm; 5396 } 5397 5398 /* prepare send/receive buffers */ 5399 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5400 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5401 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5402 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5403 if (nis) { 5404 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5405 } 5406 5407 /* Get data from local matrices */ 5408 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5409 /* TODO: See below some guidelines on how to prepare the local buffers */ 5410 /* 5411 send_buffer_vals should contain the raw values of the local matrix 5412 send_buffer_idxs should contain: 5413 - MatType_PRIVATE type 5414 - PetscInt size_of_l2gmap 5415 - PetscInt global_row_indices[size_of_l2gmap] 5416 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5417 */ 5418 else { 5419 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5420 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5421 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5422 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5423 send_buffer_idxs[1] = i; 5424 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5425 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5426 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5427 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5428 for (i=0;i<n_sends;i++) { 5429 ilengths_vals[is_indices[i]] = len*len; 5430 ilengths_idxs[is_indices[i]] = len+2; 5431 } 5432 } 5433 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5434 /* additional is (if any) */ 5435 if (nis) { 5436 PetscMPIInt psum; 5437 PetscInt j; 5438 for (j=0,psum=0;j<nis;j++) { 5439 PetscInt plen; 5440 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5441 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5442 psum += len+1; /* indices + lenght */ 5443 } 5444 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5445 for (j=0,psum=0;j<nis;j++) { 5446 PetscInt plen; 5447 const PetscInt *is_array_idxs; 5448 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5449 send_buffer_idxs_is[psum] = plen; 5450 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5451 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5452 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5453 psum += plen+1; /* indices + lenght */ 5454 } 5455 for (i=0;i<n_sends;i++) { 5456 ilengths_idxs_is[is_indices[i]] = psum; 5457 } 5458 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5459 } 5460 5461 buf_size_idxs = 0; 5462 buf_size_vals = 0; 5463 buf_size_idxs_is = 0; 5464 buf_size_vecs = 0; 5465 for (i=0;i<n_recvs;i++) { 5466 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5467 buf_size_vals += (PetscInt)olengths_vals[i]; 5468 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5469 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5470 } 5471 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5472 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5473 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5474 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5475 5476 /* get new tags for clean communications */ 5477 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5478 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5479 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5480 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5481 5482 /* allocate for requests */ 5483 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5484 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5485 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5486 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5487 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5488 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5489 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5490 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5491 5492 /* communications */ 5493 ptr_idxs = recv_buffer_idxs; 5494 ptr_vals = recv_buffer_vals; 5495 ptr_idxs_is = recv_buffer_idxs_is; 5496 ptr_vecs = recv_buffer_vecs; 5497 for (i=0;i<n_recvs;i++) { 5498 source_dest = onodes[i]; 5499 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5500 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5501 ptr_idxs += olengths_idxs[i]; 5502 ptr_vals += olengths_vals[i]; 5503 if (nis) { 5504 source_dest = onodes_is[i]; 5505 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5506 ptr_idxs_is += olengths_idxs_is[i]; 5507 } 5508 if (nvecs) { 5509 source_dest = onodes[i]; 5510 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5511 ptr_vecs += olengths_idxs[i]-2; 5512 } 5513 } 5514 for (i=0;i<n_sends;i++) { 5515 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5516 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5517 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5518 if (nis) { 5519 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5520 } 5521 if (nvecs) { 5522 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5523 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5524 } 5525 } 5526 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5527 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5528 5529 /* assemble new l2g map */ 5530 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5531 ptr_idxs = recv_buffer_idxs; 5532 new_local_rows = 0; 5533 for (i=0;i<n_recvs;i++) { 5534 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5535 ptr_idxs += olengths_idxs[i]; 5536 } 5537 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5538 ptr_idxs = recv_buffer_idxs; 5539 new_local_rows = 0; 5540 for (i=0;i<n_recvs;i++) { 5541 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5542 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5543 ptr_idxs += olengths_idxs[i]; 5544 } 5545 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5546 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5547 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5548 5549 /* infer new local matrix type from received local matrices type */ 5550 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5551 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5552 if (n_recvs) { 5553 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5554 ptr_idxs = recv_buffer_idxs; 5555 for (i=0;i<n_recvs;i++) { 5556 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5557 new_local_type_private = MATAIJ_PRIVATE; 5558 break; 5559 } 5560 ptr_idxs += olengths_idxs[i]; 5561 } 5562 switch (new_local_type_private) { 5563 case MATDENSE_PRIVATE: 5564 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5565 new_local_type = MATSEQAIJ; 5566 bs = 1; 5567 } else { /* if I receive only 1 dense matrix */ 5568 new_local_type = MATSEQDENSE; 5569 bs = 1; 5570 } 5571 break; 5572 case MATAIJ_PRIVATE: 5573 new_local_type = MATSEQAIJ; 5574 bs = 1; 5575 break; 5576 case MATBAIJ_PRIVATE: 5577 new_local_type = MATSEQBAIJ; 5578 break; 5579 case MATSBAIJ_PRIVATE: 5580 new_local_type = MATSEQSBAIJ; 5581 break; 5582 default: 5583 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5584 break; 5585 } 5586 } else { /* by default, new_local_type is seqdense */ 5587 new_local_type = MATSEQDENSE; 5588 bs = 1; 5589 } 5590 5591 /* create MATIS object if needed */ 5592 if (!reuse) { 5593 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5594 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5595 } else { 5596 /* it also destroys the local matrices */ 5597 if (*mat_n) { 5598 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5599 } else { /* this is a fake object */ 5600 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5601 } 5602 } 5603 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5604 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5605 5606 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5607 5608 /* Global to local map of received indices */ 5609 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5610 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5611 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5612 5613 /* restore attributes -> type of incoming data and its size */ 5614 buf_size_idxs = 0; 5615 for (i=0;i<n_recvs;i++) { 5616 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5617 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5618 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5619 } 5620 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5621 5622 /* set preallocation */ 5623 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5624 if (!newisdense) { 5625 PetscInt *new_local_nnz=0; 5626 5627 ptr_idxs = recv_buffer_idxs_local; 5628 if (n_recvs) { 5629 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5630 } 5631 for (i=0;i<n_recvs;i++) { 5632 PetscInt j; 5633 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5634 for (j=0;j<*(ptr_idxs+1);j++) { 5635 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5636 } 5637 } else { 5638 /* TODO */ 5639 } 5640 ptr_idxs += olengths_idxs[i]; 5641 } 5642 if (new_local_nnz) { 5643 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5644 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5645 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5646 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5647 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5648 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5649 } else { 5650 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5651 } 5652 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5653 } else { 5654 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5655 } 5656 5657 /* set values */ 5658 ptr_vals = recv_buffer_vals; 5659 ptr_idxs = recv_buffer_idxs_local; 5660 for (i=0;i<n_recvs;i++) { 5661 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5662 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5663 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5664 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5665 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5666 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5667 } else { 5668 /* TODO */ 5669 } 5670 ptr_idxs += olengths_idxs[i]; 5671 ptr_vals += olengths_vals[i]; 5672 } 5673 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5674 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5675 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5676 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5677 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5678 5679 #if 0 5680 if (!restrict_comm) { /* check */ 5681 Vec lvec,rvec; 5682 PetscReal infty_error; 5683 5684 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5685 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5686 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5687 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5688 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5689 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5690 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5691 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5692 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5693 } 5694 #endif 5695 5696 /* assemble new additional is (if any) */ 5697 if (nis) { 5698 PetscInt **temp_idxs,*count_is,j,psum; 5699 5700 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5701 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5702 ptr_idxs = recv_buffer_idxs_is; 5703 psum = 0; 5704 for (i=0;i<n_recvs;i++) { 5705 for (j=0;j<nis;j++) { 5706 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5707 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5708 psum += plen; 5709 ptr_idxs += plen+1; /* shift pointer to received data */ 5710 } 5711 } 5712 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5713 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5714 for (i=1;i<nis;i++) { 5715 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5716 } 5717 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5718 ptr_idxs = recv_buffer_idxs_is; 5719 for (i=0;i<n_recvs;i++) { 5720 for (j=0;j<nis;j++) { 5721 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5722 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5723 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5724 ptr_idxs += plen+1; /* shift pointer to received data */ 5725 } 5726 } 5727 for (i=0;i<nis;i++) { 5728 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5729 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5730 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5731 } 5732 ierr = PetscFree(count_is);CHKERRQ(ierr); 5733 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5734 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5735 } 5736 /* free workspace */ 5737 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5738 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5739 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5740 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5741 if (isdense) { 5742 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5743 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5744 } else { 5745 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5746 } 5747 if (nis) { 5748 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5749 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5750 } 5751 5752 if (nvecs) { 5753 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5754 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5755 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5756 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5757 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5758 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5759 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5760 /* set values */ 5761 ptr_vals = recv_buffer_vecs; 5762 ptr_idxs = recv_buffer_idxs_local; 5763 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5764 for (i=0;i<n_recvs;i++) { 5765 PetscInt j; 5766 for (j=0;j<*(ptr_idxs+1);j++) { 5767 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5768 } 5769 ptr_idxs += olengths_idxs[i]; 5770 ptr_vals += olengths_idxs[i]-2; 5771 } 5772 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5773 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5774 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5775 } 5776 5777 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5778 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5779 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5780 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5781 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5782 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5783 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5784 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5785 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5786 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5787 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5788 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5789 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5790 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5791 ierr = PetscFree(onodes);CHKERRQ(ierr); 5792 if (nis) { 5793 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5794 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5795 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5796 } 5797 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5798 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5799 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5800 for (i=0;i<nis;i++) { 5801 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5802 } 5803 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5804 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5805 } 5806 *mat_n = NULL; 5807 } 5808 PetscFunctionReturn(0); 5809 } 5810 5811 /* temporary hack into ksp private data structure */ 5812 #include <petsc/private/kspimpl.h> 5813 5814 #undef __FUNCT__ 5815 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5816 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5817 { 5818 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5819 PC_IS *pcis = (PC_IS*)pc->data; 5820 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5821 Mat coarsedivudotp = NULL; 5822 MatNullSpace CoarseNullSpace = NULL; 5823 ISLocalToGlobalMapping coarse_islg; 5824 IS coarse_is,*isarray; 5825 PetscInt i,im_active=-1,active_procs=-1; 5826 PetscInt nis,nisdofs,nisneu,nisvert; 5827 PC pc_temp; 5828 PCType coarse_pc_type; 5829 KSPType coarse_ksp_type; 5830 PetscBool multilevel_requested,multilevel_allowed; 5831 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5832 Mat t_coarse_mat_is; 5833 PetscInt ncoarse; 5834 PetscBool compute_vecs = PETSC_FALSE; 5835 PetscScalar *array; 5836 MatReuse coarse_mat_reuse; 5837 PetscBool restr, full_restr, have_void; 5838 PetscErrorCode ierr; 5839 5840 PetscFunctionBegin; 5841 /* Assign global numbering to coarse dofs */ 5842 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5843 PetscInt ocoarse_size; 5844 compute_vecs = PETSC_TRUE; 5845 ocoarse_size = pcbddc->coarse_size; 5846 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5847 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5848 /* see if we can avoid some work */ 5849 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5850 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5851 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5852 PC pc; 5853 PetscBool isbddc; 5854 5855 /* temporary workaround since PCBDDC does not have a reset method so far */ 5856 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5857 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5858 if (isbddc) { 5859 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5860 } else { 5861 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5862 } 5863 coarse_reuse = PETSC_FALSE; 5864 } else { /* we can safely reuse already computed coarse matrix */ 5865 coarse_reuse = PETSC_TRUE; 5866 } 5867 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5868 coarse_reuse = PETSC_FALSE; 5869 } 5870 /* reset any subassembling information */ 5871 if (!coarse_reuse || pcbddc->recompute_topography) { 5872 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5873 } 5874 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5875 coarse_reuse = PETSC_TRUE; 5876 } 5877 /* assemble coarse matrix */ 5878 if (coarse_reuse && pcbddc->coarse_ksp) { 5879 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5880 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5881 coarse_mat_reuse = MAT_REUSE_MATRIX; 5882 } else { 5883 coarse_mat = NULL; 5884 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5885 } 5886 5887 /* creates temporary l2gmap and IS for coarse indexes */ 5888 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5889 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5890 5891 /* creates temporary MATIS object for coarse matrix */ 5892 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5893 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5894 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5895 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5896 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5897 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5898 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5899 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5900 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5901 5902 /* count "active" (i.e. with positive local size) and "void" processes */ 5903 im_active = !!(pcis->n); 5904 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5905 5906 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5907 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5908 /* full_restr : just use the receivers from the subassembling pattern */ 5909 coarse_mat_is = NULL; 5910 multilevel_allowed = PETSC_FALSE; 5911 multilevel_requested = PETSC_FALSE; 5912 full_restr = PETSC_TRUE; 5913 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5914 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5915 if (multilevel_requested) { 5916 ncoarse = active_procs/pcbddc->coarsening_ratio; 5917 restr = PETSC_FALSE; 5918 full_restr = PETSC_FALSE; 5919 } else { 5920 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5921 restr = PETSC_TRUE; 5922 full_restr = PETSC_TRUE; 5923 } 5924 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5925 ncoarse = PetscMax(1,ncoarse); 5926 if (!pcbddc->coarse_subassembling) { 5927 if (pcbddc->coarsening_ratio > 1) { 5928 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5929 } else { 5930 PetscMPIInt size,rank; 5931 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5932 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5933 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5934 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5935 } 5936 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5937 PetscInt psum; 5938 PetscMPIInt size; 5939 if (pcbddc->coarse_ksp) psum = 1; 5940 else psum = 0; 5941 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5942 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5943 if (ncoarse < size) have_void = PETSC_TRUE; 5944 } 5945 /* determine if we can go multilevel */ 5946 if (multilevel_requested) { 5947 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5948 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5949 } 5950 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5951 5952 /* dump subassembling pattern */ 5953 if (pcbddc->dbg_flag && multilevel_allowed) { 5954 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5955 } 5956 5957 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5958 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5959 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5960 const PetscInt *idxs; 5961 ISLocalToGlobalMapping tmap; 5962 5963 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5964 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5965 /* allocate space for temporary storage */ 5966 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5967 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5968 /* allocate for IS array */ 5969 nisdofs = pcbddc->n_ISForDofsLocal; 5970 nisneu = !!pcbddc->NeumannBoundariesLocal; 5971 nisvert = 0; /* nisvert is not used */ 5972 nis = nisdofs + nisneu + nisvert; 5973 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5974 /* dofs splitting */ 5975 for (i=0;i<nisdofs;i++) { 5976 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5977 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5978 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5979 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5980 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5981 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5983 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5984 } 5985 /* neumann boundaries */ 5986 if (pcbddc->NeumannBoundariesLocal) { 5987 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5988 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5989 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5990 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5991 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5992 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5993 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5994 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5995 } 5996 /* free memory */ 5997 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5998 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5999 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6000 } else { 6001 nis = 0; 6002 nisdofs = 0; 6003 nisneu = 0; 6004 nisvert = 0; 6005 isarray = NULL; 6006 } 6007 /* destroy no longer needed map */ 6008 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6009 6010 /* subassemble */ 6011 if (multilevel_allowed) { 6012 Vec vp[1]; 6013 PetscInt nvecs = 0; 6014 PetscBool reuse,reuser; 6015 6016 if (coarse_mat) reuse = PETSC_TRUE; 6017 else reuse = PETSC_FALSE; 6018 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6019 vp[0] = NULL; 6020 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6021 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6022 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6023 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6024 nvecs = 1; 6025 6026 if (pcbddc->divudotp) { 6027 Mat B,loc_divudotp; 6028 Vec v,p; 6029 IS dummy; 6030 PetscInt np; 6031 6032 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6033 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6034 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6035 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6036 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6037 ierr = VecSet(p,1.);CHKERRQ(ierr); 6038 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6039 ierr = VecDestroy(&p);CHKERRQ(ierr); 6040 ierr = MatDestroy(&B);CHKERRQ(ierr); 6041 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6042 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6043 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6044 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6045 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6046 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6047 ierr = VecDestroy(&v);CHKERRQ(ierr); 6048 } 6049 } 6050 if (reuser) { 6051 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6052 } else { 6053 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6054 } 6055 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6056 PetscScalar *arraym,*arrayv; 6057 PetscInt nl; 6058 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6059 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6060 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6061 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6062 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6063 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6064 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6065 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6066 } else { 6067 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6068 } 6069 } else { 6070 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6071 } 6072 if (coarse_mat_is || coarse_mat) { 6073 PetscMPIInt size; 6074 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6075 if (!multilevel_allowed) { 6076 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6077 } else { 6078 Mat A; 6079 6080 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6081 if (coarse_mat_is) { 6082 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6083 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6084 coarse_mat = coarse_mat_is; 6085 } 6086 /* be sure we don't have MatSeqDENSE as local mat */ 6087 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6088 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6089 } 6090 } 6091 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6092 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6093 6094 /* create local to global scatters for coarse problem */ 6095 if (compute_vecs) { 6096 PetscInt lrows; 6097 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6098 if (coarse_mat) { 6099 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6100 } else { 6101 lrows = 0; 6102 } 6103 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6104 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6105 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6106 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6107 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6108 } 6109 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6110 6111 /* set defaults for coarse KSP and PC */ 6112 if (multilevel_allowed) { 6113 coarse_ksp_type = KSPRICHARDSON; 6114 coarse_pc_type = PCBDDC; 6115 } else { 6116 coarse_ksp_type = KSPPREONLY; 6117 coarse_pc_type = PCREDUNDANT; 6118 } 6119 6120 /* print some info if requested */ 6121 if (pcbddc->dbg_flag) { 6122 if (!multilevel_allowed) { 6123 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6124 if (multilevel_requested) { 6125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6126 } else if (pcbddc->max_levels) { 6127 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6128 } 6129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6130 } 6131 } 6132 6133 /* create the coarse KSP object only once with defaults */ 6134 if (coarse_mat) { 6135 PetscViewer dbg_viewer = NULL; 6136 if (pcbddc->dbg_flag) { 6137 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6138 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6139 } 6140 if (!pcbddc->coarse_ksp) { 6141 char prefix[256],str_level[16]; 6142 size_t len; 6143 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6144 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6145 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6146 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6147 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6148 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6149 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6150 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6151 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6152 /* prefix */ 6153 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6154 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6155 if (!pcbddc->current_level) { 6156 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6157 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6158 } else { 6159 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6160 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6161 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6162 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6163 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6164 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6165 } 6166 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6167 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6168 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6169 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6170 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6171 /* allow user customization */ 6172 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6173 } 6174 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6175 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6176 if (nisdofs) { 6177 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6178 for (i=0;i<nisdofs;i++) { 6179 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6180 } 6181 } 6182 if (nisneu) { 6183 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6184 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6185 } 6186 if (nisvert) { 6187 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6188 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6189 } 6190 6191 /* get some info after set from options */ 6192 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6193 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6194 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6195 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6196 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6197 isbddc = PETSC_FALSE; 6198 } 6199 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6200 if (isredundant) { 6201 KSP inner_ksp; 6202 PC inner_pc; 6203 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6204 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6205 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6206 } 6207 6208 /* parameters which miss an API */ 6209 if (isbddc) { 6210 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6211 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6212 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6213 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6214 if (pcbddc_coarse->benign_saddle_point) { 6215 Mat coarsedivudotp_is; 6216 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6217 IS row,col; 6218 const PetscInt *gidxs; 6219 PetscInt n,st,M,N; 6220 6221 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6222 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6223 st = st-n; 6224 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6225 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6226 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6227 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6228 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6229 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6230 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6231 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6232 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6233 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6234 ierr = ISDestroy(&row);CHKERRQ(ierr); 6235 ierr = ISDestroy(&col);CHKERRQ(ierr); 6236 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6237 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6238 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6239 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6240 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6241 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6242 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6243 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6244 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,NULL);CHKERRQ(ierr); 6245 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6246 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6247 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6248 } 6249 } 6250 6251 /* propagate symmetry info of coarse matrix */ 6252 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6253 if (pc->pmat->symmetric_set) { 6254 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6255 } 6256 if (pc->pmat->hermitian_set) { 6257 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6258 } 6259 if (pc->pmat->spd_set) { 6260 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6261 } 6262 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6263 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6264 } 6265 /* set operators */ 6266 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6267 if (pcbddc->dbg_flag) { 6268 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6269 } 6270 } 6271 ierr = PetscFree(isarray);CHKERRQ(ierr); 6272 #if 0 6273 { 6274 PetscViewer viewer; 6275 char filename[256]; 6276 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6277 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6278 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6279 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6280 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6281 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6282 } 6283 #endif 6284 6285 if (pcbddc->coarse_ksp) { 6286 Vec crhs,csol; 6287 6288 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6289 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6290 if (!csol) { 6291 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6292 } 6293 if (!crhs) { 6294 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6295 } 6296 } 6297 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6298 6299 /* compute null space for coarse solver if the benign trick has been requested */ 6300 if (pcbddc->benign_null) { 6301 6302 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6303 for (i=0;i<pcbddc->benign_n;i++) { 6304 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6305 } 6306 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6307 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6308 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6309 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6310 if (coarse_mat) { 6311 Vec nullv; 6312 PetscScalar *array,*array2; 6313 PetscInt nl; 6314 6315 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6316 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6317 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6318 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6319 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6320 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6321 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6322 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6323 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6324 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6325 } 6326 } 6327 6328 if (pcbddc->coarse_ksp) { 6329 PetscBool ispreonly; 6330 6331 if (CoarseNullSpace) { 6332 PetscBool isnull; 6333 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6334 if (isnull) { 6335 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6336 } 6337 /* TODO: add local nullspaces (if any) */ 6338 } 6339 /* setup coarse ksp */ 6340 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6341 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6342 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6343 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6344 KSP check_ksp; 6345 KSPType check_ksp_type; 6346 PC check_pc; 6347 Vec check_vec,coarse_vec; 6348 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6349 PetscInt its; 6350 PetscBool compute_eigs; 6351 PetscReal *eigs_r,*eigs_c; 6352 PetscInt neigs; 6353 const char *prefix; 6354 6355 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6356 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6357 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6358 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6359 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6360 /* prevent from setup unneeded object */ 6361 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6362 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6363 if (ispreonly) { 6364 check_ksp_type = KSPPREONLY; 6365 compute_eigs = PETSC_FALSE; 6366 } else { 6367 check_ksp_type = KSPGMRES; 6368 compute_eigs = PETSC_TRUE; 6369 } 6370 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6371 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6372 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6373 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6374 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6375 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6376 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6377 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6378 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6379 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6380 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6381 /* create random vec */ 6382 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6383 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6384 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6385 /* solve coarse problem */ 6386 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6387 /* set eigenvalue estimation if preonly has not been requested */ 6388 if (compute_eigs) { 6389 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6390 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6391 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6392 if (neigs) { 6393 lambda_max = eigs_r[neigs-1]; 6394 lambda_min = eigs_r[0]; 6395 if (pcbddc->use_coarse_estimates) { 6396 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6397 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6398 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6399 } 6400 } 6401 } 6402 } 6403 6404 /* check coarse problem residual error */ 6405 if (pcbddc->dbg_flag) { 6406 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6407 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6408 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6409 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6410 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6411 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6412 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6413 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6414 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6415 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6416 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6417 if (CoarseNullSpace) { 6418 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6419 } 6420 if (compute_eigs) { 6421 PetscReal lambda_max_s,lambda_min_s; 6422 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6423 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6424 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6425 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6426 for (i=0;i<neigs;i++) { 6427 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6428 } 6429 } 6430 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6431 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6432 } 6433 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6434 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6435 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6436 if (compute_eigs) { 6437 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6438 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6439 } 6440 } 6441 } 6442 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6443 /* print additional info */ 6444 if (pcbddc->dbg_flag) { 6445 /* waits until all processes reaches this point */ 6446 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6448 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6449 } 6450 6451 /* free memory */ 6452 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6453 PetscFunctionReturn(0); 6454 } 6455 6456 #undef __FUNCT__ 6457 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6458 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6459 { 6460 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6461 PC_IS* pcis = (PC_IS*)pc->data; 6462 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6463 IS subset,subset_mult,subset_n; 6464 PetscInt local_size,coarse_size=0; 6465 PetscInt *local_primal_indices=NULL; 6466 const PetscInt *t_local_primal_indices; 6467 PetscErrorCode ierr; 6468 6469 PetscFunctionBegin; 6470 /* Compute global number of coarse dofs */ 6471 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6472 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6473 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6474 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6475 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6476 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6477 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6478 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6479 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6480 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6481 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6482 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6483 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6484 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6485 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6486 6487 /* check numbering */ 6488 if (pcbddc->dbg_flag) { 6489 PetscScalar coarsesum,*array,*array2; 6490 PetscInt i; 6491 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6492 6493 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6495 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6496 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6497 /* counter */ 6498 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6499 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6500 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6501 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6502 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6503 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6504 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6505 for (i=0;i<pcbddc->local_primal_size;i++) { 6506 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6507 } 6508 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6509 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6511 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6512 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6513 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6514 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6515 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6516 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6517 for (i=0;i<pcis->n;i++) { 6518 if (array[i] != 0.0 && array[i] != array2[i]) { 6519 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6520 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6521 set_error = PETSC_TRUE; 6522 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6524 } 6525 } 6526 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6527 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6529 for (i=0;i<pcis->n;i++) { 6530 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6531 } 6532 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6533 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6534 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6535 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6536 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6537 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6538 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6539 PetscInt *gidxs; 6540 6541 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6542 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6543 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6546 for (i=0;i<pcbddc->local_primal_size;i++) { 6547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6548 } 6549 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6550 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6551 } 6552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6553 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6554 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6555 } 6556 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6557 /* get back data */ 6558 *coarse_size_n = coarse_size; 6559 *local_primal_indices_n = local_primal_indices; 6560 PetscFunctionReturn(0); 6561 } 6562 6563 #undef __FUNCT__ 6564 #define __FUNCT__ "PCBDDCGlobalToLocal" 6565 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6566 { 6567 IS localis_t; 6568 PetscInt i,lsize,*idxs,n; 6569 PetscScalar *vals; 6570 PetscErrorCode ierr; 6571 6572 PetscFunctionBegin; 6573 /* get indices in local ordering exploiting local to global map */ 6574 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6575 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6576 for (i=0;i<lsize;i++) vals[i] = 1.0; 6577 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6578 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6579 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6580 if (idxs) { /* multilevel guard */ 6581 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6582 } 6583 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6584 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6585 ierr = PetscFree(vals);CHKERRQ(ierr); 6586 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6587 /* now compute set in local ordering */ 6588 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6589 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6590 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6591 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6592 for (i=0,lsize=0;i<n;i++) { 6593 if (PetscRealPart(vals[i]) > 0.5) { 6594 lsize++; 6595 } 6596 } 6597 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6598 for (i=0,lsize=0;i<n;i++) { 6599 if (PetscRealPart(vals[i]) > 0.5) { 6600 idxs[lsize++] = i; 6601 } 6602 } 6603 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6604 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6605 *localis = localis_t; 6606 PetscFunctionReturn(0); 6607 } 6608 6609 #undef __FUNCT__ 6610 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6611 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6612 { 6613 PC_IS *pcis=(PC_IS*)pc->data; 6614 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6615 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6616 Mat S_j; 6617 PetscInt *used_xadj,*used_adjncy; 6618 PetscBool free_used_adj; 6619 PetscErrorCode ierr; 6620 6621 PetscFunctionBegin; 6622 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6623 free_used_adj = PETSC_FALSE; 6624 if (pcbddc->sub_schurs_layers == -1) { 6625 used_xadj = NULL; 6626 used_adjncy = NULL; 6627 } else { 6628 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6629 used_xadj = pcbddc->mat_graph->xadj; 6630 used_adjncy = pcbddc->mat_graph->adjncy; 6631 } else if (pcbddc->computed_rowadj) { 6632 used_xadj = pcbddc->mat_graph->xadj; 6633 used_adjncy = pcbddc->mat_graph->adjncy; 6634 } else { 6635 PetscBool flg_row=PETSC_FALSE; 6636 const PetscInt *xadj,*adjncy; 6637 PetscInt nvtxs; 6638 6639 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6640 if (flg_row) { 6641 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6642 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6643 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6644 free_used_adj = PETSC_TRUE; 6645 } else { 6646 pcbddc->sub_schurs_layers = -1; 6647 used_xadj = NULL; 6648 used_adjncy = NULL; 6649 } 6650 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6651 } 6652 } 6653 6654 /* setup sub_schurs data */ 6655 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6656 if (!sub_schurs->schur_explicit) { 6657 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6658 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6659 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6660 } else { 6661 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6662 PetscBool isseqaij,need_change = PETSC_FALSE;; 6663 PetscInt benign_n; 6664 Mat change = NULL; 6665 Vec scaling = NULL; 6666 IS change_primal = NULL; 6667 6668 if (!pcbddc->use_vertices && reuse_solvers) { 6669 PetscInt n_vertices; 6670 6671 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6672 reuse_solvers = (PetscBool)!n_vertices; 6673 } 6674 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6675 if (!isseqaij) { 6676 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6677 if (matis->A == pcbddc->local_mat) { 6678 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6679 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6680 } else { 6681 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6682 } 6683 } 6684 if (!pcbddc->benign_change_explicit) { 6685 benign_n = pcbddc->benign_n; 6686 } else { 6687 benign_n = 0; 6688 } 6689 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6690 We need a global reduction to avoid possible deadlocks. 6691 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6692 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6693 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6694 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6695 need_change = (PetscBool)(!need_change); 6696 } 6697 /* If the user defines additional constraints, we import them here. 6698 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6699 if (need_change) { 6700 PC_IS *pcisf; 6701 PC_BDDC *pcbddcf; 6702 PC pcf; 6703 6704 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6705 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6706 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6707 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6708 /* hacks */ 6709 pcisf = (PC_IS*)pcf->data; 6710 pcisf->is_B_local = pcis->is_B_local; 6711 pcisf->vec1_N = pcis->vec1_N; 6712 pcisf->BtoNmap = pcis->BtoNmap; 6713 pcisf->n = pcis->n; 6714 pcisf->n_B = pcis->n_B; 6715 pcbddcf = (PC_BDDC*)pcf->data; 6716 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6717 pcbddcf->mat_graph = pcbddc->mat_graph; 6718 pcbddcf->use_faces = PETSC_TRUE; 6719 pcbddcf->use_change_of_basis = PETSC_TRUE; 6720 pcbddcf->use_change_on_faces = PETSC_TRUE; 6721 pcbddcf->use_qr_single = PETSC_TRUE; 6722 pcbddcf->fake_change = PETSC_TRUE; 6723 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6724 /* store information on primal vertices and change of basis (in local numbering) */ 6725 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6726 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6727 change = pcbddcf->ConstraintMatrix; 6728 pcbddcf->ConstraintMatrix = NULL; 6729 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6730 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6731 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6732 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6733 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6734 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6735 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6736 pcf->ops->destroy = NULL; 6737 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6738 } 6739 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6740 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6741 ierr = MatDestroy(&change);CHKERRQ(ierr); 6742 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6743 } 6744 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6745 6746 /* free adjacency */ 6747 if (free_used_adj) { 6748 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6749 } 6750 PetscFunctionReturn(0); 6751 } 6752 6753 #undef __FUNCT__ 6754 #define __FUNCT__ "PCBDDCInitSubSchurs" 6755 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6756 { 6757 PC_IS *pcis=(PC_IS*)pc->data; 6758 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6759 PCBDDCGraph graph; 6760 PetscErrorCode ierr; 6761 6762 PetscFunctionBegin; 6763 /* attach interface graph for determining subsets */ 6764 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6765 IS verticesIS,verticescomm; 6766 PetscInt vsize,*idxs; 6767 6768 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6769 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6770 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6772 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6773 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6774 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6775 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6776 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6777 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6778 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6779 } else { 6780 graph = pcbddc->mat_graph; 6781 } 6782 /* print some info */ 6783 if (pcbddc->dbg_flag) { 6784 IS vertices; 6785 PetscInt nv,nedges,nfaces; 6786 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6787 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6788 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6789 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6790 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6792 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6795 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6796 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6797 } 6798 6799 /* sub_schurs init */ 6800 if (!pcbddc->sub_schurs) { 6801 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6802 } 6803 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6804 6805 /* free graph struct */ 6806 if (pcbddc->sub_schurs_rebuild) { 6807 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6808 } 6809 PetscFunctionReturn(0); 6810 } 6811 6812 #undef __FUNCT__ 6813 #define __FUNCT__ "PCBDDCCheckOperator" 6814 PetscErrorCode PCBDDCCheckOperator(PC pc) 6815 { 6816 PC_IS *pcis=(PC_IS*)pc->data; 6817 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6818 PetscErrorCode ierr; 6819 6820 PetscFunctionBegin; 6821 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6822 IS zerodiag = NULL; 6823 Mat S_j,B0_B=NULL; 6824 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6825 PetscScalar *p0_check,*array,*array2; 6826 PetscReal norm; 6827 PetscInt i; 6828 6829 /* B0 and B0_B */ 6830 if (zerodiag) { 6831 IS dummy; 6832 6833 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6834 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6835 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6836 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6837 } 6838 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6839 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6840 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6841 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6842 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6843 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6844 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6845 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6846 /* S_j */ 6847 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6848 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6849 6850 /* mimic vector in \widetilde{W}_\Gamma */ 6851 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6852 /* continuous in primal space */ 6853 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6854 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6855 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6856 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6857 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6858 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6859 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6860 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6861 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6862 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6863 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6864 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6865 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6866 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6867 6868 /* assemble rhs for coarse problem */ 6869 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6870 /* local with Schur */ 6871 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6872 if (zerodiag) { 6873 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6874 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6875 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6876 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6877 } 6878 /* sum on primal nodes the local contributions */ 6879 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6880 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6881 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6882 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6883 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6884 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6885 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6886 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6887 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6888 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6889 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6890 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6891 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6892 /* scale primal nodes (BDDC sums contibutions) */ 6893 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6894 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6895 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6896 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6897 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6898 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6899 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6900 /* global: \widetilde{B0}_B w_\Gamma */ 6901 if (zerodiag) { 6902 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6903 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6904 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6905 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6906 } 6907 /* BDDC */ 6908 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6909 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6910 6911 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6912 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6913 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6914 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6915 for (i=0;i<pcbddc->benign_n;i++) { 6916 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6917 } 6918 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6919 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6920 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6921 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6922 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6923 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6924 } 6925 PetscFunctionReturn(0); 6926 } 6927