1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* the near-null space of BDDC carries information on quadrature weights, 8 and these can be collinear -> so cheat with MatNullSpaceCreate 9 and create a suitable set of basis vectors first */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "PCBDDCNullSpaceCreate" 12 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 13 { 14 PetscErrorCode ierr; 15 PetscInt i; 16 17 PetscFunctionBegin; 18 for (i=0;i<nvecs;i++) { 19 PetscInt first,last; 20 21 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 22 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 23 if (i>=first && i < last) { 24 PetscScalar *data; 25 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 26 if (!has_const) { 27 data[i-first] = 1.; 28 } else { 29 data[2*i-first] = 1./PetscSqrtReal(2.); 30 data[2*i-first+1] = -1./PetscSqrtReal(2.); 31 } 32 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 33 } 34 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 35 } 36 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 37 for (i=0;i<nvecs;i++) { /* reset vectors */ 38 PetscInt first,last; 39 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 40 if (i>=first && i < last) { 41 PetscScalar *data; 42 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 43 if (!has_const) { 44 data[i-first] = 0.; 45 } else { 46 data[2*i-first] = 0.; 47 data[2*i-first+1] = 0.; 48 } 49 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 50 } 51 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 52 } 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 58 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 59 { 60 Mat loc_divudotp; 61 Vec p,v,vins,quad_vec,*quad_vecs; 62 ISLocalToGlobalMapping map; 63 IS *faces,*edges; 64 PetscScalar *vals; 65 const PetscScalar *array; 66 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 67 PetscMPIInt rank; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 72 if (graph->twodim) { 73 lmaxneighs = 2; 74 } else { 75 lmaxneighs = 1; 76 for (i=0;i<ne;i++) { 77 const PetscInt *idxs; 78 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 79 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 80 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 81 } 82 lmaxneighs++; /* graph count does not include self */ 83 } 84 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 85 maxsize = 0; 86 for (i=0;i<ne;i++) { 87 PetscInt nn; 88 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 89 maxsize = PetscMax(maxsize,nn); 90 } 91 for (i=0;i<nf;i++) { 92 PetscInt nn; 93 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 94 maxsize = PetscMax(maxsize,nn); 95 } 96 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 97 /* create vectors to hold quadrature weights */ 98 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 99 if (!transpose) { 100 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 101 } else { 102 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 103 } 104 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 105 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 106 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 107 for (i=0;i<maxneighs;i++) { 108 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 109 } 110 111 /* compute local quad vec */ 112 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 113 if (!transpose) { 114 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 115 } else { 116 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 117 } 118 ierr = VecSet(p,1.);CHKERRQ(ierr); 119 if (!transpose) { 120 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 121 } else { 122 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 123 } 124 if (vl2l) { 125 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 126 } else { 127 vins = v; 128 } 129 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 130 ierr = VecDestroy(&p);CHKERRQ(ierr); 131 132 /* insert in global quadrature vecs */ 133 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 134 for (i=0;i<nf;i++) { 135 const PetscInt *idxs; 136 PetscInt idx,nn,j; 137 138 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 139 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 140 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 141 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 142 idx = -(idx+1); 143 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 144 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 145 } 146 for (i=0;i<ne;i++) { 147 const PetscInt *idxs; 148 PetscInt idx,nn,j; 149 150 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 151 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 152 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 153 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 154 idx = -(idx+1); 155 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 156 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 157 } 158 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 159 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 160 if (vl2l) { 161 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 162 } 163 ierr = VecDestroy(&v);CHKERRQ(ierr); 164 ierr = PetscFree(vals);CHKERRQ(ierr); 165 166 /* assemble near null space */ 167 for (i=0;i<maxneighs;i++) { 168 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 169 } 170 for (i=0;i<maxneighs;i++) { 171 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 172 } 173 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 180 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 181 { 182 PetscErrorCode ierr; 183 Vec local,global; 184 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 185 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 186 187 PetscFunctionBegin; 188 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 189 /* need to convert from global to local topology information and remove references to information in global ordering */ 190 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 191 if (pcbddc->user_provided_isfordofs) { 192 if (pcbddc->n_ISForDofs) { 193 PetscInt i; 194 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 195 for (i=0;i<pcbddc->n_ISForDofs;i++) { 196 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 197 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 198 } 199 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 200 pcbddc->n_ISForDofs = 0; 201 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 202 } 203 } else { 204 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 205 PetscInt i, n = matis->A->rmap->n; 206 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 207 if (i > 1) { 208 pcbddc->n_ISForDofsLocal = i; 209 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 210 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 211 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 212 } 213 } 214 } 215 } 216 217 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 218 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 219 } 220 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 221 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 222 } 223 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 224 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 225 } 226 ierr = VecDestroy(&global);CHKERRQ(ierr); 227 ierr = VecDestroy(&local);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 233 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 234 { 235 PC_IS *pcis = (PC_IS*)(pc->data); 236 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (!pcbddc->benign_have_null) { 241 PetscFunctionReturn(0); 242 } 243 if (pcbddc->ChangeOfBasisMatrix) { 244 Vec swap; 245 246 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 247 swap = pcbddc->work_change; 248 pcbddc->work_change = r; 249 r = swap; 250 } 251 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 252 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 253 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 254 ierr = VecSet(z,0.);CHKERRQ(ierr); 255 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 256 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 257 if (pcbddc->ChangeOfBasisMatrix) { 258 Vec swap; 259 260 swap = r; 261 r = pcbddc->work_change; 262 pcbddc->work_change = swap; 263 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 264 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 265 } 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 271 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 272 { 273 PCBDDCBenignMatMult_ctx ctx; 274 PetscErrorCode ierr; 275 PetscBool apply_right,apply_left,reset_x; 276 277 PetscFunctionBegin; 278 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 279 if (transpose) { 280 apply_right = ctx->apply_left; 281 apply_left = ctx->apply_right; 282 } else { 283 apply_right = ctx->apply_right; 284 apply_left = ctx->apply_left; 285 } 286 reset_x = PETSC_FALSE; 287 if (apply_right) { 288 const PetscScalar *ax; 289 PetscInt nl,i; 290 291 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 292 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 293 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 294 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 295 for (i=0;i<ctx->benign_n;i++) { 296 PetscScalar sum,val; 297 const PetscInt *idxs; 298 PetscInt nz,j; 299 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 300 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 301 sum = 0.; 302 if (ctx->apply_p0) { 303 val = ctx->work[idxs[nz-1]]; 304 for (j=0;j<nz-1;j++) { 305 sum += ctx->work[idxs[j]]; 306 ctx->work[idxs[j]] += val; 307 } 308 } else { 309 for (j=0;j<nz-1;j++) { 310 sum += ctx->work[idxs[j]]; 311 } 312 } 313 ctx->work[idxs[nz-1]] -= sum; 314 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 315 } 316 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 317 reset_x = PETSC_TRUE; 318 } 319 if (transpose) { 320 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 321 } else { 322 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 323 } 324 if (reset_x) { 325 ierr = VecResetArray(x);CHKERRQ(ierr); 326 } 327 if (apply_left) { 328 PetscScalar *ay; 329 PetscInt i; 330 331 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 332 for (i=0;i<ctx->benign_n;i++) { 333 PetscScalar sum,val; 334 const PetscInt *idxs; 335 PetscInt nz,j; 336 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 337 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 338 val = -ay[idxs[nz-1]]; 339 if (ctx->apply_p0) { 340 sum = 0.; 341 for (j=0;j<nz-1;j++) { 342 sum += ay[idxs[j]]; 343 ay[idxs[j]] += val; 344 } 345 ay[idxs[nz-1]] += sum; 346 } else { 347 for (j=0;j<nz-1;j++) { 348 ay[idxs[j]] += val; 349 } 350 ay[idxs[nz-1]] = 0.; 351 } 352 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 353 } 354 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 361 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 362 { 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 372 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 #undef __FUNCT__ 382 #define __FUNCT__ "PCBDDCBenignShellMat" 383 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 384 { 385 PC_IS *pcis = (PC_IS*)pc->data; 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PCBDDCBenignMatMult_ctx ctx; 388 PetscErrorCode ierr; 389 390 PetscFunctionBegin; 391 if (!restore) { 392 Mat A_IB,A_BI; 393 PetscScalar *work; 394 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 395 396 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 397 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 398 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 399 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 400 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 401 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 402 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 403 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 404 ierr = PetscNew(&ctx);CHKERRQ(ierr); 405 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 406 ctx->apply_left = PETSC_TRUE; 407 ctx->apply_right = PETSC_FALSE; 408 ctx->apply_p0 = PETSC_FALSE; 409 ctx->benign_n = pcbddc->benign_n; 410 if (reuse) { 411 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 412 ctx->free = PETSC_FALSE; 413 } else { /* TODO: could be optimized for successive solves */ 414 ISLocalToGlobalMapping N_to_D; 415 PetscInt i; 416 417 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 418 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 419 for (i=0;i<pcbddc->benign_n;i++) { 420 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 421 } 422 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 423 ctx->free = PETSC_TRUE; 424 } 425 ctx->A = pcis->A_IB; 426 ctx->work = work; 427 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 428 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 429 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 430 pcis->A_IB = A_IB; 431 432 /* A_BI as A_IB^T */ 433 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 434 pcbddc->benign_original_mat = pcis->A_BI; 435 pcis->A_BI = A_BI; 436 } else { 437 if (!pcbddc->benign_original_mat) { 438 PetscFunctionReturn(0); 439 } 440 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 442 pcis->A_IB = ctx->A; 443 ctx->A = NULL; 444 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 445 pcis->A_BI = pcbddc->benign_original_mat; 446 pcbddc->benign_original_mat = NULL; 447 if (ctx->free) { 448 PetscInt i; 449 for (i=0;i<ctx->benign_n;i++) { 450 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 451 } 452 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 453 } 454 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 455 ierr = PetscFree(ctx);CHKERRQ(ierr); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 /* used just in bddc debug mode */ 461 #undef __FUNCT__ 462 #define __FUNCT__ "PCBDDCBenignProject" 463 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 464 { 465 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 466 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 467 Mat An; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 472 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 473 if (is1) { 474 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 475 ierr = MatDestroy(&An);CHKERRQ(ierr); 476 } else { 477 *B = An; 478 } 479 PetscFunctionReturn(0); 480 } 481 482 /* TODO: add reuse flag */ 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatSeqAIJCompress" 485 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 486 { 487 Mat Bt; 488 PetscScalar *a,*bdata; 489 const PetscInt *ii,*ij; 490 PetscInt m,n,i,nnz,*bii,*bij; 491 PetscBool flg_row; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 496 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 497 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 498 nnz = n; 499 for (i=0;i<ii[n];i++) { 500 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 501 } 502 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 503 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 504 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 505 nnz = 0; 506 bii[0] = 0; 507 for (i=0;i<n;i++) { 508 PetscInt j; 509 for (j=ii[i];j<ii[i+1];j++) { 510 PetscScalar entry = a[j]; 511 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 512 bij[nnz] = ij[j]; 513 bdata[nnz] = entry; 514 nnz++; 515 } 516 } 517 bii[i+1] = nnz; 518 } 519 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 520 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 522 { 523 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 524 b->free_a = PETSC_TRUE; 525 b->free_ij = PETSC_TRUE; 526 } 527 *B = Bt; 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatDetectDisconnectedComponents" 533 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 534 { 535 Mat B; 536 IS is_dummy,*cc_n; 537 ISLocalToGlobalMapping l2gmap_dummy; 538 PCBDDCGraph graph; 539 PetscInt i,n; 540 PetscInt *xadj,*adjncy; 541 PetscInt *xadj_filtered,*adjncy_filtered; 542 PetscBool flg_row,isseqaij; 543 PetscErrorCode ierr; 544 545 PetscFunctionBegin; 546 if (!A->rmap->N || !A->cmap->N) { 547 *ncc = 0; 548 *cc = NULL; 549 PetscFunctionReturn(0); 550 } 551 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 552 if (!isseqaij && filter) { 553 PetscBool isseqdense; 554 555 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 556 if (!isseqdense) { 557 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 558 } else { /* TODO: rectangular case and LDA */ 559 PetscScalar *array; 560 PetscReal chop=1.e-6; 561 562 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 563 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 564 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 565 for (i=0;i<n;i++) { 566 PetscInt j; 567 for (j=i+1;j<n;j++) { 568 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 569 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 570 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 571 } 572 } 573 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 574 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 575 } 576 } else { 577 B = A; 578 } 579 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 580 581 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 582 if (filter) { 583 PetscScalar *data; 584 PetscInt j,cum; 585 586 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 588 cum = 0; 589 for (i=0;i<n;i++) { 590 PetscInt t; 591 592 for (j=xadj[i];j<xadj[i+1];j++) { 593 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 594 continue; 595 } 596 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 597 } 598 t = xadj_filtered[i]; 599 xadj_filtered[i] = cum; 600 cum += t; 601 } 602 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 603 } else { 604 xadj_filtered = NULL; 605 adjncy_filtered = NULL; 606 } 607 608 /* compute local connected components using PCBDDCGraph */ 609 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 610 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 611 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 612 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 613 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 614 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 615 if (xadj_filtered) { 616 graph->xadj = xadj_filtered; 617 graph->adjncy = adjncy_filtered; 618 } else { 619 graph->xadj = xadj; 620 graph->adjncy = adjncy; 621 } 622 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 623 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 624 /* partial clean up */ 625 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 626 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 627 if (A != B) { 628 ierr = MatDestroy(&B);CHKERRQ(ierr); 629 } 630 631 /* get back data */ 632 if (ncc) *ncc = graph->ncc; 633 if (cc) { 634 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 635 for (i=0;i<graph->ncc;i++) { 636 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 637 } 638 *cc = cc_n; 639 } 640 /* clean up graph */ 641 graph->xadj = 0; 642 graph->adjncy = 0; 643 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "PCBDDCBenignCheck" 649 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 650 { 651 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 652 PC_IS* pcis = (PC_IS*)(pc->data); 653 IS dirIS = NULL; 654 PetscInt i; 655 PetscErrorCode ierr; 656 657 PetscFunctionBegin; 658 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 659 if (zerodiag) { 660 Mat A; 661 Vec vec3_N; 662 PetscScalar *vals; 663 const PetscInt *idxs; 664 PetscInt nz,*count; 665 666 /* p0 */ 667 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 668 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 669 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 670 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 671 for (i=0;i<nz;i++) vals[i] = 1.; 672 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 673 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 674 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 675 /* v_I */ 676 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 677 for (i=0;i<nz;i++) vals[i] = 0.; 678 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 679 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 680 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 681 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 682 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 683 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 684 if (dirIS) { 685 PetscInt n; 686 687 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 688 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 689 for (i=0;i<n;i++) vals[i] = 0.; 690 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 691 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 692 } 693 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 694 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 695 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 696 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 697 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 698 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 699 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 700 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 701 ierr = PetscFree(vals);CHKERRQ(ierr); 702 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 703 704 /* there should not be any pressure dofs lying on the interface */ 705 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 706 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 707 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 708 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 709 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 710 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 711 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 712 ierr = PetscFree(count);CHKERRQ(ierr); 713 } 714 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 715 716 /* check PCBDDCBenignGetOrSetP0 */ 717 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 718 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 719 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 720 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 721 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 722 for (i=0;i<pcbddc->benign_n;i++) { 723 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 724 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 725 } 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 731 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 732 { 733 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 734 IS pressures,zerodiag,*zerodiag_subs; 735 PetscInt nz,n; 736 PetscInt *interior_dofs,n_interior_dofs; 737 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 743 for (n=0;n<pcbddc->benign_n;n++) { 744 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 747 pcbddc->benign_n = 0; 748 /* if a local info on dofs is present, assumes that the last field represents "pressures" 749 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 750 Checks if all the pressure dofs in each subdomain have a zero diagonal 751 If not, a change of basis on pressures is not needed 752 since the local Schur complements are already SPD 753 */ 754 has_null_pressures = PETSC_TRUE; 755 have_null = PETSC_TRUE; 756 if (pcbddc->n_ISForDofsLocal) { 757 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 758 759 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 760 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 761 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 762 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 763 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 764 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 765 if (!sorted) { 766 ierr = ISSort(pressures);CHKERRQ(ierr); 767 } 768 } else { 769 pressures = NULL; 770 } 771 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 772 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 773 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 774 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 775 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 776 if (!sorted) { 777 ierr = ISSort(zerodiag);CHKERRQ(ierr); 778 } 779 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 780 if (!nz) { 781 if (n) have_null = PETSC_FALSE; 782 has_null_pressures = PETSC_FALSE; 783 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 784 } 785 recompute_zerodiag = PETSC_FALSE; 786 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 787 zerodiag_subs = NULL; 788 pcbddc->benign_n = 0; 789 n_interior_dofs = 0; 790 interior_dofs = NULL; 791 if (pcbddc->current_level) { /* need to compute interior nodes */ 792 PetscInt n,i,j; 793 PetscInt n_neigh,*neigh,*n_shared,**shared; 794 PetscInt *iwork; 795 796 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 797 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 798 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 799 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 800 for (i=1;i<n_neigh;i++) 801 for (j=0;j<n_shared[i];j++) 802 iwork[shared[i][j]] += 1; 803 for (i=0;i<n;i++) 804 if (!iwork[i]) 805 interior_dofs[n_interior_dofs++] = i; 806 ierr = PetscFree(iwork);CHKERRQ(ierr); 807 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 808 } 809 if (has_null_pressures) { 810 IS *subs; 811 PetscInt nsubs,i,j,nl; 812 const PetscInt *idxs; 813 PetscScalar *array; 814 Vec *work; 815 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 816 817 subs = pcbddc->local_subs; 818 nsubs = pcbddc->n_local_subs; 819 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 820 if (pcbddc->current_level) { 821 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 822 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 823 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 824 /* work[0] = 1_p */ 825 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 826 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 827 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 828 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 829 /* work[0] = 1_v */ 830 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 831 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 832 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 833 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 834 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 835 } 836 if (nsubs > 1) { 837 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 838 for (i=0;i<nsubs;i++) { 839 ISLocalToGlobalMapping l2g; 840 IS t_zerodiag_subs; 841 PetscInt nl; 842 843 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 844 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 845 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 846 if (nl) { 847 PetscBool valid = PETSC_TRUE; 848 849 if (pcbddc->current_level) { 850 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 851 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 852 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 853 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 854 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 855 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 856 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 857 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 858 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 859 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 860 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 861 for (j=0;j<n_interior_dofs;j++) { 862 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 863 valid = PETSC_FALSE; 864 break; 865 } 866 } 867 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 868 } 869 if (valid && pcbddc->NeumannBoundariesLocal) { 870 IS t_bc; 871 PetscInt nzb; 872 873 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 875 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 876 if (nzb) valid = PETSC_FALSE; 877 } 878 if (valid && pressures) { 879 IS t_pressure_subs; 880 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 881 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 882 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 883 } 884 if (valid) { 885 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 886 pcbddc->benign_n++; 887 } else { 888 recompute_zerodiag = PETSC_TRUE; 889 } 890 } 891 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 892 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 893 } 894 } else { /* there's just one subdomain (or zero if they have not been detected */ 895 PetscBool valid = PETSC_TRUE; 896 897 if (pcbddc->NeumannBoundariesLocal) { 898 PetscInt nzb; 899 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 900 if (nzb) valid = PETSC_FALSE; 901 } 902 if (valid && pressures) { 903 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 904 } 905 if (valid && pcbddc->current_level) { 906 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 907 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 908 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 909 for (j=0;j<n_interior_dofs;j++) { 910 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 911 valid = PETSC_FALSE; 912 break; 913 } 914 } 915 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 916 } 917 if (valid) { 918 pcbddc->benign_n = 1; 919 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 920 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 921 zerodiag_subs[0] = zerodiag; 922 } 923 } 924 if (pcbddc->current_level) { 925 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 926 } 927 } 928 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 929 930 if (!pcbddc->benign_n) { 931 PetscInt n; 932 933 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 934 recompute_zerodiag = PETSC_FALSE; 935 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 936 if (n) { 937 has_null_pressures = PETSC_FALSE; 938 have_null = PETSC_FALSE; 939 } 940 } 941 942 /* final check for null pressures */ 943 if (zerodiag && pressures) { 944 PetscInt nz,np; 945 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 946 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 947 if (nz != np) have_null = PETSC_FALSE; 948 } 949 950 if (recompute_zerodiag) { 951 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 952 if (pcbddc->benign_n == 1) { 953 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 954 zerodiag = zerodiag_subs[0]; 955 } else { 956 PetscInt i,nzn,*new_idxs; 957 958 nzn = 0; 959 for (i=0;i<pcbddc->benign_n;i++) { 960 PetscInt ns; 961 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 962 nzn += ns; 963 } 964 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 965 nzn = 0; 966 for (i=0;i<pcbddc->benign_n;i++) { 967 PetscInt ns,*idxs; 968 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 969 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 970 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 971 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 972 nzn += ns; 973 } 974 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 976 } 977 have_null = PETSC_FALSE; 978 } 979 980 /* Prepare matrix to compute no-net-flux */ 981 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 982 Mat A,loc_divudotp; 983 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 984 IS row,col,isused = NULL; 985 PetscInt M,N,n,st,n_isused; 986 987 if (pressures) { 988 isused = pressures; 989 } else { 990 isused = zerodiag; 991 } 992 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 993 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 994 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 995 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 996 n_isused = 0; 997 if (isused) { 998 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 999 } 1000 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1001 st = st-n_isused; 1002 if (n) { 1003 const PetscInt *gidxs; 1004 1005 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1006 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1007 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1008 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1009 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1010 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1011 } else { 1012 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1013 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1015 } 1016 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1017 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1018 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1019 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1020 ierr = ISDestroy(&row);CHKERRQ(ierr); 1021 ierr = ISDestroy(&col);CHKERRQ(ierr); 1022 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1023 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1024 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1025 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1026 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1027 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1028 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1029 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1030 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 } 1033 1034 /* change of basis and p0 dofs */ 1035 if (has_null_pressures) { 1036 IS zerodiagc; 1037 const PetscInt *idxs,*idxsc; 1038 PetscInt i,s,*nnz; 1039 1040 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1041 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1042 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1043 /* local change of basis for pressures */ 1044 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1045 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1046 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1047 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1048 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1049 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1050 for (i=0;i<pcbddc->benign_n;i++) { 1051 PetscInt nzs,j; 1052 1053 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1054 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1055 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1056 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1057 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1058 } 1059 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1060 ierr = PetscFree(nnz);CHKERRQ(ierr); 1061 /* set identity on velocities */ 1062 for (i=0;i<n-nz;i++) { 1063 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1064 } 1065 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1066 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1067 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1068 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1069 /* set change on pressures */ 1070 for (s=0;s<pcbddc->benign_n;s++) { 1071 PetscScalar *array; 1072 PetscInt nzs; 1073 1074 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1075 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1076 for (i=0;i<nzs-1;i++) { 1077 PetscScalar vals[2]; 1078 PetscInt cols[2]; 1079 1080 cols[0] = idxs[i]; 1081 cols[1] = idxs[nzs-1]; 1082 vals[0] = 1.; 1083 vals[1] = 1.; 1084 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1085 } 1086 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1087 for (i=0;i<nzs-1;i++) array[i] = -1.; 1088 array[nzs-1] = 1.; 1089 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1090 /* store local idxs for p0 */ 1091 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1092 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1093 ierr = PetscFree(array);CHKERRQ(ierr); 1094 } 1095 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1096 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1097 /* project if needed */ 1098 if (pcbddc->benign_change_explicit) { 1099 Mat M; 1100 1101 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1102 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1103 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1104 ierr = MatDestroy(&M);CHKERRQ(ierr); 1105 } 1106 /* store global idxs for p0 */ 1107 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1108 } 1109 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1110 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1111 1112 /* determines if the coarse solver will be singular or not */ 1113 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1114 /* determines if the problem has subdomains with 0 pressure block */ 1115 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1116 *zerodiaglocal = zerodiag; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1122 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1123 { 1124 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1125 PetscScalar *array; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 if (!pcbddc->benign_sf) { 1130 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1131 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1132 } 1133 if (get) { 1134 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1135 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1136 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1137 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1138 } else { 1139 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1140 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1141 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1142 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1149 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1150 { 1151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 /* TODO: add error checking 1156 - avoid nested pop (or push) calls. 1157 - cannot push before pop. 1158 - cannot call this if pcbddc->local_mat is NULL 1159 */ 1160 if (!pcbddc->benign_n) { 1161 PetscFunctionReturn(0); 1162 } 1163 if (pop) { 1164 if (pcbddc->benign_change_explicit) { 1165 IS is_p0; 1166 MatReuse reuse; 1167 1168 /* extract B_0 */ 1169 reuse = MAT_INITIAL_MATRIX; 1170 if (pcbddc->benign_B0) { 1171 reuse = MAT_REUSE_MATRIX; 1172 } 1173 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1174 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1175 /* remove rows and cols from local problem */ 1176 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1177 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1178 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1179 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1180 } else { 1181 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1182 PetscScalar *vals; 1183 PetscInt i,n,*idxs_ins; 1184 1185 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1186 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1187 if (!pcbddc->benign_B0) { 1188 PetscInt *nnz; 1189 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1190 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1191 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1192 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1193 for (i=0;i<pcbddc->benign_n;i++) { 1194 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1195 nnz[i] = n - nnz[i]; 1196 } 1197 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1198 ierr = PetscFree(nnz);CHKERRQ(ierr); 1199 } 1200 1201 for (i=0;i<pcbddc->benign_n;i++) { 1202 PetscScalar *array; 1203 PetscInt *idxs,j,nz,cum; 1204 1205 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1206 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1207 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1208 for (j=0;j<nz;j++) vals[j] = 1.; 1209 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1210 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1211 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1212 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1213 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1214 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1215 cum = 0; 1216 for (j=0;j<n;j++) { 1217 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1218 vals[cum] = array[j]; 1219 idxs_ins[cum] = j; 1220 cum++; 1221 } 1222 } 1223 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1224 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1225 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1226 } 1227 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1228 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1229 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1230 } 1231 } else { /* push */ 1232 if (pcbddc->benign_change_explicit) { 1233 PetscInt i; 1234 1235 for (i=0;i<pcbddc->benign_n;i++) { 1236 PetscScalar *B0_vals; 1237 PetscInt *B0_cols,B0_ncol; 1238 1239 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1240 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1241 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1242 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1243 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1244 } 1245 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1246 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1247 } else { 1248 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1249 } 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1256 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1257 { 1258 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1259 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1260 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1261 PetscBLASInt *B_iwork,*B_ifail; 1262 PetscScalar *work,lwork; 1263 PetscScalar *St,*S,*eigv; 1264 PetscScalar *Sarray,*Starray; 1265 PetscReal *eigs,thresh; 1266 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1267 PetscBool allocated_S_St; 1268 #if defined(PETSC_USE_COMPLEX) 1269 PetscReal *rwork; 1270 #endif 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1275 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1276 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1277 1278 if (pcbddc->dbg_flag) { 1279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1282 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1283 } 1284 1285 if (pcbddc->dbg_flag) { 1286 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1287 } 1288 1289 /* max size of subsets */ 1290 mss = 0; 1291 for (i=0;i<sub_schurs->n_subs;i++) { 1292 PetscInt subset_size; 1293 1294 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1295 mss = PetscMax(mss,subset_size); 1296 } 1297 1298 /* min/max and threshold */ 1299 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1300 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1301 nmax = PetscMax(nmin,nmax); 1302 allocated_S_St = PETSC_FALSE; 1303 if (nmin) { 1304 allocated_S_St = PETSC_TRUE; 1305 } 1306 1307 /* allocate lapack workspace */ 1308 cum = cum2 = 0; 1309 maxneigs = 0; 1310 for (i=0;i<sub_schurs->n_subs;i++) { 1311 PetscInt n,subset_size; 1312 1313 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1314 n = PetscMin(subset_size,nmax); 1315 cum += subset_size; 1316 cum2 += subset_size*n; 1317 maxneigs = PetscMax(maxneigs,n); 1318 } 1319 if (mss) { 1320 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1321 PetscBLASInt B_itype = 1; 1322 PetscBLASInt B_N = mss; 1323 PetscReal zero = 0.0; 1324 PetscReal eps = 0.0; /* dlamch? */ 1325 1326 B_lwork = -1; 1327 S = NULL; 1328 St = NULL; 1329 eigs = NULL; 1330 eigv = NULL; 1331 B_iwork = NULL; 1332 B_ifail = NULL; 1333 #if defined(PETSC_USE_COMPLEX) 1334 rwork = NULL; 1335 #endif 1336 thresh = 1.0; 1337 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1338 #if defined(PETSC_USE_COMPLEX) 1339 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1340 #else 1341 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1342 #endif 1343 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1344 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1345 } else { 1346 /* TODO */ 1347 } 1348 } else { 1349 lwork = 0; 1350 } 1351 1352 nv = 0; 1353 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1354 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1355 } 1356 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1357 if (allocated_S_St) { 1358 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1359 } 1360 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1361 #if defined(PETSC_USE_COMPLEX) 1362 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1363 #endif 1364 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1365 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1366 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1367 nv+cum,&pcbddc->adaptive_constraints_idxs, 1368 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1369 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1370 1371 maxneigs = 0; 1372 cum = cumarray = 0; 1373 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1374 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1375 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1376 const PetscInt *idxs; 1377 1378 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1379 for (cum=0;cum<nv;cum++) { 1380 pcbddc->adaptive_constraints_n[cum] = 1; 1381 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1382 pcbddc->adaptive_constraints_data[cum] = 1.0; 1383 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1384 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1385 } 1386 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1387 } 1388 1389 if (mss) { /* multilevel */ 1390 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1391 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1392 } 1393 1394 thresh = pcbddc->adaptive_threshold; 1395 for (i=0;i<sub_schurs->n_subs;i++) { 1396 const PetscInt *idxs; 1397 PetscReal upper,lower; 1398 PetscInt j,subset_size,eigs_start = 0; 1399 PetscBLASInt B_N; 1400 PetscBool same_data = PETSC_FALSE; 1401 1402 if (pcbddc->use_deluxe_scaling) { 1403 upper = PETSC_MAX_REAL; 1404 lower = thresh; 1405 } else { 1406 upper = 1./thresh; 1407 lower = 0.; 1408 } 1409 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1410 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1411 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1412 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1413 if (sub_schurs->is_hermitian) { 1414 PetscInt j,k; 1415 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1416 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1417 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1418 } 1419 for (j=0;j<subset_size;j++) { 1420 for (k=j;k<subset_size;k++) { 1421 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1422 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1423 } 1424 } 1425 } else { 1426 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1427 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1428 } 1429 } else { 1430 S = Sarray + cumarray; 1431 St = Starray + cumarray; 1432 } 1433 /* see if we can save some work */ 1434 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1435 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1436 } 1437 1438 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1439 B_neigs = 0; 1440 } else { 1441 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1442 PetscBLASInt B_itype = 1; 1443 PetscBLASInt B_IL, B_IU; 1444 PetscReal eps = -1.0; /* dlamch? */ 1445 PetscInt nmin_s; 1446 PetscBool compute_range = PETSC_FALSE; 1447 1448 if (pcbddc->dbg_flag) { 1449 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1450 } 1451 1452 compute_range = PETSC_FALSE; 1453 if (thresh > 1.+PETSC_SMALL && !same_data) { 1454 compute_range = PETSC_TRUE; 1455 } 1456 1457 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1458 if (compute_range) { 1459 1460 /* ask for eigenvalues larger than thresh */ 1461 #if defined(PETSC_USE_COMPLEX) 1462 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1463 #else 1464 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1465 #endif 1466 } else if (!same_data) { 1467 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1468 B_IL = 1; 1469 #if defined(PETSC_USE_COMPLEX) 1470 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1471 #else 1472 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1473 #endif 1474 } else { /* same_data is true, so get the adaptive function requested by the user */ 1475 PetscInt k; 1476 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1477 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1478 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1479 nmin = nmax; 1480 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1481 for (k=0;k<nmax;k++) { 1482 eigs[k] = 1./PETSC_SMALL; 1483 eigv[k*(subset_size+1)] = 1.0; 1484 } 1485 } 1486 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1487 if (B_ierr) { 1488 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1489 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1490 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1491 } 1492 1493 if (B_neigs > nmax) { 1494 if (pcbddc->dbg_flag) { 1495 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1496 } 1497 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1498 B_neigs = nmax; 1499 } 1500 1501 nmin_s = PetscMin(nmin,B_N); 1502 if (B_neigs < nmin_s) { 1503 PetscBLASInt B_neigs2; 1504 1505 if (pcbddc->use_deluxe_scaling) { 1506 B_IL = B_N - nmin_s + 1; 1507 B_IU = B_N - B_neigs; 1508 } else { 1509 B_IL = B_neigs + 1; 1510 B_IU = nmin_s; 1511 } 1512 if (pcbddc->dbg_flag) { 1513 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1514 } 1515 if (sub_schurs->is_hermitian) { 1516 PetscInt j,k; 1517 for (j=0;j<subset_size;j++) { 1518 for (k=j;k<subset_size;k++) { 1519 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1520 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1521 } 1522 } 1523 } else { 1524 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1525 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1526 } 1527 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1528 #if defined(PETSC_USE_COMPLEX) 1529 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1530 #else 1531 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1532 #endif 1533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1534 B_neigs += B_neigs2; 1535 } 1536 if (B_ierr) { 1537 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1538 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1539 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1540 } 1541 if (pcbddc->dbg_flag) { 1542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1543 for (j=0;j<B_neigs;j++) { 1544 if (eigs[j] == 0.0) { 1545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1546 } else { 1547 if (pcbddc->use_deluxe_scaling) { 1548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1551 } 1552 } 1553 } 1554 } 1555 } else { 1556 /* TODO */ 1557 } 1558 } 1559 /* change the basis back to the original one */ 1560 if (sub_schurs->change) { 1561 Mat change,phi,phit; 1562 1563 if (pcbddc->dbg_flag > 1) { 1564 PetscInt ii; 1565 for (ii=0;ii<B_neigs;ii++) { 1566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1567 for (j=0;j<B_N;j++) { 1568 #if defined(PETSC_USE_COMPLEX) 1569 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 1570 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 1571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1572 #else 1573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1574 #endif 1575 } 1576 } 1577 } 1578 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1580 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1581 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1582 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1583 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1584 } 1585 maxneigs = PetscMax(B_neigs,maxneigs); 1586 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1587 if (B_neigs) { 1588 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1589 1590 if (pcbddc->dbg_flag > 1) { 1591 PetscInt ii; 1592 for (ii=0;ii<B_neigs;ii++) { 1593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1594 for (j=0;j<B_N;j++) { 1595 #if defined(PETSC_USE_COMPLEX) 1596 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1597 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1599 #else 1600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1601 #endif 1602 } 1603 } 1604 } 1605 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1606 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1607 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1608 cum++; 1609 } 1610 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1611 /* shift for next computation */ 1612 cumarray += subset_size*subset_size; 1613 } 1614 if (pcbddc->dbg_flag) { 1615 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1616 } 1617 1618 if (mss) { 1619 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1620 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1621 /* destroy matrices (junk) */ 1622 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1623 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1624 } 1625 if (allocated_S_St) { 1626 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1627 } 1628 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1629 #if defined(PETSC_USE_COMPLEX) 1630 ierr = PetscFree(rwork);CHKERRQ(ierr); 1631 #endif 1632 if (pcbddc->dbg_flag) { 1633 PetscInt maxneigs_r; 1634 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1635 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1636 } 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "PCBDDCSetUpSolvers" 1642 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1643 { 1644 PetscScalar *coarse_submat_vals; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1649 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1650 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1651 1652 /* Setup local neumann solver ksp_R */ 1653 /* PCBDDCSetUpLocalScatters should be called first! */ 1654 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1655 1656 /* 1657 Setup local correction and local part of coarse basis. 1658 Gives back the dense local part of the coarse matrix in column major ordering 1659 */ 1660 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1661 1662 /* Compute total number of coarse nodes and setup coarse solver */ 1663 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1664 1665 /* free */ 1666 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "PCBDDCResetCustomization" 1672 PetscErrorCode PCBDDCResetCustomization(PC pc) 1673 { 1674 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1679 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1680 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1681 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1682 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1683 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1684 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1685 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1686 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1687 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "PCBDDCResetTopography" 1693 PetscErrorCode PCBDDCResetTopography(PC pc) 1694 { 1695 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1696 PetscInt i; 1697 PetscErrorCode ierr; 1698 1699 PetscFunctionBegin; 1700 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1701 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1702 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1703 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1704 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1705 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1706 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1707 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1708 pcbddc->graphanalyzed = PETSC_FALSE; 1709 for (i=0;i<pcbddc->n_local_subs;i++) { 1710 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1711 } 1712 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1713 if (pcbddc->sub_schurs) { 1714 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "PCBDDCResetSolvers" 1721 PetscErrorCode PCBDDCResetSolvers(PC pc) 1722 { 1723 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1728 if (pcbddc->coarse_phi_B) { 1729 PetscScalar *array; 1730 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1731 ierr = PetscFree(array);CHKERRQ(ierr); 1732 } 1733 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1734 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1735 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1736 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1737 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1738 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1739 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1740 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1741 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1742 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1743 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1744 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1745 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1746 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1747 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1748 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1749 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1750 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1751 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1752 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1753 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1754 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1755 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1756 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1757 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1758 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1759 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1760 if (pcbddc->benign_zerodiag_subs) { 1761 PetscInt i; 1762 for (i=0;i<pcbddc->benign_n;i++) { 1763 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1764 } 1765 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1766 } 1767 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1773 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1774 { 1775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1776 PC_IS *pcis = (PC_IS*)pc->data; 1777 VecType impVecType; 1778 PetscInt n_constraints,n_R,old_size; 1779 PetscErrorCode ierr; 1780 1781 PetscFunctionBegin; 1782 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1783 /* get sizes */ 1784 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1785 n_R = pcis->n - pcbddc->n_vertices; 1786 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1787 /* local work vectors (try to avoid unneeded work)*/ 1788 /* R nodes */ 1789 old_size = -1; 1790 if (pcbddc->vec1_R) { 1791 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1792 } 1793 if (n_R != old_size) { 1794 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1795 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1796 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1797 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1798 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1799 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1800 } 1801 /* local primal dofs */ 1802 old_size = -1; 1803 if (pcbddc->vec1_P) { 1804 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1805 } 1806 if (pcbddc->local_primal_size != old_size) { 1807 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1808 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1809 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1810 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1811 } 1812 /* local explicit constraints */ 1813 old_size = -1; 1814 if (pcbddc->vec1_C) { 1815 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1816 } 1817 if (n_constraints && n_constraints != old_size) { 1818 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1819 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1820 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1821 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1822 } 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "PCBDDCSetUpCorrection" 1828 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1829 { 1830 PetscErrorCode ierr; 1831 /* pointers to pcis and pcbddc */ 1832 PC_IS* pcis = (PC_IS*)pc->data; 1833 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1834 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1835 /* submatrices of local problem */ 1836 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1837 /* submatrices of local coarse problem */ 1838 Mat S_VV,S_CV,S_VC,S_CC; 1839 /* working matrices */ 1840 Mat C_CR; 1841 /* additional working stuff */ 1842 PC pc_R; 1843 Mat F; 1844 Vec dummy_vec; 1845 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1846 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1847 PetscScalar *work; 1848 PetscInt *idx_V_B; 1849 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1850 PetscInt i,n_R,n_D,n_B; 1851 1852 /* some shortcuts to scalars */ 1853 PetscScalar one=1.0,m_one=-1.0; 1854 1855 PetscFunctionBegin; 1856 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1857 1858 /* Set Non-overlapping dimensions */ 1859 n_vertices = pcbddc->n_vertices; 1860 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1861 n_B = pcis->n_B; 1862 n_D = pcis->n - n_B; 1863 n_R = pcis->n - n_vertices; 1864 1865 /* vertices in boundary numbering */ 1866 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1867 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1868 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1869 1870 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1871 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1873 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1875 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1877 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1879 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1880 1881 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1882 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1883 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1884 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1885 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1886 lda_rhs = n_R; 1887 need_benign_correction = PETSC_FALSE; 1888 if (isLU || isILU || isCHOL) { 1889 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1890 } else if (sub_schurs && sub_schurs->reuse_solver) { 1891 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1892 MatFactorType type; 1893 1894 F = reuse_solver->F; 1895 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1896 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1897 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1898 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1899 } else { 1900 F = NULL; 1901 } 1902 1903 /* allocate workspace */ 1904 n = 0; 1905 if (n_constraints) { 1906 n += lda_rhs*n_constraints; 1907 } 1908 if (n_vertices) { 1909 n = PetscMax(2*lda_rhs*n_vertices,n); 1910 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1911 } 1912 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1913 1914 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1915 dummy_vec = NULL; 1916 if (need_benign_correction && lda_rhs != n_R && F) { 1917 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1918 } 1919 1920 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1921 if (n_constraints) { 1922 Mat M1,M2,M3,C_B; 1923 IS is_aux; 1924 PetscScalar *array,*array2; 1925 1926 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1927 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1928 1929 /* Extract constraints on R nodes: C_{CR} */ 1930 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1931 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1932 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1933 1934 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1935 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1936 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1937 for (i=0;i<n_constraints;i++) { 1938 const PetscScalar *row_cmat_values; 1939 const PetscInt *row_cmat_indices; 1940 PetscInt size_of_constraint,j; 1941 1942 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1943 for (j=0;j<size_of_constraint;j++) { 1944 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1945 } 1946 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1947 } 1948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1949 if (F) { 1950 Mat B; 1951 1952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1953 if (need_benign_correction) { 1954 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1955 1956 /* rhs is already zero on interior dofs, no need to change the rhs */ 1957 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1958 } 1959 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1960 if (need_benign_correction) { 1961 PetscScalar *marr; 1962 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1963 1964 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1965 if (lda_rhs != n_R) { 1966 for (i=0;i<n_constraints;i++) { 1967 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1968 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1969 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1970 } 1971 } else { 1972 for (i=0;i<n_constraints;i++) { 1973 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1974 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1976 } 1977 } 1978 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1979 } 1980 ierr = MatDestroy(&B);CHKERRQ(ierr); 1981 } else { 1982 PetscScalar *marr; 1983 1984 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1985 for (i=0;i<n_constraints;i++) { 1986 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1987 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1988 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1989 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1990 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1991 } 1992 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1993 } 1994 if (!pcbddc->switch_static) { 1995 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1996 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1997 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1998 for (i=0;i<n_constraints;i++) { 1999 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2000 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2001 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2002 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2004 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2005 } 2006 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2007 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2008 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2009 } else { 2010 if (lda_rhs != n_R) { 2011 IS dummy; 2012 2013 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2014 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2015 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2016 } else { 2017 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2018 pcbddc->local_auxmat2 = local_auxmat2_R; 2019 } 2020 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2021 } 2022 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2023 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2024 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2025 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2026 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2027 if (isCHOL) { 2028 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2029 } else { 2030 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2031 } 2032 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2033 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2034 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2035 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2036 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2037 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2038 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2039 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2040 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2041 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2042 } 2043 2044 /* Get submatrices from subdomain matrix */ 2045 if (n_vertices) { 2046 IS is_aux; 2047 2048 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2049 IS tis; 2050 2051 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2052 ierr = ISSort(tis);CHKERRQ(ierr); 2053 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2054 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2055 } else { 2056 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2057 } 2058 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2059 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2060 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2061 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2062 } 2063 2064 /* Matrix of coarse basis functions (local) */ 2065 if (pcbddc->coarse_phi_B) { 2066 PetscInt on_B,on_primal,on_D=n_D; 2067 if (pcbddc->coarse_phi_D) { 2068 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2069 } 2070 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2071 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2072 PetscScalar *marray; 2073 2074 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2075 ierr = PetscFree(marray);CHKERRQ(ierr); 2076 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2077 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2078 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2080 } 2081 } 2082 2083 if (!pcbddc->coarse_phi_B) { 2084 PetscScalar *marray; 2085 2086 n = n_B*pcbddc->local_primal_size; 2087 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2088 n += n_D*pcbddc->local_primal_size; 2089 } 2090 if (!pcbddc->symmetric_primal) { 2091 n *= 2; 2092 } 2093 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2094 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2095 n = n_B*pcbddc->local_primal_size; 2096 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2097 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2098 n += n_D*pcbddc->local_primal_size; 2099 } 2100 if (!pcbddc->symmetric_primal) { 2101 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2102 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2103 n = n_B*pcbddc->local_primal_size; 2104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2105 } 2106 } else { 2107 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2108 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2109 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2110 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2111 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2112 } 2113 } 2114 } 2115 2116 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2117 p0_lidx_I = NULL; 2118 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2119 const PetscInt *idxs; 2120 2121 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2123 for (i=0;i<pcbddc->benign_n;i++) { 2124 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2125 } 2126 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2127 } 2128 2129 /* vertices */ 2130 if (n_vertices) { 2131 2132 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2133 2134 if (n_R) { 2135 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2136 PetscBLASInt B_N,B_one = 1; 2137 PetscScalar *x,*y; 2138 PetscBool isseqaij; 2139 2140 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2141 if (need_benign_correction) { 2142 ISLocalToGlobalMapping RtoN; 2143 IS is_p0; 2144 PetscInt *idxs_p0,n; 2145 2146 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2147 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2148 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2149 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2150 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2151 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2152 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2153 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2154 } 2155 2156 if (lda_rhs == n_R) { 2157 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2158 } else { 2159 PetscScalar *av,*array; 2160 const PetscInt *xadj,*adjncy; 2161 PetscInt n; 2162 PetscBool flg_row; 2163 2164 array = work+lda_rhs*n_vertices; 2165 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2166 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2167 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2168 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2169 for (i=0;i<n;i++) { 2170 PetscInt j; 2171 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2172 } 2173 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2174 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2175 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2176 } 2177 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2178 if (need_benign_correction) { 2179 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2180 PetscScalar *marr; 2181 2182 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2183 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2184 2185 | 0 0 0 | (V) 2186 L = | 0 0 -1 | (P-p0) 2187 | 0 0 -1 | (p0) 2188 2189 */ 2190 for (i=0;i<reuse_solver->benign_n;i++) { 2191 const PetscScalar *vals; 2192 const PetscInt *idxs,*idxs_zero; 2193 PetscInt n,j,nz; 2194 2195 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2196 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2197 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2198 for (j=0;j<n;j++) { 2199 PetscScalar val = vals[j]; 2200 PetscInt k,col = idxs[j]; 2201 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2202 } 2203 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2204 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2205 } 2206 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2207 } 2208 if (F) { 2209 /* need to correct the rhs */ 2210 if (need_benign_correction) { 2211 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2212 PetscScalar *marr; 2213 2214 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2215 if (lda_rhs != n_R) { 2216 for (i=0;i<n_vertices;i++) { 2217 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2218 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2219 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2220 } 2221 } else { 2222 for (i=0;i<n_vertices;i++) { 2223 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2224 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2225 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2226 } 2227 } 2228 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2229 } 2230 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2231 /* need to correct the solution */ 2232 if (need_benign_correction) { 2233 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2234 PetscScalar *marr; 2235 2236 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2237 if (lda_rhs != n_R) { 2238 for (i=0;i<n_vertices;i++) { 2239 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2240 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2241 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2242 } 2243 } else { 2244 for (i=0;i<n_vertices;i++) { 2245 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2246 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2247 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2248 } 2249 } 2250 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2251 } 2252 } else { 2253 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2254 for (i=0;i<n_vertices;i++) { 2255 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2256 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2257 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2258 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2259 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2260 } 2261 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2262 } 2263 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2264 /* S_VV and S_CV */ 2265 if (n_constraints) { 2266 Mat B; 2267 2268 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2269 for (i=0;i<n_vertices;i++) { 2270 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2271 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2272 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2273 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2274 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2275 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2276 } 2277 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2278 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2279 ierr = MatDestroy(&B);CHKERRQ(ierr); 2280 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2281 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2282 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2283 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2284 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2285 ierr = MatDestroy(&B);CHKERRQ(ierr); 2286 } 2287 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2288 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2289 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2290 } 2291 if (lda_rhs != n_R) { 2292 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2293 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2294 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2295 } 2296 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2297 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2298 if (need_benign_correction) { 2299 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2300 PetscScalar *marr,*sums; 2301 2302 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2303 ierr = MatDenseGetArray(S_VVt,&marr); 2304 for (i=0;i<reuse_solver->benign_n;i++) { 2305 const PetscScalar *vals; 2306 const PetscInt *idxs,*idxs_zero; 2307 PetscInt n,j,nz; 2308 2309 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2310 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2311 for (j=0;j<n_vertices;j++) { 2312 PetscInt k; 2313 sums[j] = 0.; 2314 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2315 } 2316 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2317 for (j=0;j<n;j++) { 2318 PetscScalar val = vals[j]; 2319 PetscInt k; 2320 for (k=0;k<n_vertices;k++) { 2321 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2322 } 2323 } 2324 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2325 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2326 } 2327 ierr = PetscFree(sums);CHKERRQ(ierr); 2328 ierr = MatDenseRestoreArray(S_VVt,&marr); 2329 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2330 } 2331 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2332 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2333 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2334 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2335 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2336 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2337 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2338 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2339 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2340 } else { 2341 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2342 } 2343 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2344 2345 /* coarse basis functions */ 2346 for (i=0;i<n_vertices;i++) { 2347 PetscScalar *y; 2348 2349 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2350 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2351 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2352 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2353 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2354 y[n_B*i+idx_V_B[i]] = 1.0; 2355 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2356 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2357 2358 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2359 PetscInt j; 2360 2361 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2362 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2363 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2364 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2365 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2366 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2367 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2368 } 2369 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2370 } 2371 /* if n_R == 0 the object is not destroyed */ 2372 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2373 } 2374 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2375 2376 if (n_constraints) { 2377 Mat B; 2378 2379 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2380 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2381 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2382 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2383 if (n_vertices) { 2384 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2385 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2386 } else { 2387 Mat S_VCt; 2388 2389 if (lda_rhs != n_R) { 2390 ierr = MatDestroy(&B);CHKERRQ(ierr); 2391 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2392 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2393 } 2394 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2395 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2396 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2397 } 2398 } 2399 ierr = MatDestroy(&B);CHKERRQ(ierr); 2400 /* coarse basis functions */ 2401 for (i=0;i<n_constraints;i++) { 2402 PetscScalar *y; 2403 2404 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2405 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2406 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2407 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2408 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2409 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2410 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2411 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2412 PetscInt j; 2413 2414 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2415 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2416 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2417 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2418 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2419 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2420 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2421 } 2422 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2423 } 2424 } 2425 if (n_constraints) { 2426 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2427 } 2428 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2429 2430 /* coarse matrix entries relative to B_0 */ 2431 if (pcbddc->benign_n) { 2432 Mat B0_B,B0_BPHI; 2433 IS is_dummy; 2434 PetscScalar *data; 2435 PetscInt j; 2436 2437 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2438 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2439 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2440 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2441 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2442 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2443 for (j=0;j<pcbddc->benign_n;j++) { 2444 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2445 for (i=0;i<pcbddc->local_primal_size;i++) { 2446 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2447 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2448 } 2449 } 2450 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2451 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2452 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2453 } 2454 2455 /* compute other basis functions for non-symmetric problems */ 2456 if (!pcbddc->symmetric_primal) { 2457 Mat B_V=NULL,B_C=NULL; 2458 PetscScalar *marray; 2459 2460 if (n_constraints) { 2461 Mat S_CCT,C_CRT; 2462 2463 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2464 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2465 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2466 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2467 if (n_vertices) { 2468 Mat S_VCT; 2469 2470 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2471 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2472 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2473 } 2474 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2475 } else { 2476 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2477 } 2478 if (n_vertices && n_R) { 2479 PetscScalar *av,*marray; 2480 const PetscInt *xadj,*adjncy; 2481 PetscInt n; 2482 PetscBool flg_row; 2483 2484 /* B_V = B_V - A_VR^T */ 2485 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2486 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2487 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2488 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2489 for (i=0;i<n;i++) { 2490 PetscInt j; 2491 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2492 } 2493 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2494 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2495 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2496 } 2497 2498 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2499 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2500 for (i=0;i<n_vertices;i++) { 2501 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2502 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2503 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2504 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2505 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2506 } 2507 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2508 if (B_C) { 2509 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2510 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2511 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2512 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2513 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2514 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2515 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2516 } 2517 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2518 } 2519 /* coarse basis functions */ 2520 for (i=0;i<pcbddc->local_primal_size;i++) { 2521 PetscScalar *y; 2522 2523 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2524 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2525 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2526 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2527 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2528 if (i<n_vertices) { 2529 y[n_B*i+idx_V_B[i]] = 1.0; 2530 } 2531 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2532 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2533 2534 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2535 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2536 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2537 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2538 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2540 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2541 } 2542 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2543 } 2544 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2545 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2546 } 2547 /* free memory */ 2548 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2549 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2550 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2551 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2552 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2553 ierr = PetscFree(work);CHKERRQ(ierr); 2554 if (n_vertices) { 2555 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2556 } 2557 if (n_constraints) { 2558 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2559 } 2560 /* Checking coarse_sub_mat and coarse basis functios */ 2561 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2562 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2563 if (pcbddc->dbg_flag) { 2564 Mat coarse_sub_mat; 2565 Mat AUXMAT,TM1,TM2,TM3,TM4; 2566 Mat coarse_phi_D,coarse_phi_B; 2567 Mat coarse_psi_D,coarse_psi_B; 2568 Mat A_II,A_BB,A_IB,A_BI; 2569 Mat C_B,CPHI; 2570 IS is_dummy; 2571 Vec mones; 2572 MatType checkmattype=MATSEQAIJ; 2573 PetscReal real_value; 2574 2575 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2576 Mat A; 2577 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2578 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2579 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2580 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2581 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2582 ierr = MatDestroy(&A);CHKERRQ(ierr); 2583 } else { 2584 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2585 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2586 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2587 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2588 } 2589 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2590 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2591 if (!pcbddc->symmetric_primal) { 2592 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2593 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2594 } 2595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2596 2597 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2598 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2599 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2600 if (!pcbddc->symmetric_primal) { 2601 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2602 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2604 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2605 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2607 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2608 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2610 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2611 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2612 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2613 } else { 2614 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2615 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2616 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2617 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2619 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2620 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2621 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2622 } 2623 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2624 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2625 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2626 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2627 if (pcbddc->benign_n) { 2628 Mat B0_B,B0_BPHI; 2629 PetscScalar *data,*data2; 2630 PetscInt j; 2631 2632 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2633 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2634 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2635 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2636 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2637 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2638 for (j=0;j<pcbddc->benign_n;j++) { 2639 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2640 for (i=0;i<pcbddc->local_primal_size;i++) { 2641 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2642 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2643 } 2644 } 2645 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2646 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2647 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2648 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2649 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2650 } 2651 #if 0 2652 { 2653 PetscViewer viewer; 2654 char filename[256]; 2655 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2656 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2657 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2658 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2659 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2660 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2661 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2662 if (save_change) { 2663 Mat phi_B; 2664 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2665 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2666 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2667 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2668 } else { 2669 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2670 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2671 } 2672 if (pcbddc->coarse_phi_D) { 2673 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2674 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2675 } 2676 if (pcbddc->coarse_psi_B) { 2677 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2678 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2679 } 2680 if (pcbddc->coarse_psi_D) { 2681 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2682 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2683 } 2684 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2685 } 2686 #endif 2687 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2688 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2689 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2691 2692 /* check constraints */ 2693 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2694 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2695 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2696 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2697 } else { 2698 PetscScalar *data; 2699 Mat tmat; 2700 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2702 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2703 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2704 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2705 } 2706 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2707 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2708 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2709 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2711 if (!pcbddc->symmetric_primal) { 2712 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2713 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2714 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2715 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2717 } 2718 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2719 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2720 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2721 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2722 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2723 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2724 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2725 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2726 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2727 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2728 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2729 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2730 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2731 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2732 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2733 if (!pcbddc->symmetric_primal) { 2734 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2735 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2736 } 2737 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2738 } 2739 /* get back data */ 2740 *coarse_submat_vals_n = coarse_submat_vals; 2741 PetscFunctionReturn(0); 2742 } 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2746 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2747 { 2748 Mat *work_mat; 2749 IS isrow_s,iscol_s; 2750 PetscBool rsorted,csorted; 2751 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2756 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2757 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2758 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2759 2760 if (!rsorted) { 2761 const PetscInt *idxs; 2762 PetscInt *idxs_sorted,i; 2763 2764 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2765 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2766 for (i=0;i<rsize;i++) { 2767 idxs_perm_r[i] = i; 2768 } 2769 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2770 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2771 for (i=0;i<rsize;i++) { 2772 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2773 } 2774 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2775 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2776 } else { 2777 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2778 isrow_s = isrow; 2779 } 2780 2781 if (!csorted) { 2782 if (isrow == iscol) { 2783 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2784 iscol_s = isrow_s; 2785 } else { 2786 const PetscInt *idxs; 2787 PetscInt *idxs_sorted,i; 2788 2789 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2790 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2791 for (i=0;i<csize;i++) { 2792 idxs_perm_c[i] = i; 2793 } 2794 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2795 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2796 for (i=0;i<csize;i++) { 2797 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2798 } 2799 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2800 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2801 } 2802 } else { 2803 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2804 iscol_s = iscol; 2805 } 2806 2807 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2808 2809 if (!rsorted || !csorted) { 2810 Mat new_mat; 2811 IS is_perm_r,is_perm_c; 2812 2813 if (!rsorted) { 2814 PetscInt *idxs_r,i; 2815 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2816 for (i=0;i<rsize;i++) { 2817 idxs_r[idxs_perm_r[i]] = i; 2818 } 2819 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2820 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2821 } else { 2822 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2823 } 2824 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2825 2826 if (!csorted) { 2827 if (isrow_s == iscol_s) { 2828 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2829 is_perm_c = is_perm_r; 2830 } else { 2831 PetscInt *idxs_c,i; 2832 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2833 for (i=0;i<csize;i++) { 2834 idxs_c[idxs_perm_c[i]] = i; 2835 } 2836 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2837 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2838 } 2839 } else { 2840 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2841 } 2842 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2843 2844 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2845 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2846 work_mat[0] = new_mat; 2847 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2848 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2849 } 2850 2851 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2852 *B = work_mat[0]; 2853 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2854 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2855 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2861 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2862 { 2863 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2864 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2865 Mat new_mat; 2866 IS is_local,is_global; 2867 PetscInt local_size; 2868 PetscBool isseqaij; 2869 PetscErrorCode ierr; 2870 2871 PetscFunctionBegin; 2872 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2873 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2874 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2875 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2876 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2877 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2878 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2879 2880 /* check */ 2881 if (pcbddc->dbg_flag) { 2882 Vec x,x_change; 2883 PetscReal error; 2884 2885 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2886 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2887 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2888 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2889 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2891 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2892 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2893 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2894 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2895 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2896 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2897 ierr = VecDestroy(&x);CHKERRQ(ierr); 2898 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2899 } 2900 2901 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2902 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2903 if (isseqaij) { 2904 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2905 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2906 } else { 2907 Mat work_mat; 2908 2909 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2910 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2911 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2912 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2913 } 2914 if (matis->A->symmetric_set) { 2915 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2916 #if !defined(PETSC_USE_COMPLEX) 2917 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2918 #endif 2919 } 2920 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2921 PetscFunctionReturn(0); 2922 } 2923 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2926 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2927 { 2928 PC_IS* pcis = (PC_IS*)(pc->data); 2929 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2930 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2931 PetscInt *idx_R_local=NULL; 2932 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2933 PetscInt vbs,bs; 2934 PetscBT bitmask=NULL; 2935 PetscErrorCode ierr; 2936 2937 PetscFunctionBegin; 2938 /* 2939 No need to setup local scatters if 2940 - primal space is unchanged 2941 AND 2942 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2943 AND 2944 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2945 */ 2946 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2947 PetscFunctionReturn(0); 2948 } 2949 /* destroy old objects */ 2950 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2951 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2952 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2953 /* Set Non-overlapping dimensions */ 2954 n_B = pcis->n_B; 2955 n_D = pcis->n - n_B; 2956 n_vertices = pcbddc->n_vertices; 2957 2958 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2959 2960 /* create auxiliary bitmask and allocate workspace */ 2961 if (!sub_schurs || !sub_schurs->reuse_solver) { 2962 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2963 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2964 for (i=0;i<n_vertices;i++) { 2965 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2966 } 2967 2968 for (i=0, n_R=0; i<pcis->n; i++) { 2969 if (!PetscBTLookup(bitmask,i)) { 2970 idx_R_local[n_R++] = i; 2971 } 2972 } 2973 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2974 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2975 2976 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2977 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2978 } 2979 2980 /* Block code */ 2981 vbs = 1; 2982 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2983 if (bs>1 && !(n_vertices%bs)) { 2984 PetscBool is_blocked = PETSC_TRUE; 2985 PetscInt *vary; 2986 if (!sub_schurs || !sub_schurs->reuse_solver) { 2987 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2988 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2989 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2990 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2991 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2992 for (i=0; i<pcis->n/bs; i++) { 2993 if (vary[i]!=0 && vary[i]!=bs) { 2994 is_blocked = PETSC_FALSE; 2995 break; 2996 } 2997 } 2998 ierr = PetscFree(vary);CHKERRQ(ierr); 2999 } else { 3000 /* Verify directly the R set */ 3001 for (i=0; i<n_R/bs; i++) { 3002 PetscInt j,node=idx_R_local[bs*i]; 3003 for (j=1; j<bs; j++) { 3004 if (node != idx_R_local[bs*i+j]-j) { 3005 is_blocked = PETSC_FALSE; 3006 break; 3007 } 3008 } 3009 } 3010 } 3011 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3012 vbs = bs; 3013 for (i=0;i<n_R/vbs;i++) { 3014 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3015 } 3016 } 3017 } 3018 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3019 if (sub_schurs && sub_schurs->reuse_solver) { 3020 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3021 3022 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3023 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3024 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3025 reuse_solver->is_R = pcbddc->is_R_local; 3026 } else { 3027 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3028 } 3029 3030 /* print some info if requested */ 3031 if (pcbddc->dbg_flag) { 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3033 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3034 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3035 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3036 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3038 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3039 } 3040 3041 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3042 if (!sub_schurs || !sub_schurs->reuse_solver) { 3043 IS is_aux1,is_aux2; 3044 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3045 3046 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3047 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3048 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3049 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3050 for (i=0; i<n_D; i++) { 3051 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3052 } 3053 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3054 for (i=0, j=0; i<n_R; i++) { 3055 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3056 aux_array1[j++] = i; 3057 } 3058 } 3059 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3060 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3061 for (i=0, j=0; i<n_B; i++) { 3062 if (!PetscBTLookup(bitmask,is_indices[i])) { 3063 aux_array2[j++] = i; 3064 } 3065 } 3066 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3067 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3068 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3069 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3070 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3071 3072 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3073 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3074 for (i=0, j=0; i<n_R; i++) { 3075 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3076 aux_array1[j++] = i; 3077 } 3078 } 3079 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3080 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3081 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3082 } 3083 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3084 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3085 } else { 3086 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3087 IS tis; 3088 PetscInt schur_size; 3089 3090 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3091 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3092 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3093 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3094 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3095 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3096 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3097 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3098 } 3099 } 3100 PetscFunctionReturn(0); 3101 } 3102 3103 3104 #undef __FUNCT__ 3105 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3106 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3107 { 3108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3109 PC_IS *pcis = (PC_IS*)pc->data; 3110 PC pc_temp; 3111 Mat A_RR; 3112 MatReuse reuse; 3113 PetscScalar m_one = -1.0; 3114 PetscReal value; 3115 PetscInt n_D,n_R; 3116 PetscBool check_corr[2],issbaij; 3117 PetscErrorCode ierr; 3118 /* prefixes stuff */ 3119 char dir_prefix[256],neu_prefix[256],str_level[16]; 3120 size_t len; 3121 3122 PetscFunctionBegin; 3123 3124 /* compute prefixes */ 3125 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3126 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3127 if (!pcbddc->current_level) { 3128 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3129 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3130 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3131 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3132 } else { 3133 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3134 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3135 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3136 len -= 15; /* remove "pc_bddc_coarse_" */ 3137 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3138 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3139 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3140 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3141 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3142 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3143 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3144 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3145 } 3146 3147 /* DIRICHLET PROBLEM */ 3148 if (dirichlet) { 3149 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3150 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3151 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3152 if (pcbddc->dbg_flag) { 3153 Mat A_IIn; 3154 3155 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3156 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3157 pcis->A_II = A_IIn; 3158 } 3159 } 3160 if (pcbddc->local_mat->symmetric_set) { 3161 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3162 } 3163 /* Matrix for Dirichlet problem is pcis->A_II */ 3164 n_D = pcis->n - pcis->n_B; 3165 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3166 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3167 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3168 /* default */ 3169 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3170 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3171 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3172 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3173 if (issbaij) { 3174 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3175 } else { 3176 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3177 } 3178 /* Allow user's customization */ 3179 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3180 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3181 } 3182 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3183 if (sub_schurs && sub_schurs->reuse_solver) { 3184 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3185 3186 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3187 } 3188 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3189 if (!n_D) { 3190 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3191 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3192 } 3193 /* Set Up KSP for Dirichlet problem of BDDC */ 3194 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3195 /* set ksp_D into pcis data */ 3196 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3197 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3198 pcis->ksp_D = pcbddc->ksp_D; 3199 } 3200 3201 /* NEUMANN PROBLEM */ 3202 A_RR = 0; 3203 if (neumann) { 3204 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3205 PetscInt ibs,mbs; 3206 PetscBool issbaij; 3207 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3208 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3209 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3210 if (pcbddc->ksp_R) { /* already created ksp */ 3211 PetscInt nn_R; 3212 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3213 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3214 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3215 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3216 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3217 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3218 reuse = MAT_INITIAL_MATRIX; 3219 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3220 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3221 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3222 reuse = MAT_INITIAL_MATRIX; 3223 } else { /* safe to reuse the matrix */ 3224 reuse = MAT_REUSE_MATRIX; 3225 } 3226 } 3227 /* last check */ 3228 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3229 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3230 reuse = MAT_INITIAL_MATRIX; 3231 } 3232 } else { /* first time, so we need to create the matrix */ 3233 reuse = MAT_INITIAL_MATRIX; 3234 } 3235 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3236 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3237 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3238 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3239 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3240 if (matis->A == pcbddc->local_mat) { 3241 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3242 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3243 } else { 3244 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3245 } 3246 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3247 if (matis->A == pcbddc->local_mat) { 3248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3249 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3250 } else { 3251 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3252 } 3253 } 3254 /* extract A_RR */ 3255 if (sub_schurs && sub_schurs->reuse_solver) { 3256 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3257 3258 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3259 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3260 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3261 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3262 } else { 3263 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3264 } 3265 } else { 3266 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3267 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3268 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3269 } 3270 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3271 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3272 } 3273 if (pcbddc->local_mat->symmetric_set) { 3274 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3275 } 3276 if (!pcbddc->ksp_R) { /* create object if not present */ 3277 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3278 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3279 /* default */ 3280 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3281 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3282 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3283 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3284 if (issbaij) { 3285 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3286 } else { 3287 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3288 } 3289 /* Allow user's customization */ 3290 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3291 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3292 } 3293 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3294 if (!n_R) { 3295 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3296 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3297 } 3298 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3299 /* Reuse solver if it is present */ 3300 if (sub_schurs && sub_schurs->reuse_solver) { 3301 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3302 3303 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3304 } 3305 /* Set Up KSP for Neumann problem of BDDC */ 3306 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3307 } 3308 3309 if (pcbddc->dbg_flag) { 3310 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3311 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3312 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3313 } 3314 3315 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3316 check_corr[0] = check_corr[1] = PETSC_FALSE; 3317 if (pcbddc->NullSpace_corr[0]) { 3318 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3319 } 3320 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3321 check_corr[0] = PETSC_TRUE; 3322 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3323 } 3324 if (neumann && pcbddc->NullSpace_corr[2]) { 3325 check_corr[1] = PETSC_TRUE; 3326 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3327 } 3328 3329 /* check Dirichlet and Neumann solvers */ 3330 if (pcbddc->dbg_flag) { 3331 if (dirichlet) { /* Dirichlet */ 3332 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3333 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3334 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3335 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3336 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3338 if (check_corr[0]) { 3339 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3340 } 3341 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3342 } 3343 if (neumann) { /* Neumann */ 3344 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3345 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3346 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3347 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3348 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3350 if (check_corr[1]) { 3351 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3352 } 3353 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3354 } 3355 } 3356 /* free Neumann problem's matrix */ 3357 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3358 PetscFunctionReturn(0); 3359 } 3360 3361 #undef __FUNCT__ 3362 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3363 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3364 { 3365 PetscErrorCode ierr; 3366 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3367 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3368 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3369 3370 PetscFunctionBegin; 3371 if (!reuse_solver) { 3372 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3373 } 3374 if (!pcbddc->switch_static) { 3375 if (applytranspose && pcbddc->local_auxmat1) { 3376 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3377 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3378 } 3379 if (!reuse_solver) { 3380 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3381 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3382 } else { 3383 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3384 3385 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3386 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3387 } 3388 } else { 3389 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3390 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3391 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3392 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3393 if (applytranspose && pcbddc->local_auxmat1) { 3394 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3395 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3396 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3397 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3398 } 3399 } 3400 if (!reuse_solver || pcbddc->switch_static) { 3401 if (applytranspose) { 3402 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3403 } else { 3404 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3405 } 3406 } else { 3407 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3408 3409 if (applytranspose) { 3410 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3411 } else { 3412 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3413 } 3414 } 3415 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3416 if (!pcbddc->switch_static) { 3417 if (!reuse_solver) { 3418 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3419 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3420 } else { 3421 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3422 3423 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3424 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3425 } 3426 if (!applytranspose && pcbddc->local_auxmat1) { 3427 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3428 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3429 } 3430 } else { 3431 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3432 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3433 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3434 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3435 if (!applytranspose && pcbddc->local_auxmat1) { 3436 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3437 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3438 } 3439 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3440 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3441 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3442 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3443 } 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3448 #undef __FUNCT__ 3449 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3450 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3451 { 3452 PetscErrorCode ierr; 3453 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3454 PC_IS* pcis = (PC_IS*) (pc->data); 3455 const PetscScalar zero = 0.0; 3456 3457 PetscFunctionBegin; 3458 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3459 if (!pcbddc->benign_apply_coarse_only) { 3460 if (applytranspose) { 3461 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3462 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3463 } else { 3464 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3465 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3466 } 3467 } else { 3468 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3469 } 3470 3471 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3472 if (pcbddc->benign_n) { 3473 PetscScalar *array; 3474 PetscInt j; 3475 3476 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3477 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3478 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3479 } 3480 3481 /* start communications from local primal nodes to rhs of coarse solver */ 3482 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3483 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3484 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3485 3486 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3487 if (pcbddc->coarse_ksp) { 3488 Mat coarse_mat; 3489 Vec rhs,sol; 3490 MatNullSpace nullsp; 3491 PetscBool isbddc = PETSC_FALSE; 3492 3493 if (pcbddc->benign_have_null) { 3494 PC coarse_pc; 3495 3496 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3497 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3498 /* we need to propagate to coarser levels the need for a possible benign correction */ 3499 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3500 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3501 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3502 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3503 } 3504 } 3505 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3506 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3507 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3508 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3509 if (nullsp) { 3510 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3511 } 3512 if (applytranspose) { 3513 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3514 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3515 } else { 3516 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3517 PC coarse_pc; 3518 3519 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3520 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3521 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3522 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3523 } else { 3524 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3525 } 3526 } 3527 /* we don't need the benign correction at coarser levels anymore */ 3528 if (pcbddc->benign_have_null && isbddc) { 3529 PC coarse_pc; 3530 PC_BDDC* coarsepcbddc; 3531 3532 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3533 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3534 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3535 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3536 } 3537 if (nullsp) { 3538 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3539 } 3540 } 3541 3542 /* Local solution on R nodes */ 3543 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3544 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3545 } 3546 /* communications from coarse sol to local primal nodes */ 3547 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3548 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3549 3550 /* Sum contributions from the two levels */ 3551 if (!pcbddc->benign_apply_coarse_only) { 3552 if (applytranspose) { 3553 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3554 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3555 } else { 3556 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3557 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3558 } 3559 /* store p0 */ 3560 if (pcbddc->benign_n) { 3561 PetscScalar *array; 3562 PetscInt j; 3563 3564 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3565 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3566 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3567 } 3568 } else { /* expand the coarse solution */ 3569 if (applytranspose) { 3570 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3571 } else { 3572 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3573 } 3574 } 3575 PetscFunctionReturn(0); 3576 } 3577 3578 #undef __FUNCT__ 3579 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3580 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3581 { 3582 PetscErrorCode ierr; 3583 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3584 PetscScalar *array; 3585 Vec from,to; 3586 3587 PetscFunctionBegin; 3588 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3589 from = pcbddc->coarse_vec; 3590 to = pcbddc->vec1_P; 3591 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3592 Vec tvec; 3593 3594 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3595 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3596 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3597 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3598 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3599 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3600 } 3601 } else { /* from local to global -> put data in coarse right hand side */ 3602 from = pcbddc->vec1_P; 3603 to = pcbddc->coarse_vec; 3604 } 3605 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3606 PetscFunctionReturn(0); 3607 } 3608 3609 #undef __FUNCT__ 3610 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3611 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3612 { 3613 PetscErrorCode ierr; 3614 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3615 PetscScalar *array; 3616 Vec from,to; 3617 3618 PetscFunctionBegin; 3619 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3620 from = pcbddc->coarse_vec; 3621 to = pcbddc->vec1_P; 3622 } else { /* from local to global -> put data in coarse right hand side */ 3623 from = pcbddc->vec1_P; 3624 to = pcbddc->coarse_vec; 3625 } 3626 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3627 if (smode == SCATTER_FORWARD) { 3628 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3629 Vec tvec; 3630 3631 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3632 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3633 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3634 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3635 } 3636 } else { 3637 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3638 ierr = VecResetArray(from);CHKERRQ(ierr); 3639 } 3640 } 3641 PetscFunctionReturn(0); 3642 } 3643 3644 /* uncomment for testing purposes */ 3645 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3646 #undef __FUNCT__ 3647 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3648 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3649 { 3650 PetscErrorCode ierr; 3651 PC_IS* pcis = (PC_IS*)(pc->data); 3652 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3653 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3654 /* one and zero */ 3655 PetscScalar one=1.0,zero=0.0; 3656 /* space to store constraints and their local indices */ 3657 PetscScalar *constraints_data; 3658 PetscInt *constraints_idxs,*constraints_idxs_B; 3659 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3660 PetscInt *constraints_n; 3661 /* iterators */ 3662 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3663 /* BLAS integers */ 3664 PetscBLASInt lwork,lierr; 3665 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3666 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3667 /* reuse */ 3668 PetscInt olocal_primal_size,olocal_primal_size_cc; 3669 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3670 /* change of basis */ 3671 PetscBool qr_needed; 3672 PetscBT change_basis,qr_needed_idx; 3673 /* auxiliary stuff */ 3674 PetscInt *nnz,*is_indices; 3675 PetscInt ncc; 3676 /* some quantities */ 3677 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3678 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3679 3680 PetscFunctionBegin; 3681 /* Destroy Mat objects computed previously */ 3682 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3683 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3684 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3685 /* save info on constraints from previous setup (if any) */ 3686 olocal_primal_size = pcbddc->local_primal_size; 3687 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3688 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3689 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3690 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3691 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3692 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3693 3694 if (!pcbddc->adaptive_selection) { 3695 IS ISForVertices,*ISForFaces,*ISForEdges; 3696 MatNullSpace nearnullsp; 3697 const Vec *nearnullvecs; 3698 Vec *localnearnullsp; 3699 PetscScalar *array; 3700 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3701 PetscBool nnsp_has_cnst; 3702 /* LAPACK working arrays for SVD or POD */ 3703 PetscBool skip_lapack,boolforchange; 3704 PetscScalar *work; 3705 PetscReal *singular_vals; 3706 #if defined(PETSC_USE_COMPLEX) 3707 PetscReal *rwork; 3708 #endif 3709 #if defined(PETSC_MISSING_LAPACK_GESVD) 3710 PetscScalar *temp_basis,*correlation_mat; 3711 #else 3712 PetscBLASInt dummy_int=1; 3713 PetscScalar dummy_scalar=1.; 3714 #endif 3715 3716 /* Get index sets for faces, edges and vertices from graph */ 3717 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3718 /* print some info */ 3719 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3720 PetscInt nv; 3721 3722 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3723 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3724 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3725 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3726 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3727 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3729 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3730 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3731 } 3732 3733 /* free unneeded index sets */ 3734 if (!pcbddc->use_vertices) { 3735 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3736 } 3737 if (!pcbddc->use_edges) { 3738 for (i=0;i<n_ISForEdges;i++) { 3739 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3740 } 3741 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3742 n_ISForEdges = 0; 3743 } 3744 if (!pcbddc->use_faces) { 3745 for (i=0;i<n_ISForFaces;i++) { 3746 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3747 } 3748 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3749 n_ISForFaces = 0; 3750 } 3751 3752 /* check if near null space is attached to global mat */ 3753 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3754 if (nearnullsp) { 3755 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3756 /* remove any stored info */ 3757 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3758 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3759 /* store information for BDDC solver reuse */ 3760 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3761 pcbddc->onearnullspace = nearnullsp; 3762 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3763 for (i=0;i<nnsp_size;i++) { 3764 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3765 } 3766 } else { /* if near null space is not provided BDDC uses constants by default */ 3767 nnsp_size = 0; 3768 nnsp_has_cnst = PETSC_TRUE; 3769 } 3770 /* get max number of constraints on a single cc */ 3771 max_constraints = nnsp_size; 3772 if (nnsp_has_cnst) max_constraints++; 3773 3774 /* 3775 Evaluate maximum storage size needed by the procedure 3776 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3777 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3778 There can be multiple constraints per connected component 3779 */ 3780 n_vertices = 0; 3781 if (ISForVertices) { 3782 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3783 } 3784 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3785 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3786 3787 total_counts = n_ISForFaces+n_ISForEdges; 3788 total_counts *= max_constraints; 3789 total_counts += n_vertices; 3790 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3791 3792 total_counts = 0; 3793 max_size_of_constraint = 0; 3794 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3795 IS used_is; 3796 if (i<n_ISForEdges) { 3797 used_is = ISForEdges[i]; 3798 } else { 3799 used_is = ISForFaces[i-n_ISForEdges]; 3800 } 3801 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3802 total_counts += j; 3803 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3804 } 3805 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3806 3807 /* get local part of global near null space vectors */ 3808 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3809 for (k=0;k<nnsp_size;k++) { 3810 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3811 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3812 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3813 } 3814 3815 /* whether or not to skip lapack calls */ 3816 skip_lapack = PETSC_TRUE; 3817 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3818 3819 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3820 if (!skip_lapack) { 3821 PetscScalar temp_work; 3822 3823 #if defined(PETSC_MISSING_LAPACK_GESVD) 3824 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3825 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3826 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3827 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3828 #if defined(PETSC_USE_COMPLEX) 3829 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3830 #endif 3831 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3832 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3833 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3834 lwork = -1; 3835 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3836 #if !defined(PETSC_USE_COMPLEX) 3837 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3838 #else 3839 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3840 #endif 3841 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3842 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3843 #else /* on missing GESVD */ 3844 /* SVD */ 3845 PetscInt max_n,min_n; 3846 max_n = max_size_of_constraint; 3847 min_n = max_constraints; 3848 if (max_size_of_constraint < max_constraints) { 3849 min_n = max_size_of_constraint; 3850 max_n = max_constraints; 3851 } 3852 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3853 #if defined(PETSC_USE_COMPLEX) 3854 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3855 #endif 3856 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3857 lwork = -1; 3858 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3859 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3860 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3861 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3862 #if !defined(PETSC_USE_COMPLEX) 3863 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3864 #else 3865 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3866 #endif 3867 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3868 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3869 #endif /* on missing GESVD */ 3870 /* Allocate optimal workspace */ 3871 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3872 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3873 } 3874 /* Now we can loop on constraining sets */ 3875 total_counts = 0; 3876 constraints_idxs_ptr[0] = 0; 3877 constraints_data_ptr[0] = 0; 3878 /* vertices */ 3879 if (n_vertices) { 3880 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3881 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3882 for (i=0;i<n_vertices;i++) { 3883 constraints_n[total_counts] = 1; 3884 constraints_data[total_counts] = 1.0; 3885 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3886 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3887 total_counts++; 3888 } 3889 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3890 n_vertices = total_counts; 3891 } 3892 3893 /* edges and faces */ 3894 total_counts_cc = total_counts; 3895 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3896 IS used_is; 3897 PetscBool idxs_copied = PETSC_FALSE; 3898 3899 if (ncc<n_ISForEdges) { 3900 used_is = ISForEdges[ncc]; 3901 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3902 } else { 3903 used_is = ISForFaces[ncc-n_ISForEdges]; 3904 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3905 } 3906 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3907 3908 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3909 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3910 /* change of basis should not be performed on local periodic nodes */ 3911 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3912 if (nnsp_has_cnst) { 3913 PetscScalar quad_value; 3914 3915 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3916 idxs_copied = PETSC_TRUE; 3917 3918 if (!pcbddc->use_nnsp_true) { 3919 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3920 } else { 3921 quad_value = 1.0; 3922 } 3923 for (j=0;j<size_of_constraint;j++) { 3924 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3925 } 3926 temp_constraints++; 3927 total_counts++; 3928 } 3929 for (k=0;k<nnsp_size;k++) { 3930 PetscReal real_value; 3931 PetscScalar *ptr_to_data; 3932 3933 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3934 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3935 for (j=0;j<size_of_constraint;j++) { 3936 ptr_to_data[j] = array[is_indices[j]]; 3937 } 3938 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3939 /* check if array is null on the connected component */ 3940 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3941 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3942 if (real_value > 0.0) { /* keep indices and values */ 3943 temp_constraints++; 3944 total_counts++; 3945 if (!idxs_copied) { 3946 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3947 idxs_copied = PETSC_TRUE; 3948 } 3949 } 3950 } 3951 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3952 valid_constraints = temp_constraints; 3953 if (!pcbddc->use_nnsp_true && temp_constraints) { 3954 if (temp_constraints == 1) { /* just normalize the constraint */ 3955 PetscScalar norm,*ptr_to_data; 3956 3957 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3959 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3960 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3961 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3962 } else { /* perform SVD */ 3963 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3964 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3965 3966 #if defined(PETSC_MISSING_LAPACK_GESVD) 3967 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3968 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3969 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3970 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3971 from that computed using LAPACKgesvd 3972 -> This is due to a different computation of eigenvectors in LAPACKheev 3973 -> The quality of the POD-computed basis will be the same */ 3974 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3975 /* Store upper triangular part of correlation matrix */ 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3978 for (j=0;j<temp_constraints;j++) { 3979 for (k=0;k<j+1;k++) { 3980 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3981 } 3982 } 3983 /* compute eigenvalues and eigenvectors of correlation matrix */ 3984 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3985 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3986 #if !defined(PETSC_USE_COMPLEX) 3987 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3988 #else 3989 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3990 #endif 3991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3992 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3993 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3994 j = 0; 3995 while (j < temp_constraints && singular_vals[j] < tol) j++; 3996 total_counts = total_counts-j; 3997 valid_constraints = temp_constraints-j; 3998 /* scale and copy POD basis into used quadrature memory */ 3999 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4000 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4001 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4003 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4005 if (j<temp_constraints) { 4006 PetscInt ii; 4007 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4008 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4009 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4010 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4011 for (k=0;k<temp_constraints-j;k++) { 4012 for (ii=0;ii<size_of_constraint;ii++) { 4013 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4014 } 4015 } 4016 } 4017 #else /* on missing GESVD */ 4018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4019 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4021 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4022 #if !defined(PETSC_USE_COMPLEX) 4023 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4024 #else 4025 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4026 #endif 4027 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4028 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4029 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4030 k = temp_constraints; 4031 if (k > size_of_constraint) k = size_of_constraint; 4032 j = 0; 4033 while (j < k && singular_vals[k-j-1] < tol) j++; 4034 valid_constraints = k-j; 4035 total_counts = total_counts-temp_constraints+valid_constraints; 4036 #endif /* on missing GESVD */ 4037 } 4038 } 4039 /* update pointers information */ 4040 if (valid_constraints) { 4041 constraints_n[total_counts_cc] = valid_constraints; 4042 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4043 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4044 /* set change_of_basis flag */ 4045 if (boolforchange) { 4046 PetscBTSet(change_basis,total_counts_cc); 4047 } 4048 total_counts_cc++; 4049 } 4050 } 4051 /* free workspace */ 4052 if (!skip_lapack) { 4053 ierr = PetscFree(work);CHKERRQ(ierr); 4054 #if defined(PETSC_USE_COMPLEX) 4055 ierr = PetscFree(rwork);CHKERRQ(ierr); 4056 #endif 4057 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4058 #if defined(PETSC_MISSING_LAPACK_GESVD) 4059 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4060 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4061 #endif 4062 } 4063 for (k=0;k<nnsp_size;k++) { 4064 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4065 } 4066 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4067 /* free index sets of faces, edges and vertices */ 4068 for (i=0;i<n_ISForFaces;i++) { 4069 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4070 } 4071 if (n_ISForFaces) { 4072 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4073 } 4074 for (i=0;i<n_ISForEdges;i++) { 4075 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4076 } 4077 if (n_ISForEdges) { 4078 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4079 } 4080 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4081 } else { 4082 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4083 4084 total_counts = 0; 4085 n_vertices = 0; 4086 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4087 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4088 } 4089 max_constraints = 0; 4090 total_counts_cc = 0; 4091 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4092 total_counts += pcbddc->adaptive_constraints_n[i]; 4093 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4094 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4095 } 4096 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4097 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4098 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4099 constraints_data = pcbddc->adaptive_constraints_data; 4100 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4101 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4102 total_counts_cc = 0; 4103 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4104 if (pcbddc->adaptive_constraints_n[i]) { 4105 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4106 } 4107 } 4108 #if 0 4109 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4110 for (i=0;i<total_counts_cc;i++) { 4111 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4112 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4113 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4114 printf(" %d",constraints_idxs[j]); 4115 } 4116 printf("\n"); 4117 printf("number of cc: %d\n",constraints_n[i]); 4118 } 4119 for (i=0;i<n_vertices;i++) { 4120 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4121 } 4122 for (i=0;i<sub_schurs->n_subs;i++) { 4123 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4124 } 4125 #endif 4126 4127 max_size_of_constraint = 0; 4128 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4129 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4130 /* Change of basis */ 4131 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4132 if (pcbddc->use_change_of_basis) { 4133 for (i=0;i<sub_schurs->n_subs;i++) { 4134 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4135 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4136 } 4137 } 4138 } 4139 } 4140 pcbddc->local_primal_size = total_counts; 4141 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4142 4143 /* map constraints_idxs in boundary numbering */ 4144 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4145 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4146 4147 /* Create constraint matrix */ 4148 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4149 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4150 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4151 4152 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4153 /* determine if a QR strategy is needed for change of basis */ 4154 qr_needed = PETSC_FALSE; 4155 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4156 total_primal_vertices=0; 4157 pcbddc->local_primal_size_cc = 0; 4158 for (i=0;i<total_counts_cc;i++) { 4159 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4160 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4161 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4162 pcbddc->local_primal_size_cc += 1; 4163 } else if (PetscBTLookup(change_basis,i)) { 4164 for (k=0;k<constraints_n[i];k++) { 4165 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4166 } 4167 pcbddc->local_primal_size_cc += constraints_n[i]; 4168 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4169 PetscBTSet(qr_needed_idx,i); 4170 qr_needed = PETSC_TRUE; 4171 } 4172 } else { 4173 pcbddc->local_primal_size_cc += 1; 4174 } 4175 } 4176 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4177 pcbddc->n_vertices = total_primal_vertices; 4178 /* permute indices in order to have a sorted set of vertices */ 4179 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4180 4181 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4182 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4183 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4184 4185 /* nonzero structure of constraint matrix */ 4186 /* and get reference dof for local constraints */ 4187 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4188 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4189 4190 j = total_primal_vertices; 4191 total_counts = total_primal_vertices; 4192 cum = total_primal_vertices; 4193 for (i=n_vertices;i<total_counts_cc;i++) { 4194 if (!PetscBTLookup(change_basis,i)) { 4195 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4196 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4197 cum++; 4198 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4199 for (k=0;k<constraints_n[i];k++) { 4200 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4201 nnz[j+k] = size_of_constraint; 4202 } 4203 j += constraints_n[i]; 4204 } 4205 } 4206 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4207 ierr = PetscFree(nnz);CHKERRQ(ierr); 4208 4209 /* set values in constraint matrix */ 4210 for (i=0;i<total_primal_vertices;i++) { 4211 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4212 } 4213 total_counts = total_primal_vertices; 4214 for (i=n_vertices;i<total_counts_cc;i++) { 4215 if (!PetscBTLookup(change_basis,i)) { 4216 PetscInt *cols; 4217 4218 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4219 cols = constraints_idxs+constraints_idxs_ptr[i]; 4220 for (k=0;k<constraints_n[i];k++) { 4221 PetscInt row = total_counts+k; 4222 PetscScalar *vals; 4223 4224 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4225 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4226 } 4227 total_counts += constraints_n[i]; 4228 } 4229 } 4230 /* assembling */ 4231 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4232 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4233 4234 /* 4235 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4236 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4237 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4238 */ 4239 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4240 if (pcbddc->use_change_of_basis) { 4241 /* dual and primal dofs on a single cc */ 4242 PetscInt dual_dofs,primal_dofs; 4243 /* working stuff for GEQRF */ 4244 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4245 PetscBLASInt lqr_work; 4246 /* working stuff for UNGQR */ 4247 PetscScalar *gqr_work,lgqr_work_t; 4248 PetscBLASInt lgqr_work; 4249 /* working stuff for TRTRS */ 4250 PetscScalar *trs_rhs; 4251 PetscBLASInt Blas_NRHS; 4252 /* pointers for values insertion into change of basis matrix */ 4253 PetscInt *start_rows,*start_cols; 4254 PetscScalar *start_vals; 4255 /* working stuff for values insertion */ 4256 PetscBT is_primal; 4257 PetscInt *aux_primal_numbering_B; 4258 /* matrix sizes */ 4259 PetscInt global_size,local_size; 4260 /* temporary change of basis */ 4261 Mat localChangeOfBasisMatrix; 4262 /* extra space for debugging */ 4263 PetscScalar *dbg_work; 4264 4265 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4266 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4267 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4268 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4269 /* nonzeros for local mat */ 4270 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4271 if (!pcbddc->benign_change || pcbddc->fake_change) { 4272 for (i=0;i<pcis->n;i++) nnz[i]=1; 4273 } else { 4274 const PetscInt *ii; 4275 PetscInt n; 4276 PetscBool flg_row; 4277 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4278 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4279 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4280 } 4281 for (i=n_vertices;i<total_counts_cc;i++) { 4282 if (PetscBTLookup(change_basis,i)) { 4283 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4284 if (PetscBTLookup(qr_needed_idx,i)) { 4285 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4286 } else { 4287 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4288 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4289 } 4290 } 4291 } 4292 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4293 ierr = PetscFree(nnz);CHKERRQ(ierr); 4294 /* Set interior change in the matrix */ 4295 if (!pcbddc->benign_change || pcbddc->fake_change) { 4296 for (i=0;i<pcis->n;i++) { 4297 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4298 } 4299 } else { 4300 const PetscInt *ii,*jj; 4301 PetscScalar *aa; 4302 PetscInt n; 4303 PetscBool flg_row; 4304 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4305 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4306 for (i=0;i<n;i++) { 4307 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4308 } 4309 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4310 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4311 } 4312 4313 if (pcbddc->dbg_flag) { 4314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4316 } 4317 4318 4319 /* Now we loop on the constraints which need a change of basis */ 4320 /* 4321 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4322 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4323 4324 Basic blocks of change of basis matrix T computed by 4325 4326 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4327 4328 | 1 0 ... 0 s_1/S | 4329 | 0 1 ... 0 s_2/S | 4330 | ... | 4331 | 0 ... 1 s_{n-1}/S | 4332 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4333 4334 with S = \sum_{i=1}^n s_i^2 4335 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4336 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4337 4338 - QR decomposition of constraints otherwise 4339 */ 4340 if (qr_needed) { 4341 /* space to store Q */ 4342 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4343 /* first we issue queries for optimal work */ 4344 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4345 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4346 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4347 lqr_work = -1; 4348 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4349 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4350 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4351 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4352 lgqr_work = -1; 4353 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4354 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4355 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4356 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4357 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4358 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4359 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4360 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4361 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4362 /* array to store scaling factors for reflectors */ 4363 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4364 /* array to store rhs and solution of triangular solver */ 4365 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4366 /* allocating workspace for check */ 4367 if (pcbddc->dbg_flag) { 4368 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4369 } 4370 } 4371 /* array to store whether a node is primal or not */ 4372 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4373 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4374 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4375 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4376 for (i=0;i<total_primal_vertices;i++) { 4377 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4378 } 4379 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4380 4381 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4382 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4383 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4384 if (PetscBTLookup(change_basis,total_counts)) { 4385 /* get constraint info */ 4386 primal_dofs = constraints_n[total_counts]; 4387 dual_dofs = size_of_constraint-primal_dofs; 4388 4389 if (pcbddc->dbg_flag) { 4390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4391 } 4392 4393 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4394 4395 /* copy quadrature constraints for change of basis check */ 4396 if (pcbddc->dbg_flag) { 4397 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4398 } 4399 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4400 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4401 4402 /* compute QR decomposition of constraints */ 4403 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4404 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4405 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4406 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4407 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4408 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4409 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4410 4411 /* explictly compute R^-T */ 4412 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4413 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4414 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4417 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4418 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4419 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4421 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4422 4423 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4424 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4425 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4426 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4427 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4428 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4429 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4430 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4431 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4432 4433 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4434 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4435 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4437 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4438 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4439 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4440 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4441 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4442 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4443 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4444 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4445 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4446 4447 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4448 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4449 /* insert cols for primal dofs */ 4450 for (j=0;j<primal_dofs;j++) { 4451 start_vals = &qr_basis[j*size_of_constraint]; 4452 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4453 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4454 } 4455 /* insert cols for dual dofs */ 4456 for (j=0,k=0;j<dual_dofs;k++) { 4457 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4458 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4459 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4460 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4461 j++; 4462 } 4463 } 4464 4465 /* check change of basis */ 4466 if (pcbddc->dbg_flag) { 4467 PetscInt ii,jj; 4468 PetscBool valid_qr=PETSC_TRUE; 4469 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4470 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4472 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4474 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4476 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4478 for (jj=0;jj<size_of_constraint;jj++) { 4479 for (ii=0;ii<primal_dofs;ii++) { 4480 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4481 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4482 } 4483 } 4484 if (!valid_qr) { 4485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4486 for (jj=0;jj<size_of_constraint;jj++) { 4487 for (ii=0;ii<primal_dofs;ii++) { 4488 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4489 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4490 } 4491 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4492 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4493 } 4494 } 4495 } 4496 } else { 4497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4498 } 4499 } 4500 } else { /* simple transformation block */ 4501 PetscInt row,col; 4502 PetscScalar val,norm; 4503 4504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4505 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4506 for (j=0;j<size_of_constraint;j++) { 4507 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4508 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4509 if (!PetscBTLookup(is_primal,row_B)) { 4510 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4511 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4512 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4513 } else { 4514 for (k=0;k<size_of_constraint;k++) { 4515 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4516 if (row != col) { 4517 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4518 } else { 4519 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4520 } 4521 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4522 } 4523 } 4524 } 4525 if (pcbddc->dbg_flag) { 4526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4527 } 4528 } 4529 } else { 4530 if (pcbddc->dbg_flag) { 4531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4532 } 4533 } 4534 } 4535 4536 /* free workspace */ 4537 if (qr_needed) { 4538 if (pcbddc->dbg_flag) { 4539 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4540 } 4541 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4542 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4543 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4544 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4545 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4546 } 4547 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4548 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4549 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 4551 /* assembling of global change of variable */ 4552 if (!pcbddc->fake_change) { 4553 Mat tmat; 4554 PetscInt bs; 4555 4556 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4557 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4558 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4559 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4560 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4561 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4562 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4563 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4564 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4565 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4566 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4567 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4568 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4569 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4570 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4571 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4572 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4573 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4574 4575 /* check */ 4576 if (pcbddc->dbg_flag) { 4577 PetscReal error; 4578 Vec x,x_change; 4579 4580 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4581 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4582 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4583 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4584 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4585 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4586 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4587 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4588 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4589 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4590 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4591 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4593 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4594 ierr = VecDestroy(&x);CHKERRQ(ierr); 4595 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4596 } 4597 /* adapt sub_schurs computed (if any) */ 4598 if (pcbddc->use_deluxe_scaling) { 4599 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4600 4601 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4602 if (sub_schurs && sub_schurs->S_Ej_all) { 4603 Mat S_new,tmat; 4604 IS is_all_N,is_V_Sall = NULL; 4605 4606 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4607 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4608 if (pcbddc->deluxe_zerorows) { 4609 ISLocalToGlobalMapping NtoSall; 4610 IS is_V; 4611 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4612 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4613 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4614 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4615 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4616 } 4617 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4618 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4619 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4620 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4621 if (pcbddc->deluxe_zerorows) { 4622 const PetscScalar *array; 4623 const PetscInt *idxs_V,*idxs_all; 4624 PetscInt i,n_V; 4625 4626 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4627 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4628 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4629 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4630 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4631 for (i=0;i<n_V;i++) { 4632 PetscScalar val; 4633 PetscInt idx; 4634 4635 idx = idxs_V[i]; 4636 val = array[idxs_all[idxs_V[i]]]; 4637 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4638 } 4639 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4640 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4641 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4642 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4643 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4644 } 4645 sub_schurs->S_Ej_all = S_new; 4646 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4647 if (sub_schurs->sum_S_Ej_all) { 4648 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4649 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4650 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4651 if (pcbddc->deluxe_zerorows) { 4652 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4653 } 4654 sub_schurs->sum_S_Ej_all = S_new; 4655 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4656 } 4657 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4658 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4659 } 4660 /* destroy any change of basis context in sub_schurs */ 4661 if (sub_schurs && sub_schurs->change) { 4662 PetscInt i; 4663 4664 for (i=0;i<sub_schurs->n_subs;i++) { 4665 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4666 } 4667 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4668 } 4669 } 4670 if (pcbddc->switch_static) { /* need to save the local change */ 4671 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4672 } else { 4673 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4674 } 4675 /* determine if any process has changed the pressures locally */ 4676 pcbddc->change_interior = pcbddc->benign_have_null; 4677 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4678 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4679 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4680 pcbddc->use_qr_single = qr_needed; 4681 } 4682 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4683 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4684 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4685 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4686 } else { 4687 Mat benign_global = NULL; 4688 if (pcbddc->benign_have_null) { 4689 Mat tmat; 4690 4691 pcbddc->change_interior = PETSC_TRUE; 4692 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4693 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4694 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4695 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4696 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4697 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4698 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4699 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4700 if (pcbddc->benign_change) { 4701 Mat M; 4702 4703 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4704 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4705 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4706 ierr = MatDestroy(&M);CHKERRQ(ierr); 4707 } else { 4708 Mat eye; 4709 PetscScalar *array; 4710 4711 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4712 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4713 for (i=0;i<pcis->n;i++) { 4714 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4715 } 4716 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4717 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4718 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4719 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4720 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4721 } 4722 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4723 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4724 } 4725 if (pcbddc->user_ChangeOfBasisMatrix) { 4726 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4727 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4728 } else if (pcbddc->benign_have_null) { 4729 pcbddc->ChangeOfBasisMatrix = benign_global; 4730 } 4731 } 4732 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4733 IS is_global; 4734 const PetscInt *gidxs; 4735 4736 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4737 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4738 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4739 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4740 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4741 } 4742 } 4743 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4744 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4745 } 4746 4747 if (!pcbddc->fake_change) { 4748 /* add pressure dofs to set of primal nodes for numbering purposes */ 4749 for (i=0;i<pcbddc->benign_n;i++) { 4750 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4751 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4752 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4753 pcbddc->local_primal_size_cc++; 4754 pcbddc->local_primal_size++; 4755 } 4756 4757 /* check if a new primal space has been introduced (also take into account benign trick) */ 4758 pcbddc->new_primal_space_local = PETSC_TRUE; 4759 if (olocal_primal_size == pcbddc->local_primal_size) { 4760 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4761 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4762 if (!pcbddc->new_primal_space_local) { 4763 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4764 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4765 } 4766 } 4767 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4768 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4769 } 4770 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4771 4772 /* flush dbg viewer */ 4773 if (pcbddc->dbg_flag) { 4774 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4775 } 4776 4777 /* free workspace */ 4778 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4779 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4780 if (!pcbddc->adaptive_selection) { 4781 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4782 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4783 } else { 4784 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4785 pcbddc->adaptive_constraints_idxs_ptr, 4786 pcbddc->adaptive_constraints_data_ptr, 4787 pcbddc->adaptive_constraints_idxs, 4788 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4789 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4790 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4791 } 4792 PetscFunctionReturn(0); 4793 } 4794 4795 #undef __FUNCT__ 4796 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4797 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4798 { 4799 ISLocalToGlobalMapping map; 4800 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4801 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4802 PetscInt ierr,i,N; 4803 4804 PetscFunctionBegin; 4805 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 4806 /* Reset previously computed graph */ 4807 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4808 /* Init local Graph struct */ 4809 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4810 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 4811 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N);CHKERRQ(ierr); 4812 4813 /* Check validity of the csr graph passed in by the user */ 4814 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4815 4816 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4817 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4818 PetscInt *xadj,*adjncy; 4819 PetscInt nvtxs; 4820 PetscBool flg_row=PETSC_FALSE; 4821 4822 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4823 if (flg_row) { 4824 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4825 pcbddc->computed_rowadj = PETSC_TRUE; 4826 } 4827 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4828 } 4829 if (pcbddc->dbg_flag) { 4830 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4831 } 4832 4833 /* Setup of Graph */ 4834 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4835 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4836 4837 /* attach info on disconnected subdomains if present */ 4838 if (pcbddc->n_local_subs) { 4839 PetscInt *local_subs; 4840 4841 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4842 for (i=0;i<pcbddc->n_local_subs;i++) { 4843 const PetscInt *idxs; 4844 PetscInt nl,j; 4845 4846 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4847 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4848 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 4849 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4850 } 4851 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4852 pcbddc->mat_graph->local_subs = local_subs; 4853 } 4854 4855 /* Graph's connected components analysis */ 4856 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4857 4858 /* set flag indicating analysis has been done */ 4859 pcbddc->graphanalyzed = PETSC_TRUE; 4860 PetscFunctionReturn(0); 4861 } 4862 4863 #undef __FUNCT__ 4864 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4865 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4866 { 4867 PetscInt i,j; 4868 PetscScalar *alphas; 4869 PetscErrorCode ierr; 4870 4871 PetscFunctionBegin; 4872 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4873 for (i=0;i<n;i++) { 4874 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4875 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4876 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4877 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4878 } 4879 ierr = PetscFree(alphas);CHKERRQ(ierr); 4880 PetscFunctionReturn(0); 4881 } 4882 4883 #undef __FUNCT__ 4884 #define __FUNCT__ "MatISGetSubassemblingPattern" 4885 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4886 { 4887 Mat A; 4888 PetscInt n_neighs,*neighs,*n_shared,**shared; 4889 PetscMPIInt size,rank,color; 4890 PetscInt *xadj,*adjncy; 4891 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4892 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4893 PetscInt void_procs,*procs_candidates = NULL; 4894 PetscInt xadj_count, *count; 4895 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4896 PetscSubcomm psubcomm; 4897 MPI_Comm subcomm; 4898 PetscErrorCode ierr; 4899 4900 PetscFunctionBegin; 4901 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4902 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4903 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4904 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4905 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4906 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4907 4908 if (have_void) *have_void = PETSC_FALSE; 4909 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4910 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4911 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4912 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4913 im_active = !!(n); 4914 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4915 void_procs = size - active_procs; 4916 /* get ranks of of non-active processes in mat communicator */ 4917 if (void_procs) { 4918 PetscInt ncand; 4919 4920 if (have_void) *have_void = PETSC_TRUE; 4921 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4922 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4923 for (i=0,ncand=0;i<size;i++) { 4924 if (!procs_candidates[i]) { 4925 procs_candidates[ncand++] = i; 4926 } 4927 } 4928 /* force n_subdomains to be not greater that the number of non-active processes */ 4929 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4930 } 4931 4932 /* number of subdomains requested greater than active processes -> just shift the matrix 4933 number of subdomains requested 1 -> send to master or first candidate in voids */ 4934 if (active_procs < *n_subdomains || *n_subdomains == 1) { 4935 PetscInt issize,isidx,dest; 4936 if (*n_subdomains == 1) dest = 0; 4937 else dest = rank; 4938 if (im_active) { 4939 issize = 1; 4940 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4941 isidx = procs_candidates[dest]; 4942 } else { 4943 isidx = dest; 4944 } 4945 } else { 4946 issize = 0; 4947 isidx = -1; 4948 } 4949 *n_subdomains = active_procs; 4950 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4951 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4952 PetscFunctionReturn(0); 4953 } 4954 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4955 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4956 threshold = PetscMax(threshold,2); 4957 4958 /* Get info on mapping */ 4959 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4960 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4961 4962 /* build local CSR graph of subdomains' connectivity */ 4963 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4964 xadj[0] = 0; 4965 xadj[1] = PetscMax(n_neighs-1,0); 4966 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4967 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4968 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4969 for (i=1;i<n_neighs;i++) 4970 for (j=0;j<n_shared[i];j++) 4971 count[shared[i][j]] += 1; 4972 4973 xadj_count = 0; 4974 for (i=1;i<n_neighs;i++) { 4975 for (j=0;j<n_shared[i];j++) { 4976 if (count[shared[i][j]] < threshold) { 4977 adjncy[xadj_count] = neighs[i]; 4978 adjncy_wgt[xadj_count] = n_shared[i]; 4979 xadj_count++; 4980 break; 4981 } 4982 } 4983 } 4984 xadj[1] = xadj_count; 4985 ierr = PetscFree(count);CHKERRQ(ierr); 4986 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4987 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4988 4989 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4990 4991 /* Restrict work on active processes only */ 4992 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4993 if (void_procs) { 4994 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4995 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4996 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4997 subcomm = PetscSubcommChild(psubcomm); 4998 } else { 4999 psubcomm = NULL; 5000 subcomm = PetscObjectComm((PetscObject)mat); 5001 } 5002 5003 v_wgt = NULL; 5004 if (!color) { 5005 ierr = PetscFree(xadj);CHKERRQ(ierr); 5006 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5007 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5008 } else { 5009 Mat subdomain_adj; 5010 IS new_ranks,new_ranks_contig; 5011 MatPartitioning partitioner; 5012 PetscInt rstart=0,rend=0; 5013 PetscInt *is_indices,*oldranks; 5014 PetscMPIInt size; 5015 PetscBool aggregate; 5016 5017 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5018 if (void_procs) { 5019 PetscInt prank = rank; 5020 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5021 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5022 for (i=0;i<xadj[1];i++) { 5023 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5024 } 5025 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5026 } else { 5027 oldranks = NULL; 5028 } 5029 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5030 if (aggregate) { /* TODO: all this part could be made more efficient */ 5031 PetscInt lrows,row,ncols,*cols; 5032 PetscMPIInt nrank; 5033 PetscScalar *vals; 5034 5035 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5036 lrows = 0; 5037 if (nrank<redprocs) { 5038 lrows = size/redprocs; 5039 if (nrank<size%redprocs) lrows++; 5040 } 5041 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5042 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5043 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5044 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5045 row = nrank; 5046 ncols = xadj[1]-xadj[0]; 5047 cols = adjncy; 5048 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5049 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5050 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5051 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5052 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5053 ierr = PetscFree(xadj);CHKERRQ(ierr); 5054 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5055 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5056 ierr = PetscFree(vals);CHKERRQ(ierr); 5057 if (use_vwgt) { 5058 Vec v; 5059 const PetscScalar *array; 5060 PetscInt nl; 5061 5062 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5063 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5064 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5065 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5066 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5067 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5068 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5069 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5070 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5071 ierr = VecDestroy(&v);CHKERRQ(ierr); 5072 } 5073 } else { 5074 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5075 if (use_vwgt) { 5076 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5077 v_wgt[0] = local_size; 5078 } 5079 } 5080 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5081 5082 /* Partition */ 5083 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5084 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5085 if (v_wgt) { 5086 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5087 } 5088 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5089 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5090 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5091 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5092 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5093 5094 /* renumber new_ranks to avoid "holes" in new set of processors */ 5095 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5096 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5097 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5098 if (!aggregate) { 5099 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5100 #if defined(PETSC_USE_DEBUG) 5101 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5102 #endif 5103 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5104 } else if (oldranks) { 5105 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5106 } else { 5107 ranks_send_to_idx[0] = is_indices[0]; 5108 } 5109 } else { 5110 PetscInt idxs[1]; 5111 PetscMPIInt tag; 5112 MPI_Request *reqs; 5113 5114 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5115 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5116 for (i=rstart;i<rend;i++) { 5117 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5118 } 5119 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5120 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5121 ierr = PetscFree(reqs);CHKERRQ(ierr); 5122 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5123 #if defined(PETSC_USE_DEBUG) 5124 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5125 #endif 5126 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5127 } else if (oldranks) { 5128 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5129 } else { 5130 ranks_send_to_idx[0] = idxs[0]; 5131 } 5132 } 5133 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5134 /* clean up */ 5135 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5136 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5137 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5138 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5139 } 5140 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5141 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5142 5143 /* assemble parallel IS for sends */ 5144 i = 1; 5145 if (!color) i=0; 5146 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5151 5152 #undef __FUNCT__ 5153 #define __FUNCT__ "MatISSubassemble" 5154 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5155 { 5156 Mat local_mat; 5157 IS is_sends_internal; 5158 PetscInt rows,cols,new_local_rows; 5159 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5160 PetscBool ismatis,isdense,newisdense,destroy_mat; 5161 ISLocalToGlobalMapping l2gmap; 5162 PetscInt* l2gmap_indices; 5163 const PetscInt* is_indices; 5164 MatType new_local_type; 5165 /* buffers */ 5166 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5167 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5168 PetscInt *recv_buffer_idxs_local; 5169 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5170 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5171 /* MPI */ 5172 MPI_Comm comm,comm_n; 5173 PetscSubcomm subcomm; 5174 PetscMPIInt n_sends,n_recvs,commsize; 5175 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5176 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5177 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5178 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5179 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5180 PetscErrorCode ierr; 5181 5182 PetscFunctionBegin; 5183 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5184 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5185 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5186 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5187 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5188 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5189 PetscValidLogicalCollectiveBool(mat,reuse,6); 5190 PetscValidLogicalCollectiveInt(mat,nis,8); 5191 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5192 if (nvecs) { 5193 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5194 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5195 } 5196 /* further checks */ 5197 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5198 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5199 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5200 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5201 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5202 if (reuse && *mat_n) { 5203 PetscInt mrows,mcols,mnrows,mncols; 5204 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5205 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5206 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5207 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5208 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5209 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5210 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5211 } 5212 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5213 PetscValidLogicalCollectiveInt(mat,bs,0); 5214 5215 /* prepare IS for sending if not provided */ 5216 if (!is_sends) { 5217 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5218 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5219 } else { 5220 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5221 is_sends_internal = is_sends; 5222 } 5223 5224 /* get comm */ 5225 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5226 5227 /* compute number of sends */ 5228 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5229 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5230 5231 /* compute number of receives */ 5232 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5233 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5234 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5235 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5236 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5237 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5238 ierr = PetscFree(iflags);CHKERRQ(ierr); 5239 5240 /* restrict comm if requested */ 5241 subcomm = 0; 5242 destroy_mat = PETSC_FALSE; 5243 if (restrict_comm) { 5244 PetscMPIInt color,subcommsize; 5245 5246 color = 0; 5247 if (restrict_full) { 5248 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5249 } else { 5250 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5251 } 5252 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5253 subcommsize = commsize - subcommsize; 5254 /* check if reuse has been requested */ 5255 if (reuse) { 5256 if (*mat_n) { 5257 PetscMPIInt subcommsize2; 5258 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5259 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5260 comm_n = PetscObjectComm((PetscObject)*mat_n); 5261 } else { 5262 comm_n = PETSC_COMM_SELF; 5263 } 5264 } else { /* MAT_INITIAL_MATRIX */ 5265 PetscMPIInt rank; 5266 5267 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5268 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5269 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5270 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5271 comm_n = PetscSubcommChild(subcomm); 5272 } 5273 /* flag to destroy *mat_n if not significative */ 5274 if (color) destroy_mat = PETSC_TRUE; 5275 } else { 5276 comm_n = comm; 5277 } 5278 5279 /* prepare send/receive buffers */ 5280 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5281 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5282 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5283 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5284 if (nis) { 5285 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5286 } 5287 5288 /* Get data from local matrices */ 5289 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5290 /* TODO: See below some guidelines on how to prepare the local buffers */ 5291 /* 5292 send_buffer_vals should contain the raw values of the local matrix 5293 send_buffer_idxs should contain: 5294 - MatType_PRIVATE type 5295 - PetscInt size_of_l2gmap 5296 - PetscInt global_row_indices[size_of_l2gmap] 5297 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5298 */ 5299 else { 5300 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5301 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5302 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5303 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5304 send_buffer_idxs[1] = i; 5305 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5306 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5307 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5308 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5309 for (i=0;i<n_sends;i++) { 5310 ilengths_vals[is_indices[i]] = len*len; 5311 ilengths_idxs[is_indices[i]] = len+2; 5312 } 5313 } 5314 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5315 /* additional is (if any) */ 5316 if (nis) { 5317 PetscMPIInt psum; 5318 PetscInt j; 5319 for (j=0,psum=0;j<nis;j++) { 5320 PetscInt plen; 5321 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5322 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5323 psum += len+1; /* indices + lenght */ 5324 } 5325 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5326 for (j=0,psum=0;j<nis;j++) { 5327 PetscInt plen; 5328 const PetscInt *is_array_idxs; 5329 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5330 send_buffer_idxs_is[psum] = plen; 5331 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5332 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5333 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5334 psum += plen+1; /* indices + lenght */ 5335 } 5336 for (i=0;i<n_sends;i++) { 5337 ilengths_idxs_is[is_indices[i]] = psum; 5338 } 5339 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5340 } 5341 5342 buf_size_idxs = 0; 5343 buf_size_vals = 0; 5344 buf_size_idxs_is = 0; 5345 buf_size_vecs = 0; 5346 for (i=0;i<n_recvs;i++) { 5347 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5348 buf_size_vals += (PetscInt)olengths_vals[i]; 5349 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5350 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5351 } 5352 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5353 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5354 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5355 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5356 5357 /* get new tags for clean communications */ 5358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5360 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5361 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5362 5363 /* allocate for requests */ 5364 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5365 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5366 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5367 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5368 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5369 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5370 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5371 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5372 5373 /* communications */ 5374 ptr_idxs = recv_buffer_idxs; 5375 ptr_vals = recv_buffer_vals; 5376 ptr_idxs_is = recv_buffer_idxs_is; 5377 ptr_vecs = recv_buffer_vecs; 5378 for (i=0;i<n_recvs;i++) { 5379 source_dest = onodes[i]; 5380 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5381 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5382 ptr_idxs += olengths_idxs[i]; 5383 ptr_vals += olengths_vals[i]; 5384 if (nis) { 5385 source_dest = onodes_is[i]; 5386 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5387 ptr_idxs_is += olengths_idxs_is[i]; 5388 } 5389 if (nvecs) { 5390 source_dest = onodes[i]; 5391 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5392 ptr_vecs += olengths_idxs[i]-2; 5393 } 5394 } 5395 for (i=0;i<n_sends;i++) { 5396 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5397 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5398 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5399 if (nis) { 5400 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5401 } 5402 if (nvecs) { 5403 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5404 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5405 } 5406 } 5407 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5408 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5409 5410 /* assemble new l2g map */ 5411 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5412 ptr_idxs = recv_buffer_idxs; 5413 new_local_rows = 0; 5414 for (i=0;i<n_recvs;i++) { 5415 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5416 ptr_idxs += olengths_idxs[i]; 5417 } 5418 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5419 ptr_idxs = recv_buffer_idxs; 5420 new_local_rows = 0; 5421 for (i=0;i<n_recvs;i++) { 5422 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5423 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5424 ptr_idxs += olengths_idxs[i]; 5425 } 5426 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5427 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5428 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5429 5430 /* infer new local matrix type from received local matrices type */ 5431 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5432 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5433 if (n_recvs) { 5434 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5435 ptr_idxs = recv_buffer_idxs; 5436 for (i=0;i<n_recvs;i++) { 5437 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5438 new_local_type_private = MATAIJ_PRIVATE; 5439 break; 5440 } 5441 ptr_idxs += olengths_idxs[i]; 5442 } 5443 switch (new_local_type_private) { 5444 case MATDENSE_PRIVATE: 5445 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5446 new_local_type = MATSEQAIJ; 5447 bs = 1; 5448 } else { /* if I receive only 1 dense matrix */ 5449 new_local_type = MATSEQDENSE; 5450 bs = 1; 5451 } 5452 break; 5453 case MATAIJ_PRIVATE: 5454 new_local_type = MATSEQAIJ; 5455 bs = 1; 5456 break; 5457 case MATBAIJ_PRIVATE: 5458 new_local_type = MATSEQBAIJ; 5459 break; 5460 case MATSBAIJ_PRIVATE: 5461 new_local_type = MATSEQSBAIJ; 5462 break; 5463 default: 5464 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5465 break; 5466 } 5467 } else { /* by default, new_local_type is seqdense */ 5468 new_local_type = MATSEQDENSE; 5469 bs = 1; 5470 } 5471 5472 /* create MATIS object if needed */ 5473 if (!reuse) { 5474 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5475 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5476 } else { 5477 /* it also destroys the local matrices */ 5478 if (*mat_n) { 5479 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5480 } else { /* this is a fake object */ 5481 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5482 } 5483 } 5484 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5485 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5486 5487 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5488 5489 /* Global to local map of received indices */ 5490 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5491 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5492 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5493 5494 /* restore attributes -> type of incoming data and its size */ 5495 buf_size_idxs = 0; 5496 for (i=0;i<n_recvs;i++) { 5497 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5498 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5499 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5500 } 5501 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5502 5503 /* set preallocation */ 5504 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5505 if (!newisdense) { 5506 PetscInt *new_local_nnz=0; 5507 5508 ptr_idxs = recv_buffer_idxs_local; 5509 if (n_recvs) { 5510 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5511 } 5512 for (i=0;i<n_recvs;i++) { 5513 PetscInt j; 5514 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5515 for (j=0;j<*(ptr_idxs+1);j++) { 5516 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5517 } 5518 } else { 5519 /* TODO */ 5520 } 5521 ptr_idxs += olengths_idxs[i]; 5522 } 5523 if (new_local_nnz) { 5524 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5525 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5526 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5527 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5528 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5529 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5530 } else { 5531 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5532 } 5533 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5534 } else { 5535 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5536 } 5537 5538 /* set values */ 5539 ptr_vals = recv_buffer_vals; 5540 ptr_idxs = recv_buffer_idxs_local; 5541 for (i=0;i<n_recvs;i++) { 5542 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5543 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5544 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5545 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5546 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5547 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5548 } else { 5549 /* TODO */ 5550 } 5551 ptr_idxs += olengths_idxs[i]; 5552 ptr_vals += olengths_vals[i]; 5553 } 5554 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5555 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5556 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5557 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5558 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5559 5560 #if 0 5561 if (!restrict_comm) { /* check */ 5562 Vec lvec,rvec; 5563 PetscReal infty_error; 5564 5565 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5566 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5567 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5568 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5569 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5570 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5571 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5572 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5573 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5574 } 5575 #endif 5576 5577 /* assemble new additional is (if any) */ 5578 if (nis) { 5579 PetscInt **temp_idxs,*count_is,j,psum; 5580 5581 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5582 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5583 ptr_idxs = recv_buffer_idxs_is; 5584 psum = 0; 5585 for (i=0;i<n_recvs;i++) { 5586 for (j=0;j<nis;j++) { 5587 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5588 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5589 psum += plen; 5590 ptr_idxs += plen+1; /* shift pointer to received data */ 5591 } 5592 } 5593 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5594 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5595 for (i=1;i<nis;i++) { 5596 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5597 } 5598 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5599 ptr_idxs = recv_buffer_idxs_is; 5600 for (i=0;i<n_recvs;i++) { 5601 for (j=0;j<nis;j++) { 5602 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5603 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5604 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5605 ptr_idxs += plen+1; /* shift pointer to received data */ 5606 } 5607 } 5608 for (i=0;i<nis;i++) { 5609 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5610 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5611 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5612 } 5613 ierr = PetscFree(count_is);CHKERRQ(ierr); 5614 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5615 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5616 } 5617 /* free workspace */ 5618 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5619 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5620 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5621 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5622 if (isdense) { 5623 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5624 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5625 } else { 5626 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5627 } 5628 if (nis) { 5629 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5630 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5631 } 5632 5633 if (nvecs) { 5634 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5635 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5636 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5637 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5638 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5639 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5640 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5641 /* set values */ 5642 ptr_vals = recv_buffer_vecs; 5643 ptr_idxs = recv_buffer_idxs_local; 5644 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5645 for (i=0;i<n_recvs;i++) { 5646 PetscInt j; 5647 for (j=0;j<*(ptr_idxs+1);j++) { 5648 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5649 } 5650 ptr_idxs += olengths_idxs[i]; 5651 ptr_vals += olengths_idxs[i]-2; 5652 } 5653 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5654 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5655 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5656 } 5657 5658 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5659 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5660 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5661 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5662 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5663 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5664 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5665 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5666 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5667 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5668 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5669 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5670 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5671 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5672 ierr = PetscFree(onodes);CHKERRQ(ierr); 5673 if (nis) { 5674 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5675 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5676 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5677 } 5678 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5679 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5680 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5681 for (i=0;i<nis;i++) { 5682 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5683 } 5684 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5685 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5686 } 5687 *mat_n = NULL; 5688 } 5689 PetscFunctionReturn(0); 5690 } 5691 5692 /* temporary hack into ksp private data structure */ 5693 #include <petsc/private/kspimpl.h> 5694 5695 #undef __FUNCT__ 5696 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5697 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5698 { 5699 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5700 PC_IS *pcis = (PC_IS*)pc->data; 5701 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5702 Mat coarsedivudotp = NULL; 5703 MatNullSpace CoarseNullSpace = NULL; 5704 ISLocalToGlobalMapping coarse_islg; 5705 IS coarse_is,*isarray; 5706 PetscInt i,im_active=-1,active_procs=-1; 5707 PetscInt nis,nisdofs,nisneu,nisvert; 5708 PC pc_temp; 5709 PCType coarse_pc_type; 5710 KSPType coarse_ksp_type; 5711 PetscBool multilevel_requested,multilevel_allowed; 5712 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5713 Mat t_coarse_mat_is; 5714 PetscInt ncoarse; 5715 PetscBool compute_vecs = PETSC_FALSE; 5716 PetscScalar *array; 5717 MatReuse coarse_mat_reuse; 5718 PetscBool restr, full_restr, have_void; 5719 PetscErrorCode ierr; 5720 5721 PetscFunctionBegin; 5722 /* Assign global numbering to coarse dofs */ 5723 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5724 PetscInt ocoarse_size; 5725 compute_vecs = PETSC_TRUE; 5726 ocoarse_size = pcbddc->coarse_size; 5727 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5728 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5729 /* see if we can avoid some work */ 5730 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5731 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5732 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5733 PC pc; 5734 PetscBool isbddc; 5735 5736 /* temporary workaround since PCBDDC does not have a reset method so far */ 5737 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5738 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5739 if (isbddc) { 5740 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5741 } else { 5742 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5743 } 5744 coarse_reuse = PETSC_FALSE; 5745 } else { /* we can safely reuse already computed coarse matrix */ 5746 coarse_reuse = PETSC_TRUE; 5747 } 5748 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5749 coarse_reuse = PETSC_FALSE; 5750 } 5751 /* reset any subassembling information */ 5752 if (!coarse_reuse || pcbddc->recompute_topography) { 5753 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5754 } 5755 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5756 coarse_reuse = PETSC_TRUE; 5757 } 5758 /* assemble coarse matrix */ 5759 if (coarse_reuse && pcbddc->coarse_ksp) { 5760 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5761 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5762 coarse_mat_reuse = MAT_REUSE_MATRIX; 5763 } else { 5764 coarse_mat = NULL; 5765 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5766 } 5767 5768 /* creates temporary l2gmap and IS for coarse indexes */ 5769 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5770 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5771 5772 /* creates temporary MATIS object for coarse matrix */ 5773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5774 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5775 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5776 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5777 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5778 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5779 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5782 5783 /* count "active" (i.e. with positive local size) and "void" processes */ 5784 im_active = !!(pcis->n); 5785 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5786 5787 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 5788 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5789 /* full_restr : just use the receivers from the subassembling pattern */ 5790 coarse_mat_is = NULL; 5791 multilevel_allowed = PETSC_FALSE; 5792 multilevel_requested = PETSC_FALSE; 5793 full_restr = PETSC_TRUE; 5794 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5795 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5796 if (multilevel_requested) { 5797 ncoarse = active_procs/pcbddc->coarsening_ratio; 5798 restr = PETSC_FALSE; 5799 full_restr = PETSC_FALSE; 5800 } else { 5801 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5802 restr = PETSC_TRUE; 5803 full_restr = PETSC_TRUE; 5804 } 5805 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5806 ncoarse = PetscMax(1,ncoarse); 5807 if (!pcbddc->coarse_subassembling) { 5808 if (pcbddc->coarsening_ratio > 1) { 5809 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5810 } else { 5811 PetscMPIInt size,rank; 5812 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5813 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5814 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5815 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5816 } 5817 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5818 PetscInt psum; 5819 PetscMPIInt size; 5820 if (pcbddc->coarse_ksp) psum = 1; 5821 else psum = 0; 5822 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5823 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5824 if (ncoarse < size) have_void = PETSC_TRUE; 5825 } 5826 /* determine if we can go multilevel */ 5827 if (multilevel_requested) { 5828 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5829 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5830 } 5831 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5832 5833 /* dump subassembling pattern */ 5834 if (pcbddc->dbg_flag && multilevel_allowed) { 5835 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5836 } 5837 5838 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5839 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5840 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5841 const PetscInt *idxs; 5842 ISLocalToGlobalMapping tmap; 5843 5844 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5845 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5846 /* allocate space for temporary storage */ 5847 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5848 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5849 /* allocate for IS array */ 5850 nisdofs = pcbddc->n_ISForDofsLocal; 5851 nisneu = !!pcbddc->NeumannBoundariesLocal; 5852 nisvert = 0; /* nisvert is not used */ 5853 nis = nisdofs + nisneu + nisvert; 5854 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5855 /* dofs splitting */ 5856 for (i=0;i<nisdofs;i++) { 5857 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5858 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5859 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5860 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5861 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5862 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5863 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5864 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5865 } 5866 /* neumann boundaries */ 5867 if (pcbddc->NeumannBoundariesLocal) { 5868 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5869 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5870 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5871 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5872 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5873 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5874 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5875 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5876 } 5877 /* free memory */ 5878 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5879 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5880 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5881 } else { 5882 nis = 0; 5883 nisdofs = 0; 5884 nisneu = 0; 5885 nisvert = 0; 5886 isarray = NULL; 5887 } 5888 /* destroy no longer needed map */ 5889 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5890 5891 /* subassemble */ 5892 if (multilevel_allowed) { 5893 Vec vp[1]; 5894 PetscInt nvecs = 0; 5895 PetscBool reuse,reuser; 5896 5897 if (coarse_mat) reuse = PETSC_TRUE; 5898 else reuse = PETSC_FALSE; 5899 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5900 vp[0] = NULL; 5901 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5902 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5903 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5904 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5905 nvecs = 1; 5906 5907 if (pcbddc->divudotp) { 5908 Mat B,loc_divudotp; 5909 Vec v,p; 5910 IS dummy; 5911 PetscInt np; 5912 5913 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5914 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5915 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5916 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5917 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5918 ierr = VecSet(p,1.);CHKERRQ(ierr); 5919 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5920 ierr = VecDestroy(&p);CHKERRQ(ierr); 5921 ierr = MatDestroy(&B);CHKERRQ(ierr); 5922 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5923 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5924 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5925 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5926 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5927 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5928 ierr = VecDestroy(&v);CHKERRQ(ierr); 5929 } 5930 } 5931 if (reuser) { 5932 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5933 } else { 5934 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5935 } 5936 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5937 PetscScalar *arraym,*arrayv; 5938 PetscInt nl; 5939 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5941 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5942 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5943 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5944 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5945 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5946 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5947 } else { 5948 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5949 } 5950 } else { 5951 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5952 } 5953 if (coarse_mat_is || coarse_mat) { 5954 PetscMPIInt size; 5955 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5956 if (!multilevel_allowed) { 5957 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5958 } else { 5959 Mat A; 5960 5961 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5962 if (coarse_mat_is) { 5963 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5964 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5965 coarse_mat = coarse_mat_is; 5966 } 5967 /* be sure we don't have MatSeqDENSE as local mat */ 5968 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5969 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5970 } 5971 } 5972 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5973 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5974 5975 /* create local to global scatters for coarse problem */ 5976 if (compute_vecs) { 5977 PetscInt lrows; 5978 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5979 if (coarse_mat) { 5980 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5981 } else { 5982 lrows = 0; 5983 } 5984 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5985 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5986 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5987 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5988 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5989 } 5990 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5991 5992 /* set defaults for coarse KSP and PC */ 5993 if (multilevel_allowed) { 5994 coarse_ksp_type = KSPRICHARDSON; 5995 coarse_pc_type = PCBDDC; 5996 } else { 5997 coarse_ksp_type = KSPPREONLY; 5998 coarse_pc_type = PCREDUNDANT; 5999 } 6000 6001 /* print some info if requested */ 6002 if (pcbddc->dbg_flag) { 6003 if (!multilevel_allowed) { 6004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6005 if (multilevel_requested) { 6006 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6007 } else if (pcbddc->max_levels) { 6008 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6009 } 6010 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6011 } 6012 } 6013 6014 /* create the coarse KSP object only once with defaults */ 6015 if (coarse_mat) { 6016 PetscViewer dbg_viewer = NULL; 6017 if (pcbddc->dbg_flag) { 6018 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6019 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6020 } 6021 if (!pcbddc->coarse_ksp) { 6022 char prefix[256],str_level[16]; 6023 size_t len; 6024 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6025 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6026 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6027 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6028 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6029 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6030 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6031 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6032 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6033 /* prefix */ 6034 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6035 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6036 if (!pcbddc->current_level) { 6037 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6038 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6039 } else { 6040 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6041 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6042 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6043 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6044 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6045 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6046 } 6047 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6048 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6049 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6050 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6051 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6052 /* allow user customization */ 6053 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6054 } 6055 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6056 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6057 if (nisdofs) { 6058 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6059 for (i=0;i<nisdofs;i++) { 6060 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6061 } 6062 } 6063 if (nisneu) { 6064 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6065 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6066 } 6067 if (nisvert) { 6068 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6069 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6070 } 6071 6072 /* get some info after set from options */ 6073 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6074 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6075 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6076 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6077 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6078 isbddc = PETSC_FALSE; 6079 } 6080 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6081 if (isredundant) { 6082 KSP inner_ksp; 6083 PC inner_pc; 6084 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6085 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6086 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6087 } 6088 6089 /* parameters which miss an API */ 6090 if (isbddc) { 6091 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6092 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6093 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6094 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6095 if (pcbddc_coarse->benign_saddle_point) { 6096 Mat coarsedivudotp_is; 6097 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6098 IS row,col; 6099 const PetscInt *gidxs; 6100 PetscInt n,st,M,N; 6101 6102 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6103 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6104 st = st-n; 6105 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6106 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6107 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6108 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6109 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6110 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6111 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6112 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6113 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6114 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6115 ierr = ISDestroy(&row);CHKERRQ(ierr); 6116 ierr = ISDestroy(&col);CHKERRQ(ierr); 6117 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6118 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6119 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6120 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6121 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6122 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6123 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6124 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6125 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6126 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6127 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6128 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6129 } 6130 } 6131 6132 /* propagate symmetry info of coarse matrix */ 6133 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6134 if (pc->pmat->symmetric_set) { 6135 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6136 } 6137 if (pc->pmat->hermitian_set) { 6138 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6139 } 6140 if (pc->pmat->spd_set) { 6141 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6142 } 6143 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6144 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6145 } 6146 /* set operators */ 6147 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6148 if (pcbddc->dbg_flag) { 6149 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6150 } 6151 } 6152 ierr = PetscFree(isarray);CHKERRQ(ierr); 6153 #if 0 6154 { 6155 PetscViewer viewer; 6156 char filename[256]; 6157 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6158 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6159 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6160 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6161 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6162 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6163 } 6164 #endif 6165 6166 if (pcbddc->coarse_ksp) { 6167 Vec crhs,csol; 6168 6169 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6170 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6171 if (!csol) { 6172 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6173 } 6174 if (!crhs) { 6175 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6176 } 6177 } 6178 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6179 6180 /* compute null space for coarse solver if the benign trick has been requested */ 6181 if (pcbddc->benign_null) { 6182 6183 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6184 for (i=0;i<pcbddc->benign_n;i++) { 6185 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6186 } 6187 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6188 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6189 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6190 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6191 if (coarse_mat) { 6192 Vec nullv; 6193 PetscScalar *array,*array2; 6194 PetscInt nl; 6195 6196 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6197 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6198 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6199 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6200 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6201 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6202 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6203 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6204 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6205 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6206 } 6207 } 6208 6209 if (pcbddc->coarse_ksp) { 6210 PetscBool ispreonly; 6211 6212 if (CoarseNullSpace) { 6213 PetscBool isnull; 6214 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6215 if (isnull) { 6216 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6217 } 6218 /* TODO: add local nullspaces (if any) */ 6219 } 6220 /* setup coarse ksp */ 6221 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6222 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6223 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6224 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6225 KSP check_ksp; 6226 KSPType check_ksp_type; 6227 PC check_pc; 6228 Vec check_vec,coarse_vec; 6229 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6230 PetscInt its; 6231 PetscBool compute_eigs; 6232 PetscReal *eigs_r,*eigs_c; 6233 PetscInt neigs; 6234 const char *prefix; 6235 6236 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6237 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6238 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6239 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6240 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6241 /* prevent from setup unneeded object */ 6242 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6243 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6244 if (ispreonly) { 6245 check_ksp_type = KSPPREONLY; 6246 compute_eigs = PETSC_FALSE; 6247 } else { 6248 check_ksp_type = KSPGMRES; 6249 compute_eigs = PETSC_TRUE; 6250 } 6251 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6252 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6253 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6254 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6255 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6256 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6257 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6258 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6259 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6260 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6261 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6262 /* create random vec */ 6263 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6264 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6265 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6266 /* solve coarse problem */ 6267 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6268 /* set eigenvalue estimation if preonly has not been requested */ 6269 if (compute_eigs) { 6270 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6271 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6272 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6273 if (neigs) { 6274 lambda_max = eigs_r[neigs-1]; 6275 lambda_min = eigs_r[0]; 6276 if (pcbddc->use_coarse_estimates) { 6277 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6278 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6279 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6280 } 6281 } 6282 } 6283 } 6284 6285 /* check coarse problem residual error */ 6286 if (pcbddc->dbg_flag) { 6287 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6288 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6289 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6290 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6291 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6292 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6293 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6294 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6295 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6296 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6297 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6298 if (CoarseNullSpace) { 6299 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6300 } 6301 if (compute_eigs) { 6302 PetscReal lambda_max_s,lambda_min_s; 6303 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6304 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6305 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6306 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6307 for (i=0;i<neigs;i++) { 6308 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6309 } 6310 } 6311 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6312 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6313 } 6314 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6315 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6316 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6317 if (compute_eigs) { 6318 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6319 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6320 } 6321 } 6322 } 6323 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6324 /* print additional info */ 6325 if (pcbddc->dbg_flag) { 6326 /* waits until all processes reaches this point */ 6327 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6328 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6330 } 6331 6332 /* free memory */ 6333 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6334 PetscFunctionReturn(0); 6335 } 6336 6337 #undef __FUNCT__ 6338 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6339 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6340 { 6341 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6342 PC_IS* pcis = (PC_IS*)pc->data; 6343 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6344 IS subset,subset_mult,subset_n; 6345 PetscInt local_size,coarse_size=0; 6346 PetscInt *local_primal_indices=NULL; 6347 const PetscInt *t_local_primal_indices; 6348 PetscErrorCode ierr; 6349 6350 PetscFunctionBegin; 6351 /* Compute global number of coarse dofs */ 6352 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6353 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6354 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6355 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6356 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6357 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6358 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6359 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6360 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6361 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6362 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6363 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6364 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6365 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6366 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6367 6368 /* check numbering */ 6369 if (pcbddc->dbg_flag) { 6370 PetscScalar coarsesum,*array,*array2; 6371 PetscInt i; 6372 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6373 6374 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6375 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6376 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6377 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6378 /* counter */ 6379 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6380 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6381 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6382 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6383 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6384 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6385 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6386 for (i=0;i<pcbddc->local_primal_size;i++) { 6387 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6388 } 6389 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6390 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6391 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6392 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6393 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6394 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6395 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6396 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6397 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6398 for (i=0;i<pcis->n;i++) { 6399 if (array[i] != 0.0 && array[i] != array2[i]) { 6400 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6401 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6402 set_error = PETSC_TRUE; 6403 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6405 } 6406 } 6407 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6408 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6410 for (i=0;i<pcis->n;i++) { 6411 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6412 } 6413 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6414 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6415 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6416 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6417 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6419 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6420 PetscInt *gidxs; 6421 6422 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6423 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6427 for (i=0;i<pcbddc->local_primal_size;i++) { 6428 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6429 } 6430 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6431 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6432 } 6433 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6434 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6435 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6436 } 6437 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6438 /* get back data */ 6439 *coarse_size_n = coarse_size; 6440 *local_primal_indices_n = local_primal_indices; 6441 PetscFunctionReturn(0); 6442 } 6443 6444 #undef __FUNCT__ 6445 #define __FUNCT__ "PCBDDCGlobalToLocal" 6446 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6447 { 6448 IS localis_t; 6449 PetscInt i,lsize,*idxs,n; 6450 PetscScalar *vals; 6451 PetscErrorCode ierr; 6452 6453 PetscFunctionBegin; 6454 /* get indices in local ordering exploiting local to global map */ 6455 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6456 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6457 for (i=0;i<lsize;i++) vals[i] = 1.0; 6458 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6459 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6460 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6461 if (idxs) { /* multilevel guard */ 6462 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6463 } 6464 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6465 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6466 ierr = PetscFree(vals);CHKERRQ(ierr); 6467 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6468 /* now compute set in local ordering */ 6469 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6470 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6471 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6472 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6473 for (i=0,lsize=0;i<n;i++) { 6474 if (PetscRealPart(vals[i]) > 0.5) { 6475 lsize++; 6476 } 6477 } 6478 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6479 for (i=0,lsize=0;i<n;i++) { 6480 if (PetscRealPart(vals[i]) > 0.5) { 6481 idxs[lsize++] = i; 6482 } 6483 } 6484 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6485 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6486 *localis = localis_t; 6487 PetscFunctionReturn(0); 6488 } 6489 6490 #undef __FUNCT__ 6491 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6492 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6493 { 6494 PC_IS *pcis=(PC_IS*)pc->data; 6495 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6496 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6497 Mat S_j; 6498 PetscInt *used_xadj,*used_adjncy; 6499 PetscBool free_used_adj; 6500 PetscErrorCode ierr; 6501 6502 PetscFunctionBegin; 6503 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6504 free_used_adj = PETSC_FALSE; 6505 if (pcbddc->sub_schurs_layers == -1) { 6506 used_xadj = NULL; 6507 used_adjncy = NULL; 6508 } else { 6509 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6510 used_xadj = pcbddc->mat_graph->xadj; 6511 used_adjncy = pcbddc->mat_graph->adjncy; 6512 } else if (pcbddc->computed_rowadj) { 6513 used_xadj = pcbddc->mat_graph->xadj; 6514 used_adjncy = pcbddc->mat_graph->adjncy; 6515 } else { 6516 PetscBool flg_row=PETSC_FALSE; 6517 const PetscInt *xadj,*adjncy; 6518 PetscInt nvtxs; 6519 6520 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6521 if (flg_row) { 6522 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6523 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6524 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6525 free_used_adj = PETSC_TRUE; 6526 } else { 6527 pcbddc->sub_schurs_layers = -1; 6528 used_xadj = NULL; 6529 used_adjncy = NULL; 6530 } 6531 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6532 } 6533 } 6534 6535 /* setup sub_schurs data */ 6536 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6537 if (!sub_schurs->schur_explicit) { 6538 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6539 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6540 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6541 } else { 6542 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6543 PetscBool isseqaij,need_change = PETSC_FALSE; 6544 PetscInt benign_n; 6545 Mat change = NULL; 6546 Vec scaling = NULL; 6547 IS change_primal = NULL; 6548 6549 if (!pcbddc->use_vertices && reuse_solvers) { 6550 PetscInt n_vertices; 6551 6552 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6553 reuse_solvers = (PetscBool)!n_vertices; 6554 } 6555 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6556 if (!isseqaij) { 6557 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6558 if (matis->A == pcbddc->local_mat) { 6559 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6560 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6561 } else { 6562 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6563 } 6564 } 6565 if (!pcbddc->benign_change_explicit) { 6566 benign_n = pcbddc->benign_n; 6567 } else { 6568 benign_n = 0; 6569 } 6570 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6571 We need a global reduction to avoid possible deadlocks. 6572 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6573 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6574 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6575 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6576 need_change = (PetscBool)(!need_change); 6577 } 6578 /* If the user defines additional constraints, we import them here. 6579 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6580 if (need_change) { 6581 PC_IS *pcisf; 6582 PC_BDDC *pcbddcf; 6583 PC pcf; 6584 6585 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6586 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6587 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6588 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6589 /* hacks */ 6590 pcisf = (PC_IS*)pcf->data; 6591 pcisf->is_B_local = pcis->is_B_local; 6592 pcisf->vec1_N = pcis->vec1_N; 6593 pcisf->BtoNmap = pcis->BtoNmap; 6594 pcisf->n = pcis->n; 6595 pcisf->n_B = pcis->n_B; 6596 pcbddcf = (PC_BDDC*)pcf->data; 6597 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6598 pcbddcf->mat_graph = pcbddc->mat_graph; 6599 pcbddcf->use_faces = PETSC_TRUE; 6600 pcbddcf->use_change_of_basis = PETSC_TRUE; 6601 pcbddcf->use_change_on_faces = PETSC_TRUE; 6602 pcbddcf->use_qr_single = PETSC_TRUE; 6603 pcbddcf->fake_change = PETSC_TRUE; 6604 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6605 /* store information on primal vertices and change of basis (in local numbering) */ 6606 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6607 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6608 change = pcbddcf->ConstraintMatrix; 6609 pcbddcf->ConstraintMatrix = NULL; 6610 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6611 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6612 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6613 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6614 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6615 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6616 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6617 pcf->ops->destroy = NULL; 6618 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6619 } 6620 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6621 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6622 ierr = MatDestroy(&change);CHKERRQ(ierr); 6623 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6624 } 6625 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6626 6627 /* free adjacency */ 6628 if (free_used_adj) { 6629 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6630 } 6631 PetscFunctionReturn(0); 6632 } 6633 6634 #undef __FUNCT__ 6635 #define __FUNCT__ "PCBDDCInitSubSchurs" 6636 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6637 { 6638 PC_IS *pcis=(PC_IS*)pc->data; 6639 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6640 PCBDDCGraph graph; 6641 PetscErrorCode ierr; 6642 6643 PetscFunctionBegin; 6644 /* attach interface graph for determining subsets */ 6645 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6646 IS verticesIS,verticescomm; 6647 PetscInt vsize,*idxs; 6648 6649 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6650 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6651 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6652 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6653 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6654 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6655 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6656 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6657 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6658 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6659 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6660 } else { 6661 graph = pcbddc->mat_graph; 6662 } 6663 /* print some info */ 6664 if (pcbddc->dbg_flag) { 6665 IS vertices; 6666 PetscInt nv,nedges,nfaces; 6667 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6668 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6669 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6670 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6676 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6677 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6678 } 6679 6680 /* sub_schurs init */ 6681 if (!pcbddc->sub_schurs) { 6682 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6683 } 6684 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6685 6686 /* free graph struct */ 6687 if (pcbddc->sub_schurs_rebuild) { 6688 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6689 } 6690 PetscFunctionReturn(0); 6691 } 6692 6693 #undef __FUNCT__ 6694 #define __FUNCT__ "PCBDDCCheckOperator" 6695 PetscErrorCode PCBDDCCheckOperator(PC pc) 6696 { 6697 PC_IS *pcis=(PC_IS*)pc->data; 6698 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6699 PetscErrorCode ierr; 6700 6701 PetscFunctionBegin; 6702 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6703 IS zerodiag = NULL; 6704 Mat S_j,B0_B=NULL; 6705 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6706 PetscScalar *p0_check,*array,*array2; 6707 PetscReal norm; 6708 PetscInt i; 6709 6710 /* B0 and B0_B */ 6711 if (zerodiag) { 6712 IS dummy; 6713 6714 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6715 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6716 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6717 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6718 } 6719 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6720 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6721 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6722 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6723 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6724 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6725 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6726 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6727 /* S_j */ 6728 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6729 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6730 6731 /* mimic vector in \widetilde{W}_\Gamma */ 6732 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6733 /* continuous in primal space */ 6734 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6735 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6736 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6737 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6738 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6739 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6740 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6741 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6742 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6743 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6744 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6745 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6746 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6747 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6748 6749 /* assemble rhs for coarse problem */ 6750 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6751 /* local with Schur */ 6752 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6753 if (zerodiag) { 6754 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6755 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6756 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6757 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6758 } 6759 /* sum on primal nodes the local contributions */ 6760 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6761 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6762 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6763 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6764 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6765 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6766 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6767 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6768 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6769 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6770 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6771 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6772 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6773 /* scale primal nodes (BDDC sums contibutions) */ 6774 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6775 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6776 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6777 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6778 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6779 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6780 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6781 /* global: \widetilde{B0}_B w_\Gamma */ 6782 if (zerodiag) { 6783 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6784 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6785 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6786 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6787 } 6788 /* BDDC */ 6789 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6790 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6791 6792 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6793 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6794 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6795 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6796 for (i=0;i<pcbddc->benign_n;i++) { 6797 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6798 } 6799 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6800 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6801 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6802 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6803 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6804 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6805 } 6806 PetscFunctionReturn(0); 6807 } 6808