1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* the near-null space of BDDC carries information on quadrature weights, 8 and these can be collinear -> so cheat with MatNullSpaceCreate 9 and create a suitable set of basis vectors first */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "PCBDDCNullSpaceCreate" 12 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 13 { 14 PetscErrorCode ierr; 15 PetscInt i; 16 17 PetscFunctionBegin; 18 for (i=0;i<nvecs;i++) { 19 PetscInt first,last; 20 21 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 22 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 23 if (i>=first && i < last) { 24 PetscScalar *data; 25 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 26 if (!has_const) { 27 data[i-first] = 1.; 28 } else { 29 data[2*i-first] = 1./PetscSqrtReal(2.); 30 data[2*i-first+1] = -1./PetscSqrtReal(2.); 31 } 32 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 33 } 34 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 35 } 36 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 37 for (i=0;i<nvecs;i++) { /* reset vectors */ 38 PetscInt first,last; 39 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 40 if (i>=first && i < last) { 41 PetscScalar *data; 42 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 43 if (!has_const) { 44 data[i-first] = 0.; 45 } else { 46 data[2*i-first] = 0.; 47 data[2*i-first+1] = 0.; 48 } 49 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 50 } 51 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 52 } 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 58 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 59 { 60 Mat loc_divudotp; 61 Vec p,v,vins,quad_vec,*quad_vecs; 62 ISLocalToGlobalMapping map; 63 IS *faces,*edges; 64 PetscScalar *vals; 65 const PetscScalar *array; 66 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 67 PetscMPIInt rank; 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 72 if (graph->twodim) { 73 lmaxneighs = 2; 74 } else { 75 lmaxneighs = 1; 76 for (i=0;i<ne;i++) { 77 const PetscInt *idxs; 78 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 79 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 80 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 81 } 82 lmaxneighs++; /* graph count does not include self */ 83 } 84 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 85 maxsize = 0; 86 for (i=0;i<ne;i++) { 87 PetscInt nn; 88 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 89 maxsize = PetscMax(maxsize,nn); 90 } 91 for (i=0;i<nf;i++) { 92 PetscInt nn; 93 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 94 maxsize = PetscMax(maxsize,nn); 95 } 96 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 97 /* create vectors to hold quadrature weights */ 98 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 99 if (!transpose) { 100 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 101 } else { 102 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 103 } 104 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 105 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 106 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 107 for (i=0;i<maxneighs;i++) { 108 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 109 } 110 111 /* compute local quad vec */ 112 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 113 if (!transpose) { 114 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 115 } else { 116 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 117 } 118 ierr = VecSet(p,1.);CHKERRQ(ierr); 119 if (!transpose) { 120 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 121 } else { 122 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 123 } 124 if (vl2l) { 125 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 126 } else { 127 vins = v; 128 } 129 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 130 ierr = VecDestroy(&p);CHKERRQ(ierr); 131 132 /* insert in global quadrature vecs */ 133 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 134 for (i=0;i<nf;i++) { 135 const PetscInt *idxs; 136 PetscInt idx,nn,j; 137 138 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 139 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 140 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 141 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 142 idx = -(idx+1); 143 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 144 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 145 } 146 for (i=0;i<ne;i++) { 147 const PetscInt *idxs; 148 PetscInt idx,nn,j; 149 150 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 151 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 152 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 153 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 154 idx = -(idx+1); 155 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 156 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 157 } 158 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 159 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 160 if (vl2l) { 161 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 162 } 163 ierr = VecDestroy(&v);CHKERRQ(ierr); 164 ierr = PetscFree(vals);CHKERRQ(ierr); 165 166 /* assemble near null space */ 167 for (i=0;i<maxneighs;i++) { 168 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 169 } 170 for (i=0;i<maxneighs;i++) { 171 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 172 } 173 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 178 #undef __FUNCT__ 179 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 180 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 181 { 182 PetscErrorCode ierr; 183 Vec local,global; 184 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 185 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 186 187 PetscFunctionBegin; 188 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 189 /* need to convert from global to local topology information and remove references to information in global ordering */ 190 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 191 if (pcbddc->user_provided_isfordofs) { 192 if (pcbddc->n_ISForDofs) { 193 PetscInt i; 194 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 195 for (i=0;i<pcbddc->n_ISForDofs;i++) { 196 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 197 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 198 } 199 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 200 pcbddc->n_ISForDofs = 0; 201 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 202 } 203 } else { 204 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 205 PetscInt i, n = matis->A->rmap->n; 206 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 207 if (i > 1) { 208 pcbddc->n_ISForDofsLocal = i; 209 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 210 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 211 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 212 } 213 } 214 } 215 } 216 217 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 218 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 219 } 220 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 221 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 222 } 223 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 224 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 225 } 226 ierr = VecDestroy(&global);CHKERRQ(ierr); 227 ierr = VecDestroy(&local);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 #undef __FUNCT__ 232 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 233 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 234 { 235 PC_IS *pcis = (PC_IS*)(pc->data); 236 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 237 PetscErrorCode ierr; 238 239 PetscFunctionBegin; 240 if (!pcbddc->benign_have_null) { 241 PetscFunctionReturn(0); 242 } 243 if (pcbddc->ChangeOfBasisMatrix) { 244 Vec swap; 245 246 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 247 swap = pcbddc->work_change; 248 pcbddc->work_change = r; 249 r = swap; 250 } 251 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 252 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 253 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 254 ierr = VecSet(z,0.);CHKERRQ(ierr); 255 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 256 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 257 if (pcbddc->ChangeOfBasisMatrix) { 258 Vec swap; 259 260 swap = r; 261 r = pcbddc->work_change; 262 pcbddc->work_change = swap; 263 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 264 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 265 } 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 271 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 272 { 273 PCBDDCBenignMatMult_ctx ctx; 274 PetscErrorCode ierr; 275 PetscBool apply_right,apply_left,reset_x; 276 277 PetscFunctionBegin; 278 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 279 if (transpose) { 280 apply_right = ctx->apply_left; 281 apply_left = ctx->apply_right; 282 } else { 283 apply_right = ctx->apply_right; 284 apply_left = ctx->apply_left; 285 } 286 reset_x = PETSC_FALSE; 287 if (apply_right) { 288 const PetscScalar *ax; 289 PetscInt nl,i; 290 291 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 292 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 293 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 294 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 295 for (i=0;i<ctx->benign_n;i++) { 296 PetscScalar sum,val; 297 const PetscInt *idxs; 298 PetscInt nz,j; 299 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 300 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 301 sum = 0.; 302 if (ctx->apply_p0) { 303 val = ctx->work[idxs[nz-1]]; 304 for (j=0;j<nz-1;j++) { 305 sum += ctx->work[idxs[j]]; 306 ctx->work[idxs[j]] += val; 307 } 308 } else { 309 for (j=0;j<nz-1;j++) { 310 sum += ctx->work[idxs[j]]; 311 } 312 } 313 ctx->work[idxs[nz-1]] -= sum; 314 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 315 } 316 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 317 reset_x = PETSC_TRUE; 318 } 319 if (transpose) { 320 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 321 } else { 322 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 323 } 324 if (reset_x) { 325 ierr = VecResetArray(x);CHKERRQ(ierr); 326 } 327 if (apply_left) { 328 PetscScalar *ay; 329 PetscInt i; 330 331 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 332 for (i=0;i<ctx->benign_n;i++) { 333 PetscScalar sum,val; 334 const PetscInt *idxs; 335 PetscInt nz,j; 336 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 337 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 338 val = -ay[idxs[nz-1]]; 339 if (ctx->apply_p0) { 340 sum = 0.; 341 for (j=0;j<nz-1;j++) { 342 sum += ay[idxs[j]]; 343 ay[idxs[j]] += val; 344 } 345 ay[idxs[nz-1]] += sum; 346 } else { 347 for (j=0;j<nz-1;j++) { 348 ay[idxs[j]] += val; 349 } 350 ay[idxs[nz-1]] = 0.; 351 } 352 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 353 } 354 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 361 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 362 { 363 PetscErrorCode ierr; 364 365 PetscFunctionBegin; 366 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 372 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 #undef __FUNCT__ 382 #define __FUNCT__ "PCBDDCBenignShellMat" 383 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 384 { 385 PC_IS *pcis = (PC_IS*)pc->data; 386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 387 PCBDDCBenignMatMult_ctx ctx; 388 PetscErrorCode ierr; 389 390 PetscFunctionBegin; 391 if (!restore) { 392 Mat A_IB,A_BI; 393 PetscScalar *work; 394 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 395 396 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 397 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 398 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 399 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 400 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 401 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 402 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 403 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 404 ierr = PetscNew(&ctx);CHKERRQ(ierr); 405 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 406 ctx->apply_left = PETSC_TRUE; 407 ctx->apply_right = PETSC_FALSE; 408 ctx->apply_p0 = PETSC_FALSE; 409 ctx->benign_n = pcbddc->benign_n; 410 if (reuse) { 411 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 412 ctx->free = PETSC_FALSE; 413 } else { /* TODO: could be optimized for successive solves */ 414 ISLocalToGlobalMapping N_to_D; 415 PetscInt i; 416 417 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 418 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 419 for (i=0;i<pcbddc->benign_n;i++) { 420 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 421 } 422 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 423 ctx->free = PETSC_TRUE; 424 } 425 ctx->A = pcis->A_IB; 426 ctx->work = work; 427 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 428 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 429 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 430 pcis->A_IB = A_IB; 431 432 /* A_BI as A_IB^T */ 433 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 434 pcbddc->benign_original_mat = pcis->A_BI; 435 pcis->A_BI = A_BI; 436 } else { 437 if (!pcbddc->benign_original_mat) { 438 PetscFunctionReturn(0); 439 } 440 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 441 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 442 pcis->A_IB = ctx->A; 443 ctx->A = NULL; 444 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 445 pcis->A_BI = pcbddc->benign_original_mat; 446 pcbddc->benign_original_mat = NULL; 447 if (ctx->free) { 448 PetscInt i; 449 for (i=0;i<ctx->benign_n;i++) { 450 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 451 } 452 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 453 } 454 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 455 ierr = PetscFree(ctx);CHKERRQ(ierr); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 /* used just in bddc debug mode */ 461 #undef __FUNCT__ 462 #define __FUNCT__ "PCBDDCBenignProject" 463 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 464 { 465 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 466 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 467 Mat An; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 472 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 473 if (is1) { 474 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 475 ierr = MatDestroy(&An);CHKERRQ(ierr); 476 } else { 477 *B = An; 478 } 479 PetscFunctionReturn(0); 480 } 481 482 /* TODO: add reuse flag */ 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatSeqAIJCompress" 485 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 486 { 487 Mat Bt; 488 PetscScalar *a,*bdata; 489 const PetscInt *ii,*ij; 490 PetscInt m,n,i,nnz,*bii,*bij; 491 PetscBool flg_row; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 496 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 497 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 498 nnz = n; 499 for (i=0;i<ii[n];i++) { 500 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 501 } 502 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 503 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 504 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 505 nnz = 0; 506 bii[0] = 0; 507 for (i=0;i<n;i++) { 508 PetscInt j; 509 for (j=ii[i];j<ii[i+1];j++) { 510 PetscScalar entry = a[j]; 511 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 512 bij[nnz] = ij[j]; 513 bdata[nnz] = entry; 514 nnz++; 515 } 516 } 517 bii[i+1] = nnz; 518 } 519 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 520 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 521 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 522 { 523 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 524 b->free_a = PETSC_TRUE; 525 b->free_ij = PETSC_TRUE; 526 } 527 *B = Bt; 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatDetectDisconnectedComponents" 533 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 534 { 535 Mat B; 536 IS is_dummy,*cc_n; 537 ISLocalToGlobalMapping l2gmap_dummy; 538 PCBDDCGraph graph; 539 PetscInt i,n; 540 PetscInt *xadj,*adjncy; 541 PetscInt *xadj_filtered,*adjncy_filtered; 542 PetscBool flg_row,isseqaij; 543 PetscErrorCode ierr; 544 545 PetscFunctionBegin; 546 if (!A->rmap->N || !A->cmap->N) { 547 *ncc = 0; 548 *cc = NULL; 549 PetscFunctionReturn(0); 550 } 551 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 552 if (!isseqaij && filter) { 553 PetscBool isseqdense; 554 555 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 556 if (!isseqdense) { 557 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 558 } else { /* TODO: rectangular case and LDA */ 559 PetscScalar *array; 560 PetscReal chop=1.e-6; 561 562 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 563 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 564 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 565 for (i=0;i<n;i++) { 566 PetscInt j; 567 for (j=i+1;j<n;j++) { 568 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 569 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 570 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 571 } 572 } 573 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 574 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 575 } 576 } else { 577 B = A; 578 } 579 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 580 581 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 582 if (filter) { 583 PetscScalar *data; 584 PetscInt j,cum; 585 586 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 588 cum = 0; 589 for (i=0;i<n;i++) { 590 PetscInt t; 591 592 for (j=xadj[i];j<xadj[i+1];j++) { 593 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 594 continue; 595 } 596 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 597 } 598 t = xadj_filtered[i]; 599 xadj_filtered[i] = cum; 600 cum += t; 601 } 602 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 603 } else { 604 xadj_filtered = NULL; 605 adjncy_filtered = NULL; 606 } 607 608 /* compute local connected components using PCBDDCGraph */ 609 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 610 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 611 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 612 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 613 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 614 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 615 if (xadj_filtered) { 616 graph->xadj = xadj_filtered; 617 graph->adjncy = adjncy_filtered; 618 } else { 619 graph->xadj = xadj; 620 graph->adjncy = adjncy; 621 } 622 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 623 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 624 /* partial clean up */ 625 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 626 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 627 if (A != B) { 628 ierr = MatDestroy(&B);CHKERRQ(ierr); 629 } 630 631 /* get back data */ 632 if (ncc) *ncc = graph->ncc; 633 if (cc) { 634 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 635 for (i=0;i<graph->ncc;i++) { 636 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 637 } 638 *cc = cc_n; 639 } 640 /* clean up graph */ 641 graph->xadj = 0; 642 graph->adjncy = 0; 643 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNCT__ 648 #define __FUNCT__ "PCBDDCBenignCheck" 649 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 650 { 651 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 652 PC_IS* pcis = (PC_IS*)(pc->data); 653 IS dirIS = NULL; 654 PetscInt i; 655 PetscErrorCode ierr; 656 657 PetscFunctionBegin; 658 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 659 if (zerodiag) { 660 Mat A; 661 Vec vec3_N; 662 PetscScalar *vals; 663 const PetscInt *idxs; 664 PetscInt nz,*count; 665 666 /* p0 */ 667 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 668 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 669 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 670 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 671 for (i=0;i<nz;i++) vals[i] = 1.; 672 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 673 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 674 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 675 /* v_I */ 676 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 677 for (i=0;i<nz;i++) vals[i] = 0.; 678 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 679 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 680 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 681 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 682 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 683 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 684 if (dirIS) { 685 PetscInt n; 686 687 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 688 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 689 for (i=0;i<n;i++) vals[i] = 0.; 690 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 691 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 692 } 693 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 694 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 695 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 696 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 697 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 698 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 699 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 700 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 701 ierr = PetscFree(vals);CHKERRQ(ierr); 702 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 703 704 /* there should not be any pressure dofs lying on the interface */ 705 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 706 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 707 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 708 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 709 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 710 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 711 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 712 ierr = PetscFree(count);CHKERRQ(ierr); 713 } 714 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 715 716 /* check PCBDDCBenignGetOrSetP0 */ 717 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 718 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 719 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 720 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 721 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 722 for (i=0;i<pcbddc->benign_n;i++) { 723 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 724 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 725 } 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 731 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 732 { 733 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 734 IS pressures,zerodiag,*zerodiag_subs; 735 PetscInt nz,n; 736 PetscInt *interior_dofs,n_interior_dofs; 737 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 738 PetscErrorCode ierr; 739 740 PetscFunctionBegin; 741 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 743 for (n=0;n<pcbddc->benign_n;n++) { 744 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 745 } 746 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 747 pcbddc->benign_n = 0; 748 /* if a local info on dofs is present, assumes that the last field represents "pressures" 749 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 750 Checks if all the pressure dofs in each subdomain have a zero diagonal 751 If not, a change of basis on pressures is not needed 752 since the local Schur complements are already SPD 753 */ 754 has_null_pressures = PETSC_TRUE; 755 have_null = PETSC_TRUE; 756 if (pcbddc->n_ISForDofsLocal) { 757 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 758 759 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 760 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 761 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 762 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 763 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 764 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 765 if (!sorted) { 766 ierr = ISSort(pressures);CHKERRQ(ierr); 767 } 768 } else { 769 pressures = NULL; 770 } 771 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 772 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 773 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 774 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 775 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 776 if (!sorted) { 777 ierr = ISSort(zerodiag);CHKERRQ(ierr); 778 } 779 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 780 if (!nz) { 781 if (n) have_null = PETSC_FALSE; 782 has_null_pressures = PETSC_FALSE; 783 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 784 } 785 recompute_zerodiag = PETSC_FALSE; 786 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 787 zerodiag_subs = NULL; 788 pcbddc->benign_n = 0; 789 n_interior_dofs = 0; 790 interior_dofs = NULL; 791 if (pcbddc->current_level) { /* need to compute interior nodes */ 792 PetscInt n,i,j; 793 PetscInt n_neigh,*neigh,*n_shared,**shared; 794 PetscInt *iwork; 795 796 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 797 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 798 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 799 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 800 for (i=1;i<n_neigh;i++) 801 for (j=0;j<n_shared[i];j++) 802 iwork[shared[i][j]] += 1; 803 for (i=0;i<n;i++) 804 if (!iwork[i]) 805 interior_dofs[n_interior_dofs++] = i; 806 ierr = PetscFree(iwork);CHKERRQ(ierr); 807 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 808 } 809 if (has_null_pressures) { 810 IS *subs; 811 PetscInt nsubs,i,j,nl; 812 const PetscInt *idxs; 813 PetscScalar *array; 814 Vec *work; 815 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 816 817 subs = pcbddc->local_subs; 818 nsubs = pcbddc->n_local_subs; 819 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 820 if (pcbddc->current_level) { 821 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 822 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 823 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 824 /* work[0] = 1_p */ 825 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 826 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 827 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 828 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 829 /* work[0] = 1_v */ 830 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 831 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 832 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 833 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 834 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 835 } 836 if (nsubs > 1) { 837 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 838 for (i=0;i<nsubs;i++) { 839 ISLocalToGlobalMapping l2g; 840 IS t_zerodiag_subs; 841 PetscInt nl; 842 843 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 844 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 845 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 846 if (nl) { 847 PetscBool valid = PETSC_TRUE; 848 849 if (pcbddc->current_level) { 850 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 851 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 852 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 853 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 854 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 855 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 856 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 857 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 858 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 859 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 860 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 861 for (j=0;j<n_interior_dofs;j++) { 862 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 863 valid = PETSC_FALSE; 864 break; 865 } 866 } 867 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 868 } 869 if (valid && pcbddc->NeumannBoundariesLocal) { 870 IS t_bc; 871 PetscInt nzb; 872 873 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 874 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 875 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 876 if (nzb) valid = PETSC_FALSE; 877 } 878 if (valid && pressures) { 879 IS t_pressure_subs; 880 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 881 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 882 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 883 } 884 if (valid) { 885 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 886 pcbddc->benign_n++; 887 } else { 888 recompute_zerodiag = PETSC_TRUE; 889 } 890 } 891 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 892 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 893 } 894 } else { /* there's just one subdomain (or zero if they have not been detected */ 895 PetscBool valid = PETSC_TRUE; 896 897 if (pcbddc->NeumannBoundariesLocal) { 898 PetscInt nzb; 899 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 900 if (nzb) valid = PETSC_FALSE; 901 } 902 if (valid && pressures) { 903 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 904 } 905 if (valid && pcbddc->current_level) { 906 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 907 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 908 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 909 for (j=0;j<n_interior_dofs;j++) { 910 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 911 valid = PETSC_FALSE; 912 break; 913 } 914 } 915 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 916 } 917 if (valid) { 918 pcbddc->benign_n = 1; 919 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 920 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 921 zerodiag_subs[0] = zerodiag; 922 } 923 } 924 if (pcbddc->current_level) { 925 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 926 } 927 } 928 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 929 930 if (!pcbddc->benign_n) { 931 PetscInt n; 932 933 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 934 recompute_zerodiag = PETSC_FALSE; 935 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 936 if (n) { 937 has_null_pressures = PETSC_FALSE; 938 have_null = PETSC_FALSE; 939 } 940 } 941 942 /* final check for null pressures */ 943 if (zerodiag && pressures) { 944 PetscInt nz,np; 945 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 946 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 947 if (nz != np) have_null = PETSC_FALSE; 948 } 949 950 if (recompute_zerodiag) { 951 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 952 if (pcbddc->benign_n == 1) { 953 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 954 zerodiag = zerodiag_subs[0]; 955 } else { 956 PetscInt i,nzn,*new_idxs; 957 958 nzn = 0; 959 for (i=0;i<pcbddc->benign_n;i++) { 960 PetscInt ns; 961 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 962 nzn += ns; 963 } 964 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 965 nzn = 0; 966 for (i=0;i<pcbddc->benign_n;i++) { 967 PetscInt ns,*idxs; 968 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 969 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 970 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 971 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 972 nzn += ns; 973 } 974 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 976 } 977 have_null = PETSC_FALSE; 978 } 979 980 /* Prepare matrix to compute no-net-flux */ 981 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 982 Mat A,loc_divudotp; 983 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 984 IS row,col,isused = NULL; 985 PetscInt M,N,n,st,n_isused; 986 987 if (pressures) { 988 isused = pressures; 989 } else { 990 isused = zerodiag; 991 } 992 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 993 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 994 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 995 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 996 n_isused = 0; 997 if (isused) { 998 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 999 } 1000 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1001 st = st-n_isused; 1002 if (n) { 1003 const PetscInt *gidxs; 1004 1005 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1006 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1007 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1008 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1009 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1010 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1011 } else { 1012 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1013 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1015 } 1016 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1017 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1018 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1019 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1020 ierr = ISDestroy(&row);CHKERRQ(ierr); 1021 ierr = ISDestroy(&col);CHKERRQ(ierr); 1022 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1023 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1024 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1025 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1026 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1027 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1028 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1029 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1030 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1031 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1032 } 1033 1034 /* change of basis and p0 dofs */ 1035 if (has_null_pressures) { 1036 IS zerodiagc; 1037 const PetscInt *idxs,*idxsc; 1038 PetscInt i,s,*nnz; 1039 1040 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1041 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1042 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1043 /* local change of basis for pressures */ 1044 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1045 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1046 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1047 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1048 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1049 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1050 for (i=0;i<pcbddc->benign_n;i++) { 1051 PetscInt nzs,j; 1052 1053 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1054 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1055 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1056 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1057 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1058 } 1059 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1060 ierr = PetscFree(nnz);CHKERRQ(ierr); 1061 /* set identity on velocities */ 1062 for (i=0;i<n-nz;i++) { 1063 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1064 } 1065 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1066 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1067 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1068 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1069 /* set change on pressures */ 1070 for (s=0;s<pcbddc->benign_n;s++) { 1071 PetscScalar *array; 1072 PetscInt nzs; 1073 1074 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1075 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1076 for (i=0;i<nzs-1;i++) { 1077 PetscScalar vals[2]; 1078 PetscInt cols[2]; 1079 1080 cols[0] = idxs[i]; 1081 cols[1] = idxs[nzs-1]; 1082 vals[0] = 1.; 1083 vals[1] = 1.; 1084 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1085 } 1086 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1087 for (i=0;i<nzs-1;i++) array[i] = -1.; 1088 array[nzs-1] = 1.; 1089 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1090 /* store local idxs for p0 */ 1091 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1092 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1093 ierr = PetscFree(array);CHKERRQ(ierr); 1094 } 1095 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1096 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1097 /* project if needed */ 1098 if (pcbddc->benign_change_explicit) { 1099 Mat M; 1100 1101 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1102 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1103 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1104 ierr = MatDestroy(&M);CHKERRQ(ierr); 1105 } 1106 /* store global idxs for p0 */ 1107 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1108 } 1109 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1110 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1111 1112 /* determines if the coarse solver will be singular or not */ 1113 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1114 /* determines if the problem has subdomains with 0 pressure block */ 1115 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1116 *zerodiaglocal = zerodiag; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1122 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1123 { 1124 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1125 PetscScalar *array; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 if (!pcbddc->benign_sf) { 1130 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1131 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1132 } 1133 if (get) { 1134 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1135 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1136 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1137 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1138 } else { 1139 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1140 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1141 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1142 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1149 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1150 { 1151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 /* TODO: add error checking 1156 - avoid nested pop (or push) calls. 1157 - cannot push before pop. 1158 - cannot call this if pcbddc->local_mat is NULL 1159 */ 1160 if (!pcbddc->benign_n) { 1161 PetscFunctionReturn(0); 1162 } 1163 if (pop) { 1164 if (pcbddc->benign_change_explicit) { 1165 IS is_p0; 1166 MatReuse reuse; 1167 1168 /* extract B_0 */ 1169 reuse = MAT_INITIAL_MATRIX; 1170 if (pcbddc->benign_B0) { 1171 reuse = MAT_REUSE_MATRIX; 1172 } 1173 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1174 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1175 /* remove rows and cols from local problem */ 1176 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1177 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1178 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1179 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1180 } else { 1181 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1182 PetscScalar *vals; 1183 PetscInt i,n,*idxs_ins; 1184 1185 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1186 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1187 if (!pcbddc->benign_B0) { 1188 PetscInt *nnz; 1189 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1190 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1191 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1192 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1193 for (i=0;i<pcbddc->benign_n;i++) { 1194 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1195 nnz[i] = n - nnz[i]; 1196 } 1197 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1198 ierr = PetscFree(nnz);CHKERRQ(ierr); 1199 } 1200 1201 for (i=0;i<pcbddc->benign_n;i++) { 1202 PetscScalar *array; 1203 PetscInt *idxs,j,nz,cum; 1204 1205 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1206 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1207 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1208 for (j=0;j<nz;j++) vals[j] = 1.; 1209 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1210 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1211 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1212 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1213 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1214 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1215 cum = 0; 1216 for (j=0;j<n;j++) { 1217 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1218 vals[cum] = array[j]; 1219 idxs_ins[cum] = j; 1220 cum++; 1221 } 1222 } 1223 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1224 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1225 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1226 } 1227 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1228 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1229 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1230 } 1231 } else { /* push */ 1232 if (pcbddc->benign_change_explicit) { 1233 PetscInt i; 1234 1235 for (i=0;i<pcbddc->benign_n;i++) { 1236 PetscScalar *B0_vals; 1237 PetscInt *B0_cols,B0_ncol; 1238 1239 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1240 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1241 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1242 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1243 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1244 } 1245 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1246 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1247 } else { 1248 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1249 } 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1256 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1257 { 1258 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1259 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1260 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1261 PetscBLASInt *B_iwork,*B_ifail; 1262 PetscScalar *work,lwork; 1263 PetscScalar *St,*S,*eigv; 1264 PetscScalar *Sarray,*Starray; 1265 PetscReal *eigs,thresh; 1266 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1267 PetscBool allocated_S_St; 1268 #if defined(PETSC_USE_COMPLEX) 1269 PetscReal *rwork; 1270 #endif 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1275 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1276 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1277 1278 if (pcbddc->dbg_flag) { 1279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1282 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1283 } 1284 1285 if (pcbddc->dbg_flag) { 1286 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1287 } 1288 1289 /* max size of subsets */ 1290 mss = 0; 1291 for (i=0;i<sub_schurs->n_subs;i++) { 1292 PetscInt subset_size; 1293 1294 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1295 mss = PetscMax(mss,subset_size); 1296 } 1297 1298 /* min/max and threshold */ 1299 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1300 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1301 nmax = PetscMax(nmin,nmax); 1302 allocated_S_St = PETSC_FALSE; 1303 if (nmin) { 1304 allocated_S_St = PETSC_TRUE; 1305 } 1306 1307 /* allocate lapack workspace */ 1308 cum = cum2 = 0; 1309 maxneigs = 0; 1310 for (i=0;i<sub_schurs->n_subs;i++) { 1311 PetscInt n,subset_size; 1312 1313 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1314 n = PetscMin(subset_size,nmax); 1315 cum += subset_size; 1316 cum2 += subset_size*n; 1317 maxneigs = PetscMax(maxneigs,n); 1318 } 1319 if (mss) { 1320 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1321 PetscBLASInt B_itype = 1; 1322 PetscBLASInt B_N = mss; 1323 PetscReal zero = 0.0; 1324 PetscReal eps = 0.0; /* dlamch? */ 1325 1326 B_lwork = -1; 1327 S = NULL; 1328 St = NULL; 1329 eigs = NULL; 1330 eigv = NULL; 1331 B_iwork = NULL; 1332 B_ifail = NULL; 1333 #if defined(PETSC_USE_COMPLEX) 1334 rwork = NULL; 1335 #endif 1336 thresh = 1.0; 1337 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1338 #if defined(PETSC_USE_COMPLEX) 1339 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1340 #else 1341 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1342 #endif 1343 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1344 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1345 } else { 1346 /* TODO */ 1347 } 1348 } else { 1349 lwork = 0; 1350 } 1351 1352 nv = 0; 1353 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1354 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1355 } 1356 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1357 if (allocated_S_St) { 1358 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1359 } 1360 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1361 #if defined(PETSC_USE_COMPLEX) 1362 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1363 #endif 1364 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1365 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1366 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1367 nv+cum,&pcbddc->adaptive_constraints_idxs, 1368 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1369 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1370 1371 maxneigs = 0; 1372 cum = cumarray = 0; 1373 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1374 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1375 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1376 const PetscInt *idxs; 1377 1378 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1379 for (cum=0;cum<nv;cum++) { 1380 pcbddc->adaptive_constraints_n[cum] = 1; 1381 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1382 pcbddc->adaptive_constraints_data[cum] = 1.0; 1383 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1384 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1385 } 1386 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1387 } 1388 1389 if (mss) { /* multilevel */ 1390 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1391 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1392 } 1393 1394 thresh = pcbddc->adaptive_threshold; 1395 for (i=0;i<sub_schurs->n_subs;i++) { 1396 const PetscInt *idxs; 1397 PetscReal upper,lower; 1398 PetscInt j,subset_size,eigs_start = 0; 1399 PetscBLASInt B_N; 1400 PetscBool same_data = PETSC_FALSE; 1401 1402 if (pcbddc->use_deluxe_scaling) { 1403 upper = PETSC_MAX_REAL; 1404 lower = thresh; 1405 } else { 1406 upper = 1./thresh; 1407 lower = 0.; 1408 } 1409 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1410 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1411 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1412 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1413 if (sub_schurs->is_hermitian) { 1414 PetscInt j,k; 1415 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1416 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1417 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1418 } 1419 for (j=0;j<subset_size;j++) { 1420 for (k=j;k<subset_size;k++) { 1421 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1422 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1423 } 1424 } 1425 } else { 1426 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1427 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1428 } 1429 } else { 1430 S = Sarray + cumarray; 1431 St = Starray + cumarray; 1432 } 1433 /* see if we can save some work */ 1434 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1435 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1436 } 1437 1438 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1439 B_neigs = 0; 1440 } else { 1441 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1442 PetscBLASInt B_itype = 1; 1443 PetscBLASInt B_IL, B_IU; 1444 PetscReal eps = -1.0; /* dlamch? */ 1445 PetscInt nmin_s; 1446 PetscBool compute_range = PETSC_FALSE; 1447 1448 if (pcbddc->dbg_flag) { 1449 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1450 } 1451 1452 compute_range = PETSC_FALSE; 1453 if (thresh > 1.+PETSC_SMALL && !same_data) { 1454 compute_range = PETSC_TRUE; 1455 } 1456 1457 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1458 if (compute_range) { 1459 1460 /* ask for eigenvalues larger than thresh */ 1461 #if defined(PETSC_USE_COMPLEX) 1462 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1463 #else 1464 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1465 #endif 1466 } else if (!same_data) { 1467 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1468 B_IL = 1; 1469 #if defined(PETSC_USE_COMPLEX) 1470 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1471 #else 1472 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1473 #endif 1474 } else { /* same_data is true, so get the adaptive function requested by the user */ 1475 PetscInt k; 1476 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1477 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1478 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1479 nmin = nmax; 1480 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1481 for (k=0;k<nmax;k++) { 1482 eigs[k] = 1./PETSC_SMALL; 1483 eigv[k*(subset_size+1)] = 1.0; 1484 } 1485 } 1486 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1487 if (B_ierr) { 1488 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1489 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1490 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1491 } 1492 1493 if (B_neigs > nmax) { 1494 if (pcbddc->dbg_flag) { 1495 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1496 } 1497 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1498 B_neigs = nmax; 1499 } 1500 1501 nmin_s = PetscMin(nmin,B_N); 1502 if (B_neigs < nmin_s) { 1503 PetscBLASInt B_neigs2; 1504 1505 if (pcbddc->use_deluxe_scaling) { 1506 B_IL = B_N - nmin_s + 1; 1507 B_IU = B_N - B_neigs; 1508 } else { 1509 B_IL = B_neigs + 1; 1510 B_IU = nmin_s; 1511 } 1512 if (pcbddc->dbg_flag) { 1513 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1514 } 1515 if (sub_schurs->is_hermitian) { 1516 PetscInt j,k; 1517 for (j=0;j<subset_size;j++) { 1518 for (k=j;k<subset_size;k++) { 1519 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1520 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1521 } 1522 } 1523 } else { 1524 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1525 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1526 } 1527 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1528 #if defined(PETSC_USE_COMPLEX) 1529 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1530 #else 1531 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1532 #endif 1533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1534 B_neigs += B_neigs2; 1535 } 1536 if (B_ierr) { 1537 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1538 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1539 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1540 } 1541 if (pcbddc->dbg_flag) { 1542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1543 for (j=0;j<B_neigs;j++) { 1544 if (eigs[j] == 0.0) { 1545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1546 } else { 1547 if (pcbddc->use_deluxe_scaling) { 1548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1551 } 1552 } 1553 } 1554 } 1555 } else { 1556 /* TODO */ 1557 } 1558 } 1559 /* change the basis back to the original one */ 1560 if (sub_schurs->change) { 1561 Mat change,phi,phit; 1562 1563 if (pcbddc->dbg_flag > 1) { 1564 PetscInt ii; 1565 for (ii=0;ii<B_neigs;ii++) { 1566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1567 for (j=0;j<B_N;j++) { 1568 #if defined(PETSC_USE_COMPLEX) 1569 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 1570 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 1571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1572 #else 1573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1574 #endif 1575 } 1576 } 1577 } 1578 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1580 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1581 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1582 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1583 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1584 } 1585 maxneigs = PetscMax(B_neigs,maxneigs); 1586 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1587 if (B_neigs) { 1588 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1589 1590 if (pcbddc->dbg_flag > 1) { 1591 PetscInt ii; 1592 for (ii=0;ii<B_neigs;ii++) { 1593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1594 for (j=0;j<B_N;j++) { 1595 #if defined(PETSC_USE_COMPLEX) 1596 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1597 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1599 #else 1600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1601 #endif 1602 } 1603 } 1604 } 1605 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1606 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1607 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1608 cum++; 1609 } 1610 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1611 /* shift for next computation */ 1612 cumarray += subset_size*subset_size; 1613 } 1614 if (pcbddc->dbg_flag) { 1615 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1616 } 1617 1618 if (mss) { 1619 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1620 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1621 /* destroy matrices (junk) */ 1622 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1623 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1624 } 1625 if (allocated_S_St) { 1626 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1627 } 1628 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1629 #if defined(PETSC_USE_COMPLEX) 1630 ierr = PetscFree(rwork);CHKERRQ(ierr); 1631 #endif 1632 if (pcbddc->dbg_flag) { 1633 PetscInt maxneigs_r; 1634 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1635 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1636 } 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "PCBDDCSetUpSolvers" 1642 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1643 { 1644 PetscScalar *coarse_submat_vals; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1649 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1650 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1651 1652 /* Setup local neumann solver ksp_R */ 1653 /* PCBDDCSetUpLocalScatters should be called first! */ 1654 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1655 1656 /* 1657 Setup local correction and local part of coarse basis. 1658 Gives back the dense local part of the coarse matrix in column major ordering 1659 */ 1660 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1661 1662 /* Compute total number of coarse nodes and setup coarse solver */ 1663 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1664 1665 /* free */ 1666 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "PCBDDCResetCustomization" 1672 PetscErrorCode PCBDDCResetCustomization(PC pc) 1673 { 1674 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1679 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1680 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1681 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1682 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1683 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1684 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1685 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1686 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1687 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1688 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1689 PetscFunctionReturn(0); 1690 } 1691 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "PCBDDCResetTopography" 1694 PetscErrorCode PCBDDCResetTopography(PC pc) 1695 { 1696 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1697 PetscInt i; 1698 PetscErrorCode ierr; 1699 1700 PetscFunctionBegin; 1701 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1702 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1703 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1704 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1705 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1706 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1707 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1708 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1709 pcbddc->graphanalyzed = PETSC_FALSE; 1710 for (i=0;i<pcbddc->n_local_subs;i++) { 1711 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1712 } 1713 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1714 if (pcbddc->sub_schurs) { 1715 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "PCBDDCResetSolvers" 1722 PetscErrorCode PCBDDCResetSolvers(PC pc) 1723 { 1724 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1725 PetscErrorCode ierr; 1726 1727 PetscFunctionBegin; 1728 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1729 if (pcbddc->coarse_phi_B) { 1730 PetscScalar *array; 1731 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1732 ierr = PetscFree(array);CHKERRQ(ierr); 1733 } 1734 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1735 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1736 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1737 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1738 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1739 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1740 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1741 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1742 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1743 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1744 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1745 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1746 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1747 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1748 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1749 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1750 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1751 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1752 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1753 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1754 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1755 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1756 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1757 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1758 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1759 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1760 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1761 if (pcbddc->benign_zerodiag_subs) { 1762 PetscInt i; 1763 for (i=0;i<pcbddc->benign_n;i++) { 1764 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1765 } 1766 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1767 } 1768 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1774 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1775 { 1776 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1777 PC_IS *pcis = (PC_IS*)pc->data; 1778 VecType impVecType; 1779 PetscInt n_constraints,n_R,old_size; 1780 PetscErrorCode ierr; 1781 1782 PetscFunctionBegin; 1783 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1784 /* get sizes */ 1785 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1786 n_R = pcis->n - pcbddc->n_vertices; 1787 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1788 /* local work vectors (try to avoid unneeded work)*/ 1789 /* R nodes */ 1790 old_size = -1; 1791 if (pcbddc->vec1_R) { 1792 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1793 } 1794 if (n_R != old_size) { 1795 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1796 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1797 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1798 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1799 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1800 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1801 } 1802 /* local primal dofs */ 1803 old_size = -1; 1804 if (pcbddc->vec1_P) { 1805 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1806 } 1807 if (pcbddc->local_primal_size != old_size) { 1808 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1809 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1810 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1811 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1812 } 1813 /* local explicit constraints */ 1814 old_size = -1; 1815 if (pcbddc->vec1_C) { 1816 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1817 } 1818 if (n_constraints && n_constraints != old_size) { 1819 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1820 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1821 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1822 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1823 } 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "PCBDDCSetUpCorrection" 1829 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1830 { 1831 PetscErrorCode ierr; 1832 /* pointers to pcis and pcbddc */ 1833 PC_IS* pcis = (PC_IS*)pc->data; 1834 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1835 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1836 /* submatrices of local problem */ 1837 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1838 /* submatrices of local coarse problem */ 1839 Mat S_VV,S_CV,S_VC,S_CC; 1840 /* working matrices */ 1841 Mat C_CR; 1842 /* additional working stuff */ 1843 PC pc_R; 1844 Mat F; 1845 Vec dummy_vec; 1846 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1847 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1848 PetscScalar *work; 1849 PetscInt *idx_V_B; 1850 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1851 PetscInt i,n_R,n_D,n_B; 1852 1853 /* some shortcuts to scalars */ 1854 PetscScalar one=1.0,m_one=-1.0; 1855 1856 PetscFunctionBegin; 1857 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1858 1859 /* Set Non-overlapping dimensions */ 1860 n_vertices = pcbddc->n_vertices; 1861 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1862 n_B = pcis->n_B; 1863 n_D = pcis->n - n_B; 1864 n_R = pcis->n - n_vertices; 1865 1866 /* vertices in boundary numbering */ 1867 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1868 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1869 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1870 1871 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1872 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1873 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1874 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1876 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1878 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1880 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1881 1882 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1883 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1884 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1885 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1886 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1887 lda_rhs = n_R; 1888 need_benign_correction = PETSC_FALSE; 1889 if (isLU || isILU || isCHOL) { 1890 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1891 } else if (sub_schurs && sub_schurs->reuse_solver) { 1892 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1893 MatFactorType type; 1894 1895 F = reuse_solver->F; 1896 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1897 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1898 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1899 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1900 } else { 1901 F = NULL; 1902 } 1903 1904 /* allocate workspace */ 1905 n = 0; 1906 if (n_constraints) { 1907 n += lda_rhs*n_constraints; 1908 } 1909 if (n_vertices) { 1910 n = PetscMax(2*lda_rhs*n_vertices,n); 1911 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1912 } 1913 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1914 1915 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1916 dummy_vec = NULL; 1917 if (need_benign_correction && lda_rhs != n_R && F) { 1918 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1919 } 1920 1921 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1922 if (n_constraints) { 1923 Mat M1,M2,M3,C_B; 1924 IS is_aux; 1925 PetscScalar *array,*array2; 1926 1927 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1928 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1929 1930 /* Extract constraints on R nodes: C_{CR} */ 1931 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1932 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1933 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1934 1935 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1936 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1937 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1938 for (i=0;i<n_constraints;i++) { 1939 const PetscScalar *row_cmat_values; 1940 const PetscInt *row_cmat_indices; 1941 PetscInt size_of_constraint,j; 1942 1943 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1944 for (j=0;j<size_of_constraint;j++) { 1945 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1946 } 1947 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1948 } 1949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1950 if (F) { 1951 Mat B; 1952 1953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1954 if (need_benign_correction) { 1955 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1956 1957 /* rhs is already zero on interior dofs, no need to change the rhs */ 1958 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1959 } 1960 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1961 if (need_benign_correction) { 1962 PetscScalar *marr; 1963 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1964 1965 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1966 if (lda_rhs != n_R) { 1967 for (i=0;i<n_constraints;i++) { 1968 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1969 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1970 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1971 } 1972 } else { 1973 for (i=0;i<n_constraints;i++) { 1974 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1975 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1976 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1977 } 1978 } 1979 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1980 } 1981 ierr = MatDestroy(&B);CHKERRQ(ierr); 1982 } else { 1983 PetscScalar *marr; 1984 1985 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1986 for (i=0;i<n_constraints;i++) { 1987 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1988 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1989 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1990 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1991 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1992 } 1993 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1994 } 1995 if (!pcbddc->switch_static) { 1996 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1997 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1998 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1999 for (i=0;i<n_constraints;i++) { 2000 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2001 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2002 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2003 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2006 } 2007 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2008 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2009 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2010 } else { 2011 if (lda_rhs != n_R) { 2012 IS dummy; 2013 2014 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2015 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2016 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2017 } else { 2018 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2019 pcbddc->local_auxmat2 = local_auxmat2_R; 2020 } 2021 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2022 } 2023 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2024 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2025 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2026 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2027 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2028 if (isCHOL) { 2029 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2030 } else { 2031 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2032 } 2033 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2034 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2035 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2036 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2037 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2038 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2039 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2040 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2041 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2042 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2043 } 2044 2045 /* Get submatrices from subdomain matrix */ 2046 if (n_vertices) { 2047 IS is_aux; 2048 2049 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2050 IS tis; 2051 2052 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2053 ierr = ISSort(tis);CHKERRQ(ierr); 2054 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2055 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2056 } else { 2057 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2058 } 2059 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2060 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2061 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2062 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2063 } 2064 2065 /* Matrix of coarse basis functions (local) */ 2066 if (pcbddc->coarse_phi_B) { 2067 PetscInt on_B,on_primal,on_D=n_D; 2068 if (pcbddc->coarse_phi_D) { 2069 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2070 } 2071 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2072 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2073 PetscScalar *marray; 2074 2075 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2076 ierr = PetscFree(marray);CHKERRQ(ierr); 2077 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2078 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2080 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2081 } 2082 } 2083 2084 if (!pcbddc->coarse_phi_B) { 2085 PetscScalar *marray; 2086 2087 n = n_B*pcbddc->local_primal_size; 2088 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2089 n += n_D*pcbddc->local_primal_size; 2090 } 2091 if (!pcbddc->symmetric_primal) { 2092 n *= 2; 2093 } 2094 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2095 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2096 n = n_B*pcbddc->local_primal_size; 2097 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2098 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2099 n += n_D*pcbddc->local_primal_size; 2100 } 2101 if (!pcbddc->symmetric_primal) { 2102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2103 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2104 n = n_B*pcbddc->local_primal_size; 2105 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2106 } 2107 } else { 2108 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2109 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2110 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2111 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2112 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2113 } 2114 } 2115 } 2116 2117 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2118 p0_lidx_I = NULL; 2119 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2120 const PetscInt *idxs; 2121 2122 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2123 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2124 for (i=0;i<pcbddc->benign_n;i++) { 2125 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2126 } 2127 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2128 } 2129 2130 /* vertices */ 2131 if (n_vertices) { 2132 2133 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2134 2135 if (n_R) { 2136 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2137 PetscBLASInt B_N,B_one = 1; 2138 PetscScalar *x,*y; 2139 PetscBool isseqaij; 2140 2141 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2142 if (need_benign_correction) { 2143 ISLocalToGlobalMapping RtoN; 2144 IS is_p0; 2145 PetscInt *idxs_p0,n; 2146 2147 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2148 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2149 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2150 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2151 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2152 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2153 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2154 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2155 } 2156 2157 if (lda_rhs == n_R) { 2158 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2159 } else { 2160 PetscScalar *av,*array; 2161 const PetscInt *xadj,*adjncy; 2162 PetscInt n; 2163 PetscBool flg_row; 2164 2165 array = work+lda_rhs*n_vertices; 2166 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2167 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2168 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2169 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2170 for (i=0;i<n;i++) { 2171 PetscInt j; 2172 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2173 } 2174 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2175 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2176 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2177 } 2178 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2179 if (need_benign_correction) { 2180 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2181 PetscScalar *marr; 2182 2183 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2184 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2185 2186 | 0 0 0 | (V) 2187 L = | 0 0 -1 | (P-p0) 2188 | 0 0 -1 | (p0) 2189 2190 */ 2191 for (i=0;i<reuse_solver->benign_n;i++) { 2192 const PetscScalar *vals; 2193 const PetscInt *idxs,*idxs_zero; 2194 PetscInt n,j,nz; 2195 2196 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2197 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2198 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2199 for (j=0;j<n;j++) { 2200 PetscScalar val = vals[j]; 2201 PetscInt k,col = idxs[j]; 2202 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2203 } 2204 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2205 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2206 } 2207 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2208 } 2209 if (F) { 2210 /* need to correct the rhs */ 2211 if (need_benign_correction) { 2212 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2213 PetscScalar *marr; 2214 2215 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2216 if (lda_rhs != n_R) { 2217 for (i=0;i<n_vertices;i++) { 2218 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2219 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2220 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2221 } 2222 } else { 2223 for (i=0;i<n_vertices;i++) { 2224 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2225 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2226 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2227 } 2228 } 2229 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2230 } 2231 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2232 /* need to correct the solution */ 2233 if (need_benign_correction) { 2234 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2235 PetscScalar *marr; 2236 2237 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2238 if (lda_rhs != n_R) { 2239 for (i=0;i<n_vertices;i++) { 2240 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2241 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2242 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2243 } 2244 } else { 2245 for (i=0;i<n_vertices;i++) { 2246 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2247 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2248 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2249 } 2250 } 2251 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2252 } 2253 } else { 2254 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2255 for (i=0;i<n_vertices;i++) { 2256 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2257 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2258 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2259 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2260 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2261 } 2262 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2263 } 2264 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2265 /* S_VV and S_CV */ 2266 if (n_constraints) { 2267 Mat B; 2268 2269 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2270 for (i=0;i<n_vertices;i++) { 2271 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2272 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2273 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2274 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2275 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2276 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2277 } 2278 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2279 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2280 ierr = MatDestroy(&B);CHKERRQ(ierr); 2281 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2282 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2283 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2284 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2285 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2286 ierr = MatDestroy(&B);CHKERRQ(ierr); 2287 } 2288 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2289 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2290 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2291 } 2292 if (lda_rhs != n_R) { 2293 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2294 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2295 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2296 } 2297 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2298 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2299 if (need_benign_correction) { 2300 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2301 PetscScalar *marr,*sums; 2302 2303 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2304 ierr = MatDenseGetArray(S_VVt,&marr); 2305 for (i=0;i<reuse_solver->benign_n;i++) { 2306 const PetscScalar *vals; 2307 const PetscInt *idxs,*idxs_zero; 2308 PetscInt n,j,nz; 2309 2310 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2311 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2312 for (j=0;j<n_vertices;j++) { 2313 PetscInt k; 2314 sums[j] = 0.; 2315 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2316 } 2317 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2318 for (j=0;j<n;j++) { 2319 PetscScalar val = vals[j]; 2320 PetscInt k; 2321 for (k=0;k<n_vertices;k++) { 2322 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2323 } 2324 } 2325 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2326 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2327 } 2328 ierr = PetscFree(sums);CHKERRQ(ierr); 2329 ierr = MatDenseRestoreArray(S_VVt,&marr); 2330 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2331 } 2332 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2333 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2334 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2335 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2336 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2337 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2338 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2339 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2340 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2341 } else { 2342 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2343 } 2344 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2345 2346 /* coarse basis functions */ 2347 for (i=0;i<n_vertices;i++) { 2348 PetscScalar *y; 2349 2350 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2351 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2352 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2353 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2354 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2355 y[n_B*i+idx_V_B[i]] = 1.0; 2356 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2357 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2358 2359 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2360 PetscInt j; 2361 2362 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2363 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2364 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2365 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2366 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2367 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2368 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2369 } 2370 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2371 } 2372 /* if n_R == 0 the object is not destroyed */ 2373 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2374 } 2375 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2376 2377 if (n_constraints) { 2378 Mat B; 2379 2380 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2381 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2382 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2383 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2384 if (n_vertices) { 2385 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2386 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2387 } else { 2388 Mat S_VCt; 2389 2390 if (lda_rhs != n_R) { 2391 ierr = MatDestroy(&B);CHKERRQ(ierr); 2392 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2393 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2394 } 2395 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2396 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2397 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2398 } 2399 } 2400 ierr = MatDestroy(&B);CHKERRQ(ierr); 2401 /* coarse basis functions */ 2402 for (i=0;i<n_constraints;i++) { 2403 PetscScalar *y; 2404 2405 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2406 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2407 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2408 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2409 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2410 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2411 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2412 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2413 PetscInt j; 2414 2415 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2416 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2417 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2418 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2419 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2420 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2421 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2422 } 2423 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2424 } 2425 } 2426 if (n_constraints) { 2427 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2428 } 2429 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2430 2431 /* coarse matrix entries relative to B_0 */ 2432 if (pcbddc->benign_n) { 2433 Mat B0_B,B0_BPHI; 2434 IS is_dummy; 2435 PetscScalar *data; 2436 PetscInt j; 2437 2438 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2439 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2440 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2441 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2442 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2443 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2444 for (j=0;j<pcbddc->benign_n;j++) { 2445 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2446 for (i=0;i<pcbddc->local_primal_size;i++) { 2447 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2448 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2449 } 2450 } 2451 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2452 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2453 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2454 } 2455 2456 /* compute other basis functions for non-symmetric problems */ 2457 if (!pcbddc->symmetric_primal) { 2458 Mat B_V=NULL,B_C=NULL; 2459 PetscScalar *marray; 2460 2461 if (n_constraints) { 2462 Mat S_CCT,C_CRT; 2463 2464 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2465 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2466 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2467 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2468 if (n_vertices) { 2469 Mat S_VCT; 2470 2471 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2472 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2473 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2474 } 2475 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2476 } else { 2477 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2478 } 2479 if (n_vertices && n_R) { 2480 PetscScalar *av,*marray; 2481 const PetscInt *xadj,*adjncy; 2482 PetscInt n; 2483 PetscBool flg_row; 2484 2485 /* B_V = B_V - A_VR^T */ 2486 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2487 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2488 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2489 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2490 for (i=0;i<n;i++) { 2491 PetscInt j; 2492 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2493 } 2494 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2495 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2496 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2497 } 2498 2499 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2500 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2501 for (i=0;i<n_vertices;i++) { 2502 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2503 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2504 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2505 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2506 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2507 } 2508 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2509 if (B_C) { 2510 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2511 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2512 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2513 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2514 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2515 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2516 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2517 } 2518 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2519 } 2520 /* coarse basis functions */ 2521 for (i=0;i<pcbddc->local_primal_size;i++) { 2522 PetscScalar *y; 2523 2524 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2525 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2526 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2527 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2528 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2529 if (i<n_vertices) { 2530 y[n_B*i+idx_V_B[i]] = 1.0; 2531 } 2532 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2533 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2534 2535 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2536 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2537 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2538 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2539 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2540 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2541 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2542 } 2543 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2544 } 2545 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2546 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2547 } 2548 /* free memory */ 2549 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2550 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2551 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2552 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2553 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2554 ierr = PetscFree(work);CHKERRQ(ierr); 2555 if (n_vertices) { 2556 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2557 } 2558 if (n_constraints) { 2559 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2560 } 2561 /* Checking coarse_sub_mat and coarse basis functios */ 2562 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2563 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2564 if (pcbddc->dbg_flag) { 2565 Mat coarse_sub_mat; 2566 Mat AUXMAT,TM1,TM2,TM3,TM4; 2567 Mat coarse_phi_D,coarse_phi_B; 2568 Mat coarse_psi_D,coarse_psi_B; 2569 Mat A_II,A_BB,A_IB,A_BI; 2570 Mat C_B,CPHI; 2571 IS is_dummy; 2572 Vec mones; 2573 MatType checkmattype=MATSEQAIJ; 2574 PetscReal real_value; 2575 2576 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2577 Mat A; 2578 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2579 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2580 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2581 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2582 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2583 ierr = MatDestroy(&A);CHKERRQ(ierr); 2584 } else { 2585 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2586 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2587 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2588 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2589 } 2590 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2591 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2592 if (!pcbddc->symmetric_primal) { 2593 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2594 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2595 } 2596 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2597 2598 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2599 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2600 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2601 if (!pcbddc->symmetric_primal) { 2602 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2603 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2604 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2605 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2606 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2607 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2608 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2609 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2610 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2611 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2612 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2613 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2614 } else { 2615 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2616 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2617 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2618 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2619 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2620 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2621 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2622 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2623 } 2624 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2625 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2626 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2627 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2628 if (pcbddc->benign_n) { 2629 Mat B0_B,B0_BPHI; 2630 PetscScalar *data,*data2; 2631 PetscInt j; 2632 2633 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2634 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2635 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2636 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2637 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2638 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2639 for (j=0;j<pcbddc->benign_n;j++) { 2640 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2641 for (i=0;i<pcbddc->local_primal_size;i++) { 2642 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2643 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2644 } 2645 } 2646 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2647 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2648 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2649 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2650 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2651 } 2652 #if 0 2653 { 2654 PetscViewer viewer; 2655 char filename[256]; 2656 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2657 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2658 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2659 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2660 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2661 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2662 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2663 if (save_change) { 2664 Mat phi_B; 2665 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2666 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2667 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2668 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2669 } else { 2670 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2671 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2672 } 2673 if (pcbddc->coarse_phi_D) { 2674 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2675 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2676 } 2677 if (pcbddc->coarse_psi_B) { 2678 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2679 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2680 } 2681 if (pcbddc->coarse_psi_D) { 2682 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2683 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2684 } 2685 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2686 } 2687 #endif 2688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2689 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2690 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2692 2693 /* check constraints */ 2694 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2695 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2696 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2697 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2698 } else { 2699 PetscScalar *data; 2700 Mat tmat; 2701 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2703 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2704 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2705 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2706 } 2707 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2708 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2709 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2710 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2712 if (!pcbddc->symmetric_primal) { 2713 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2714 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2715 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2716 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2718 } 2719 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2720 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2721 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2722 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2723 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2724 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2725 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2726 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2727 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2728 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2729 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2730 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2731 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2732 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2733 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2734 if (!pcbddc->symmetric_primal) { 2735 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2736 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2737 } 2738 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2739 } 2740 /* get back data */ 2741 *coarse_submat_vals_n = coarse_submat_vals; 2742 PetscFunctionReturn(0); 2743 } 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2747 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2748 { 2749 Mat *work_mat; 2750 IS isrow_s,iscol_s; 2751 PetscBool rsorted,csorted; 2752 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2757 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2758 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2759 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2760 2761 if (!rsorted) { 2762 const PetscInt *idxs; 2763 PetscInt *idxs_sorted,i; 2764 2765 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2766 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2767 for (i=0;i<rsize;i++) { 2768 idxs_perm_r[i] = i; 2769 } 2770 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2771 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2772 for (i=0;i<rsize;i++) { 2773 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2774 } 2775 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2776 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2777 } else { 2778 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2779 isrow_s = isrow; 2780 } 2781 2782 if (!csorted) { 2783 if (isrow == iscol) { 2784 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2785 iscol_s = isrow_s; 2786 } else { 2787 const PetscInt *idxs; 2788 PetscInt *idxs_sorted,i; 2789 2790 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2791 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2792 for (i=0;i<csize;i++) { 2793 idxs_perm_c[i] = i; 2794 } 2795 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2796 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2797 for (i=0;i<csize;i++) { 2798 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2799 } 2800 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2801 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2802 } 2803 } else { 2804 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2805 iscol_s = iscol; 2806 } 2807 2808 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2809 2810 if (!rsorted || !csorted) { 2811 Mat new_mat; 2812 IS is_perm_r,is_perm_c; 2813 2814 if (!rsorted) { 2815 PetscInt *idxs_r,i; 2816 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2817 for (i=0;i<rsize;i++) { 2818 idxs_r[idxs_perm_r[i]] = i; 2819 } 2820 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2821 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2822 } else { 2823 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2824 } 2825 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2826 2827 if (!csorted) { 2828 if (isrow_s == iscol_s) { 2829 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2830 is_perm_c = is_perm_r; 2831 } else { 2832 PetscInt *idxs_c,i; 2833 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2834 for (i=0;i<csize;i++) { 2835 idxs_c[idxs_perm_c[i]] = i; 2836 } 2837 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2838 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2839 } 2840 } else { 2841 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2842 } 2843 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2844 2845 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2846 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2847 work_mat[0] = new_mat; 2848 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2849 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2850 } 2851 2852 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2853 *B = work_mat[0]; 2854 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2855 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2856 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2862 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2863 { 2864 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2865 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2866 Mat new_mat; 2867 IS is_local,is_global; 2868 PetscInt local_size; 2869 PetscBool isseqaij; 2870 PetscErrorCode ierr; 2871 2872 PetscFunctionBegin; 2873 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2874 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2875 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2876 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2877 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2878 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2879 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2880 2881 /* check */ 2882 if (pcbddc->dbg_flag) { 2883 Vec x,x_change; 2884 PetscReal error; 2885 2886 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2887 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2888 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2889 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2891 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2892 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2893 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2894 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2895 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2897 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2898 ierr = VecDestroy(&x);CHKERRQ(ierr); 2899 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2900 } 2901 2902 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2903 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2904 if (isseqaij) { 2905 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2906 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2907 } else { 2908 Mat work_mat; 2909 2910 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2911 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2912 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2913 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2914 } 2915 if (matis->A->symmetric_set) { 2916 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2917 #if !defined(PETSC_USE_COMPLEX) 2918 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2919 #endif 2920 } 2921 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2927 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2928 { 2929 PC_IS* pcis = (PC_IS*)(pc->data); 2930 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2931 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2932 PetscInt *idx_R_local=NULL; 2933 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2934 PetscInt vbs,bs; 2935 PetscBT bitmask=NULL; 2936 PetscErrorCode ierr; 2937 2938 PetscFunctionBegin; 2939 /* 2940 No need to setup local scatters if 2941 - primal space is unchanged 2942 AND 2943 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2944 AND 2945 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2946 */ 2947 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2948 PetscFunctionReturn(0); 2949 } 2950 /* destroy old objects */ 2951 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2952 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2953 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2954 /* Set Non-overlapping dimensions */ 2955 n_B = pcis->n_B; 2956 n_D = pcis->n - n_B; 2957 n_vertices = pcbddc->n_vertices; 2958 2959 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2960 2961 /* create auxiliary bitmask and allocate workspace */ 2962 if (!sub_schurs || !sub_schurs->reuse_solver) { 2963 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2964 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2965 for (i=0;i<n_vertices;i++) { 2966 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2967 } 2968 2969 for (i=0, n_R=0; i<pcis->n; i++) { 2970 if (!PetscBTLookup(bitmask,i)) { 2971 idx_R_local[n_R++] = i; 2972 } 2973 } 2974 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2975 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2976 2977 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2978 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2979 } 2980 2981 /* Block code */ 2982 vbs = 1; 2983 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2984 if (bs>1 && !(n_vertices%bs)) { 2985 PetscBool is_blocked = PETSC_TRUE; 2986 PetscInt *vary; 2987 if (!sub_schurs || !sub_schurs->reuse_solver) { 2988 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2989 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2990 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2991 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2992 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2993 for (i=0; i<pcis->n/bs; i++) { 2994 if (vary[i]!=0 && vary[i]!=bs) { 2995 is_blocked = PETSC_FALSE; 2996 break; 2997 } 2998 } 2999 ierr = PetscFree(vary);CHKERRQ(ierr); 3000 } else { 3001 /* Verify directly the R set */ 3002 for (i=0; i<n_R/bs; i++) { 3003 PetscInt j,node=idx_R_local[bs*i]; 3004 for (j=1; j<bs; j++) { 3005 if (node != idx_R_local[bs*i+j]-j) { 3006 is_blocked = PETSC_FALSE; 3007 break; 3008 } 3009 } 3010 } 3011 } 3012 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3013 vbs = bs; 3014 for (i=0;i<n_R/vbs;i++) { 3015 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3016 } 3017 } 3018 } 3019 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3020 if (sub_schurs && sub_schurs->reuse_solver) { 3021 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3022 3023 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3024 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3025 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3026 reuse_solver->is_R = pcbddc->is_R_local; 3027 } else { 3028 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3029 } 3030 3031 /* print some info if requested */ 3032 if (pcbddc->dbg_flag) { 3033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3034 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3035 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3036 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3039 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3040 } 3041 3042 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3043 if (!sub_schurs || !sub_schurs->reuse_solver) { 3044 IS is_aux1,is_aux2; 3045 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3046 3047 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3048 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3049 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3050 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3051 for (i=0; i<n_D; i++) { 3052 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3053 } 3054 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3055 for (i=0, j=0; i<n_R; i++) { 3056 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3057 aux_array1[j++] = i; 3058 } 3059 } 3060 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3061 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3062 for (i=0, j=0; i<n_B; i++) { 3063 if (!PetscBTLookup(bitmask,is_indices[i])) { 3064 aux_array2[j++] = i; 3065 } 3066 } 3067 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3068 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3069 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3070 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3071 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3072 3073 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3074 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3075 for (i=0, j=0; i<n_R; i++) { 3076 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3077 aux_array1[j++] = i; 3078 } 3079 } 3080 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3081 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3082 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3083 } 3084 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3085 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3086 } else { 3087 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3088 IS tis; 3089 PetscInt schur_size; 3090 3091 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3092 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3093 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3094 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3095 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3096 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3097 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3098 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3099 } 3100 } 3101 PetscFunctionReturn(0); 3102 } 3103 3104 3105 #undef __FUNCT__ 3106 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3107 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3108 { 3109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3110 PC_IS *pcis = (PC_IS*)pc->data; 3111 PC pc_temp; 3112 Mat A_RR; 3113 MatReuse reuse; 3114 PetscScalar m_one = -1.0; 3115 PetscReal value; 3116 PetscInt n_D,n_R; 3117 PetscBool check_corr[2],issbaij; 3118 PetscErrorCode ierr; 3119 /* prefixes stuff */ 3120 char dir_prefix[256],neu_prefix[256],str_level[16]; 3121 size_t len; 3122 3123 PetscFunctionBegin; 3124 3125 /* compute prefixes */ 3126 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3127 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3128 if (!pcbddc->current_level) { 3129 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3130 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3131 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3132 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3133 } else { 3134 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3135 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3136 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3137 len -= 15; /* remove "pc_bddc_coarse_" */ 3138 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3139 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3140 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3141 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3142 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3143 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3144 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3145 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3146 } 3147 3148 /* DIRICHLET PROBLEM */ 3149 if (dirichlet) { 3150 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3151 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3152 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3153 if (pcbddc->dbg_flag) { 3154 Mat A_IIn; 3155 3156 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3157 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3158 pcis->A_II = A_IIn; 3159 } 3160 } 3161 if (pcbddc->local_mat->symmetric_set) { 3162 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3163 } 3164 /* Matrix for Dirichlet problem is pcis->A_II */ 3165 n_D = pcis->n - pcis->n_B; 3166 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3167 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3168 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3169 /* default */ 3170 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3171 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3172 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3173 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3174 if (issbaij) { 3175 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3176 } else { 3177 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3178 } 3179 /* Allow user's customization */ 3180 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3181 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3182 } 3183 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3184 if (sub_schurs && sub_schurs->reuse_solver) { 3185 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3186 3187 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3188 } 3189 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3190 if (!n_D) { 3191 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3192 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3193 } 3194 /* Set Up KSP for Dirichlet problem of BDDC */ 3195 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3196 /* set ksp_D into pcis data */ 3197 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3198 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3199 pcis->ksp_D = pcbddc->ksp_D; 3200 } 3201 3202 /* NEUMANN PROBLEM */ 3203 A_RR = 0; 3204 if (neumann) { 3205 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3206 PetscInt ibs,mbs; 3207 PetscBool issbaij; 3208 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3209 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3210 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3211 if (pcbddc->ksp_R) { /* already created ksp */ 3212 PetscInt nn_R; 3213 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3214 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3215 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3216 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3217 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3218 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3219 reuse = MAT_INITIAL_MATRIX; 3220 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3221 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3222 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3223 reuse = MAT_INITIAL_MATRIX; 3224 } else { /* safe to reuse the matrix */ 3225 reuse = MAT_REUSE_MATRIX; 3226 } 3227 } 3228 /* last check */ 3229 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3230 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3231 reuse = MAT_INITIAL_MATRIX; 3232 } 3233 } else { /* first time, so we need to create the matrix */ 3234 reuse = MAT_INITIAL_MATRIX; 3235 } 3236 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3237 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3238 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3239 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3240 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3241 if (matis->A == pcbddc->local_mat) { 3242 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3243 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3244 } else { 3245 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3246 } 3247 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3248 if (matis->A == pcbddc->local_mat) { 3249 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3250 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3251 } else { 3252 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3253 } 3254 } 3255 /* extract A_RR */ 3256 if (sub_schurs && sub_schurs->reuse_solver) { 3257 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3258 3259 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3260 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3261 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3262 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3263 } else { 3264 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3265 } 3266 } else { 3267 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3268 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3269 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3270 } 3271 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3272 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3273 } 3274 if (pcbddc->local_mat->symmetric_set) { 3275 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3276 } 3277 if (!pcbddc->ksp_R) { /* create object if not present */ 3278 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3279 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3280 /* default */ 3281 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3282 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3283 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3284 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3285 if (issbaij) { 3286 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3287 } else { 3288 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3289 } 3290 /* Allow user's customization */ 3291 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3292 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3293 } 3294 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3295 if (!n_R) { 3296 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3297 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3298 } 3299 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3300 /* Reuse solver if it is present */ 3301 if (sub_schurs && sub_schurs->reuse_solver) { 3302 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3303 3304 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3305 } 3306 /* Set Up KSP for Neumann problem of BDDC */ 3307 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3308 } 3309 3310 if (pcbddc->dbg_flag) { 3311 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3312 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3314 } 3315 3316 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3317 check_corr[0] = check_corr[1] = PETSC_FALSE; 3318 if (pcbddc->NullSpace_corr[0]) { 3319 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3320 } 3321 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3322 check_corr[0] = PETSC_TRUE; 3323 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3324 } 3325 if (neumann && pcbddc->NullSpace_corr[2]) { 3326 check_corr[1] = PETSC_TRUE; 3327 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3328 } 3329 3330 /* check Dirichlet and Neumann solvers */ 3331 if (pcbddc->dbg_flag) { 3332 if (dirichlet) { /* Dirichlet */ 3333 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3334 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3335 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3336 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3337 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3339 if (check_corr[0]) { 3340 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3341 } 3342 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3343 } 3344 if (neumann) { /* Neumann */ 3345 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3346 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3347 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3348 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3349 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3351 if (check_corr[1]) { 3352 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3353 } 3354 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3355 } 3356 } 3357 /* free Neumann problem's matrix */ 3358 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3364 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3365 { 3366 PetscErrorCode ierr; 3367 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3368 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3369 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3370 3371 PetscFunctionBegin; 3372 if (!reuse_solver) { 3373 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3374 } 3375 if (!pcbddc->switch_static) { 3376 if (applytranspose && pcbddc->local_auxmat1) { 3377 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3378 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3379 } 3380 if (!reuse_solver) { 3381 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3382 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3383 } else { 3384 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3385 3386 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3387 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3388 } 3389 } else { 3390 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3391 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3392 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3393 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3394 if (applytranspose && pcbddc->local_auxmat1) { 3395 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3396 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3397 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3398 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3399 } 3400 } 3401 if (!reuse_solver || pcbddc->switch_static) { 3402 if (applytranspose) { 3403 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3404 } else { 3405 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3406 } 3407 } else { 3408 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3409 3410 if (applytranspose) { 3411 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3412 } else { 3413 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3414 } 3415 } 3416 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3417 if (!pcbddc->switch_static) { 3418 if (!reuse_solver) { 3419 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3420 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3421 } else { 3422 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3423 3424 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3425 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3426 } 3427 if (!applytranspose && pcbddc->local_auxmat1) { 3428 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3429 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3430 } 3431 } else { 3432 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3433 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3434 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3435 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3436 if (!applytranspose && pcbddc->local_auxmat1) { 3437 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3438 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3439 } 3440 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3441 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3442 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3443 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3444 } 3445 PetscFunctionReturn(0); 3446 } 3447 3448 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3449 #undef __FUNCT__ 3450 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3451 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3452 { 3453 PetscErrorCode ierr; 3454 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3455 PC_IS* pcis = (PC_IS*) (pc->data); 3456 const PetscScalar zero = 0.0; 3457 3458 PetscFunctionBegin; 3459 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3460 if (!pcbddc->benign_apply_coarse_only) { 3461 if (applytranspose) { 3462 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3463 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3464 } else { 3465 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3466 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3467 } 3468 } else { 3469 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3470 } 3471 3472 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3473 if (pcbddc->benign_n) { 3474 PetscScalar *array; 3475 PetscInt j; 3476 3477 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3478 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3479 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3480 } 3481 3482 /* start communications from local primal nodes to rhs of coarse solver */ 3483 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3484 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3485 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3486 3487 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3488 if (pcbddc->coarse_ksp) { 3489 Mat coarse_mat; 3490 Vec rhs,sol; 3491 MatNullSpace nullsp; 3492 PetscBool isbddc = PETSC_FALSE; 3493 3494 if (pcbddc->benign_have_null) { 3495 PC coarse_pc; 3496 3497 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3498 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3499 /* we need to propagate to coarser levels the need for a possible benign correction */ 3500 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3501 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3502 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3503 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3504 } 3505 } 3506 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3507 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3508 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3509 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3510 if (nullsp) { 3511 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3512 } 3513 if (applytranspose) { 3514 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3515 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3516 } else { 3517 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3518 PC coarse_pc; 3519 3520 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3521 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3522 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3523 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3524 } else { 3525 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3526 } 3527 } 3528 /* we don't need the benign correction at coarser levels anymore */ 3529 if (pcbddc->benign_have_null && isbddc) { 3530 PC coarse_pc; 3531 PC_BDDC* coarsepcbddc; 3532 3533 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3534 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3535 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3536 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3537 } 3538 if (nullsp) { 3539 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3540 } 3541 } 3542 3543 /* Local solution on R nodes */ 3544 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3545 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3546 } 3547 /* communications from coarse sol to local primal nodes */ 3548 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3549 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3550 3551 /* Sum contributions from the two levels */ 3552 if (!pcbddc->benign_apply_coarse_only) { 3553 if (applytranspose) { 3554 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3555 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3556 } else { 3557 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3558 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3559 } 3560 /* store p0 */ 3561 if (pcbddc->benign_n) { 3562 PetscScalar *array; 3563 PetscInt j; 3564 3565 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3566 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3567 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3568 } 3569 } else { /* expand the coarse solution */ 3570 if (applytranspose) { 3571 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3572 } else { 3573 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3574 } 3575 } 3576 PetscFunctionReturn(0); 3577 } 3578 3579 #undef __FUNCT__ 3580 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3581 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3582 { 3583 PetscErrorCode ierr; 3584 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3585 PetscScalar *array; 3586 Vec from,to; 3587 3588 PetscFunctionBegin; 3589 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3590 from = pcbddc->coarse_vec; 3591 to = pcbddc->vec1_P; 3592 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3593 Vec tvec; 3594 3595 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3596 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3597 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3598 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3599 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3600 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3601 } 3602 } else { /* from local to global -> put data in coarse right hand side */ 3603 from = pcbddc->vec1_P; 3604 to = pcbddc->coarse_vec; 3605 } 3606 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3607 PetscFunctionReturn(0); 3608 } 3609 3610 #undef __FUNCT__ 3611 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3612 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3613 { 3614 PetscErrorCode ierr; 3615 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3616 PetscScalar *array; 3617 Vec from,to; 3618 3619 PetscFunctionBegin; 3620 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3621 from = pcbddc->coarse_vec; 3622 to = pcbddc->vec1_P; 3623 } else { /* from local to global -> put data in coarse right hand side */ 3624 from = pcbddc->vec1_P; 3625 to = pcbddc->coarse_vec; 3626 } 3627 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3628 if (smode == SCATTER_FORWARD) { 3629 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3630 Vec tvec; 3631 3632 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3633 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3634 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3635 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3636 } 3637 } else { 3638 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3639 ierr = VecResetArray(from);CHKERRQ(ierr); 3640 } 3641 } 3642 PetscFunctionReturn(0); 3643 } 3644 3645 /* uncomment for testing purposes */ 3646 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3647 #undef __FUNCT__ 3648 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3649 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3650 { 3651 PetscErrorCode ierr; 3652 PC_IS* pcis = (PC_IS*)(pc->data); 3653 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3654 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3655 /* one and zero */ 3656 PetscScalar one=1.0,zero=0.0; 3657 /* space to store constraints and their local indices */ 3658 PetscScalar *constraints_data; 3659 PetscInt *constraints_idxs,*constraints_idxs_B; 3660 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3661 PetscInt *constraints_n; 3662 /* iterators */ 3663 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3664 /* BLAS integers */ 3665 PetscBLASInt lwork,lierr; 3666 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3667 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3668 /* reuse */ 3669 PetscInt olocal_primal_size,olocal_primal_size_cc; 3670 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3671 /* change of basis */ 3672 PetscBool qr_needed; 3673 PetscBT change_basis,qr_needed_idx; 3674 /* auxiliary stuff */ 3675 PetscInt *nnz,*is_indices; 3676 PetscInt ncc; 3677 /* some quantities */ 3678 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3679 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3680 3681 PetscFunctionBegin; 3682 /* Destroy Mat objects computed previously */ 3683 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3684 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3685 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3686 /* save info on constraints from previous setup (if any) */ 3687 olocal_primal_size = pcbddc->local_primal_size; 3688 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3689 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3690 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3691 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3692 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3693 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3694 3695 if (!pcbddc->adaptive_selection) { 3696 IS ISForVertices,*ISForFaces,*ISForEdges; 3697 MatNullSpace nearnullsp; 3698 const Vec *nearnullvecs; 3699 Vec *localnearnullsp; 3700 PetscScalar *array; 3701 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3702 PetscBool nnsp_has_cnst; 3703 /* LAPACK working arrays for SVD or POD */ 3704 PetscBool skip_lapack,boolforchange; 3705 PetscScalar *work; 3706 PetscReal *singular_vals; 3707 #if defined(PETSC_USE_COMPLEX) 3708 PetscReal *rwork; 3709 #endif 3710 #if defined(PETSC_MISSING_LAPACK_GESVD) 3711 PetscScalar *temp_basis,*correlation_mat; 3712 #else 3713 PetscBLASInt dummy_int=1; 3714 PetscScalar dummy_scalar=1.; 3715 #endif 3716 3717 /* Get index sets for faces, edges and vertices from graph */ 3718 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3719 /* print some info */ 3720 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3721 PetscInt nv; 3722 3723 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3724 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3725 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3726 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3727 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3729 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3730 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3731 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3732 } 3733 3734 /* free unneeded index sets */ 3735 if (!pcbddc->use_vertices) { 3736 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3737 } 3738 if (!pcbddc->use_edges) { 3739 for (i=0;i<n_ISForEdges;i++) { 3740 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3741 } 3742 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3743 n_ISForEdges = 0; 3744 } 3745 if (!pcbddc->use_faces) { 3746 for (i=0;i<n_ISForFaces;i++) { 3747 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3748 } 3749 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3750 n_ISForFaces = 0; 3751 } 3752 3753 /* check if near null space is attached to global mat */ 3754 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3755 if (nearnullsp) { 3756 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3757 /* remove any stored info */ 3758 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3759 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3760 /* store information for BDDC solver reuse */ 3761 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3762 pcbddc->onearnullspace = nearnullsp; 3763 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3764 for (i=0;i<nnsp_size;i++) { 3765 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3766 } 3767 } else { /* if near null space is not provided BDDC uses constants by default */ 3768 nnsp_size = 0; 3769 nnsp_has_cnst = PETSC_TRUE; 3770 } 3771 /* get max number of constraints on a single cc */ 3772 max_constraints = nnsp_size; 3773 if (nnsp_has_cnst) max_constraints++; 3774 3775 /* 3776 Evaluate maximum storage size needed by the procedure 3777 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3778 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3779 There can be multiple constraints per connected component 3780 */ 3781 n_vertices = 0; 3782 if (ISForVertices) { 3783 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3784 } 3785 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3786 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3787 3788 total_counts = n_ISForFaces+n_ISForEdges; 3789 total_counts *= max_constraints; 3790 total_counts += n_vertices; 3791 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3792 3793 total_counts = 0; 3794 max_size_of_constraint = 0; 3795 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3796 IS used_is; 3797 if (i<n_ISForEdges) { 3798 used_is = ISForEdges[i]; 3799 } else { 3800 used_is = ISForFaces[i-n_ISForEdges]; 3801 } 3802 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3803 total_counts += j; 3804 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3805 } 3806 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3807 3808 /* get local part of global near null space vectors */ 3809 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3810 for (k=0;k<nnsp_size;k++) { 3811 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3812 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3813 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3814 } 3815 3816 /* whether or not to skip lapack calls */ 3817 skip_lapack = PETSC_TRUE; 3818 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3819 3820 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3821 if (!skip_lapack) { 3822 PetscScalar temp_work; 3823 3824 #if defined(PETSC_MISSING_LAPACK_GESVD) 3825 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3826 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3827 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3828 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3829 #if defined(PETSC_USE_COMPLEX) 3830 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3831 #endif 3832 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3833 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3834 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3835 lwork = -1; 3836 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3837 #if !defined(PETSC_USE_COMPLEX) 3838 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3839 #else 3840 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3841 #endif 3842 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3843 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3844 #else /* on missing GESVD */ 3845 /* SVD */ 3846 PetscInt max_n,min_n; 3847 max_n = max_size_of_constraint; 3848 min_n = max_constraints; 3849 if (max_size_of_constraint < max_constraints) { 3850 min_n = max_size_of_constraint; 3851 max_n = max_constraints; 3852 } 3853 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3854 #if defined(PETSC_USE_COMPLEX) 3855 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3856 #endif 3857 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3858 lwork = -1; 3859 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3860 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3861 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3862 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3863 #if !defined(PETSC_USE_COMPLEX) 3864 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3865 #else 3866 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3867 #endif 3868 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3869 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3870 #endif /* on missing GESVD */ 3871 /* Allocate optimal workspace */ 3872 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3873 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3874 } 3875 /* Now we can loop on constraining sets */ 3876 total_counts = 0; 3877 constraints_idxs_ptr[0] = 0; 3878 constraints_data_ptr[0] = 0; 3879 /* vertices */ 3880 if (n_vertices) { 3881 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3882 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3883 for (i=0;i<n_vertices;i++) { 3884 constraints_n[total_counts] = 1; 3885 constraints_data[total_counts] = 1.0; 3886 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3887 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3888 total_counts++; 3889 } 3890 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3891 n_vertices = total_counts; 3892 } 3893 3894 /* edges and faces */ 3895 total_counts_cc = total_counts; 3896 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3897 IS used_is; 3898 PetscBool idxs_copied = PETSC_FALSE; 3899 3900 if (ncc<n_ISForEdges) { 3901 used_is = ISForEdges[ncc]; 3902 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3903 } else { 3904 used_is = ISForFaces[ncc-n_ISForEdges]; 3905 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3906 } 3907 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3908 3909 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3910 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3911 /* change of basis should not be performed on local periodic nodes */ 3912 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3913 if (nnsp_has_cnst) { 3914 PetscScalar quad_value; 3915 3916 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3917 idxs_copied = PETSC_TRUE; 3918 3919 if (!pcbddc->use_nnsp_true) { 3920 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3921 } else { 3922 quad_value = 1.0; 3923 } 3924 for (j=0;j<size_of_constraint;j++) { 3925 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3926 } 3927 temp_constraints++; 3928 total_counts++; 3929 } 3930 for (k=0;k<nnsp_size;k++) { 3931 PetscReal real_value; 3932 PetscScalar *ptr_to_data; 3933 3934 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3935 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3936 for (j=0;j<size_of_constraint;j++) { 3937 ptr_to_data[j] = array[is_indices[j]]; 3938 } 3939 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3940 /* check if array is null on the connected component */ 3941 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3942 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3943 if (real_value > 0.0) { /* keep indices and values */ 3944 temp_constraints++; 3945 total_counts++; 3946 if (!idxs_copied) { 3947 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3948 idxs_copied = PETSC_TRUE; 3949 } 3950 } 3951 } 3952 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3953 valid_constraints = temp_constraints; 3954 if (!pcbddc->use_nnsp_true && temp_constraints) { 3955 if (temp_constraints == 1) { /* just normalize the constraint */ 3956 PetscScalar norm,*ptr_to_data; 3957 3958 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3960 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3961 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3962 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3963 } else { /* perform SVD */ 3964 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3965 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3966 3967 #if defined(PETSC_MISSING_LAPACK_GESVD) 3968 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3969 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3970 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3971 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3972 from that computed using LAPACKgesvd 3973 -> This is due to a different computation of eigenvectors in LAPACKheev 3974 -> The quality of the POD-computed basis will be the same */ 3975 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3976 /* Store upper triangular part of correlation matrix */ 3977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3978 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3979 for (j=0;j<temp_constraints;j++) { 3980 for (k=0;k<j+1;k++) { 3981 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3982 } 3983 } 3984 /* compute eigenvalues and eigenvectors of correlation matrix */ 3985 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3986 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3987 #if !defined(PETSC_USE_COMPLEX) 3988 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3989 #else 3990 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3991 #endif 3992 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3993 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3994 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3995 j = 0; 3996 while (j < temp_constraints && singular_vals[j] < tol) j++; 3997 total_counts = total_counts-j; 3998 valid_constraints = temp_constraints-j; 3999 /* scale and copy POD basis into used quadrature memory */ 4000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4001 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4002 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4004 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4006 if (j<temp_constraints) { 4007 PetscInt ii; 4008 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4010 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4012 for (k=0;k<temp_constraints-j;k++) { 4013 for (ii=0;ii<size_of_constraint;ii++) { 4014 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4015 } 4016 } 4017 } 4018 #else /* on missing GESVD */ 4019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4020 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4021 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4022 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4023 #if !defined(PETSC_USE_COMPLEX) 4024 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4025 #else 4026 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4027 #endif 4028 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4029 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4030 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4031 k = temp_constraints; 4032 if (k > size_of_constraint) k = size_of_constraint; 4033 j = 0; 4034 while (j < k && singular_vals[k-j-1] < tol) j++; 4035 valid_constraints = k-j; 4036 total_counts = total_counts-temp_constraints+valid_constraints; 4037 #endif /* on missing GESVD */ 4038 } 4039 } 4040 /* update pointers information */ 4041 if (valid_constraints) { 4042 constraints_n[total_counts_cc] = valid_constraints; 4043 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4044 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4045 /* set change_of_basis flag */ 4046 if (boolforchange) { 4047 PetscBTSet(change_basis,total_counts_cc); 4048 } 4049 total_counts_cc++; 4050 } 4051 } 4052 /* free workspace */ 4053 if (!skip_lapack) { 4054 ierr = PetscFree(work);CHKERRQ(ierr); 4055 #if defined(PETSC_USE_COMPLEX) 4056 ierr = PetscFree(rwork);CHKERRQ(ierr); 4057 #endif 4058 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4059 #if defined(PETSC_MISSING_LAPACK_GESVD) 4060 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4061 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4062 #endif 4063 } 4064 for (k=0;k<nnsp_size;k++) { 4065 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4066 } 4067 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4068 /* free index sets of faces, edges and vertices */ 4069 for (i=0;i<n_ISForFaces;i++) { 4070 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4071 } 4072 if (n_ISForFaces) { 4073 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4074 } 4075 for (i=0;i<n_ISForEdges;i++) { 4076 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4077 } 4078 if (n_ISForEdges) { 4079 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4080 } 4081 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4082 } else { 4083 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4084 4085 total_counts = 0; 4086 n_vertices = 0; 4087 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4088 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4089 } 4090 max_constraints = 0; 4091 total_counts_cc = 0; 4092 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4093 total_counts += pcbddc->adaptive_constraints_n[i]; 4094 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4095 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4096 } 4097 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4098 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4099 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4100 constraints_data = pcbddc->adaptive_constraints_data; 4101 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4102 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4103 total_counts_cc = 0; 4104 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4105 if (pcbddc->adaptive_constraints_n[i]) { 4106 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4107 } 4108 } 4109 #if 0 4110 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4111 for (i=0;i<total_counts_cc;i++) { 4112 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4113 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4114 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4115 printf(" %d",constraints_idxs[j]); 4116 } 4117 printf("\n"); 4118 printf("number of cc: %d\n",constraints_n[i]); 4119 } 4120 for (i=0;i<n_vertices;i++) { 4121 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4122 } 4123 for (i=0;i<sub_schurs->n_subs;i++) { 4124 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4125 } 4126 #endif 4127 4128 max_size_of_constraint = 0; 4129 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4130 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4131 /* Change of basis */ 4132 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4133 if (pcbddc->use_change_of_basis) { 4134 for (i=0;i<sub_schurs->n_subs;i++) { 4135 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4136 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4137 } 4138 } 4139 } 4140 } 4141 pcbddc->local_primal_size = total_counts; 4142 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4143 4144 /* map constraints_idxs in boundary numbering */ 4145 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4146 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4147 4148 /* Create constraint matrix */ 4149 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4150 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4151 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4152 4153 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4154 /* determine if a QR strategy is needed for change of basis */ 4155 qr_needed = PETSC_FALSE; 4156 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4157 total_primal_vertices=0; 4158 pcbddc->local_primal_size_cc = 0; 4159 for (i=0;i<total_counts_cc;i++) { 4160 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4161 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4162 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4163 pcbddc->local_primal_size_cc += 1; 4164 } else if (PetscBTLookup(change_basis,i)) { 4165 for (k=0;k<constraints_n[i];k++) { 4166 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4167 } 4168 pcbddc->local_primal_size_cc += constraints_n[i]; 4169 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4170 PetscBTSet(qr_needed_idx,i); 4171 qr_needed = PETSC_TRUE; 4172 } 4173 } else { 4174 pcbddc->local_primal_size_cc += 1; 4175 } 4176 } 4177 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4178 pcbddc->n_vertices = total_primal_vertices; 4179 /* permute indices in order to have a sorted set of vertices */ 4180 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4181 4182 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4183 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4184 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4185 4186 /* nonzero structure of constraint matrix */ 4187 /* and get reference dof for local constraints */ 4188 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4189 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4190 4191 j = total_primal_vertices; 4192 total_counts = total_primal_vertices; 4193 cum = total_primal_vertices; 4194 for (i=n_vertices;i<total_counts_cc;i++) { 4195 if (!PetscBTLookup(change_basis,i)) { 4196 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4197 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4198 cum++; 4199 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4200 for (k=0;k<constraints_n[i];k++) { 4201 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4202 nnz[j+k] = size_of_constraint; 4203 } 4204 j += constraints_n[i]; 4205 } 4206 } 4207 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4208 ierr = PetscFree(nnz);CHKERRQ(ierr); 4209 4210 /* set values in constraint matrix */ 4211 for (i=0;i<total_primal_vertices;i++) { 4212 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4213 } 4214 total_counts = total_primal_vertices; 4215 for (i=n_vertices;i<total_counts_cc;i++) { 4216 if (!PetscBTLookup(change_basis,i)) { 4217 PetscInt *cols; 4218 4219 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4220 cols = constraints_idxs+constraints_idxs_ptr[i]; 4221 for (k=0;k<constraints_n[i];k++) { 4222 PetscInt row = total_counts+k; 4223 PetscScalar *vals; 4224 4225 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4226 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4227 } 4228 total_counts += constraints_n[i]; 4229 } 4230 } 4231 /* assembling */ 4232 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4233 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 4235 /* 4236 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4237 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4238 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4239 */ 4240 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4241 if (pcbddc->use_change_of_basis) { 4242 /* dual and primal dofs on a single cc */ 4243 PetscInt dual_dofs,primal_dofs; 4244 /* working stuff for GEQRF */ 4245 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4246 PetscBLASInt lqr_work; 4247 /* working stuff for UNGQR */ 4248 PetscScalar *gqr_work,lgqr_work_t; 4249 PetscBLASInt lgqr_work; 4250 /* working stuff for TRTRS */ 4251 PetscScalar *trs_rhs; 4252 PetscBLASInt Blas_NRHS; 4253 /* pointers for values insertion into change of basis matrix */ 4254 PetscInt *start_rows,*start_cols; 4255 PetscScalar *start_vals; 4256 /* working stuff for values insertion */ 4257 PetscBT is_primal; 4258 PetscInt *aux_primal_numbering_B; 4259 /* matrix sizes */ 4260 PetscInt global_size,local_size; 4261 /* temporary change of basis */ 4262 Mat localChangeOfBasisMatrix; 4263 /* extra space for debugging */ 4264 PetscScalar *dbg_work; 4265 4266 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4267 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4268 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4269 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4270 /* nonzeros for local mat */ 4271 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4272 if (!pcbddc->benign_change || pcbddc->fake_change) { 4273 for (i=0;i<pcis->n;i++) nnz[i]=1; 4274 } else { 4275 const PetscInt *ii; 4276 PetscInt n; 4277 PetscBool flg_row; 4278 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4279 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4280 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4281 } 4282 for (i=n_vertices;i<total_counts_cc;i++) { 4283 if (PetscBTLookup(change_basis,i)) { 4284 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4285 if (PetscBTLookup(qr_needed_idx,i)) { 4286 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4287 } else { 4288 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4289 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4290 } 4291 } 4292 } 4293 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4294 ierr = PetscFree(nnz);CHKERRQ(ierr); 4295 /* Set interior change in the matrix */ 4296 if (!pcbddc->benign_change || pcbddc->fake_change) { 4297 for (i=0;i<pcis->n;i++) { 4298 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4299 } 4300 } else { 4301 const PetscInt *ii,*jj; 4302 PetscScalar *aa; 4303 PetscInt n; 4304 PetscBool flg_row; 4305 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4306 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4307 for (i=0;i<n;i++) { 4308 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4309 } 4310 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4311 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4312 } 4313 4314 if (pcbddc->dbg_flag) { 4315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4317 } 4318 4319 4320 /* Now we loop on the constraints which need a change of basis */ 4321 /* 4322 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4323 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4324 4325 Basic blocks of change of basis matrix T computed by 4326 4327 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4328 4329 | 1 0 ... 0 s_1/S | 4330 | 0 1 ... 0 s_2/S | 4331 | ... | 4332 | 0 ... 1 s_{n-1}/S | 4333 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4334 4335 with S = \sum_{i=1}^n s_i^2 4336 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4337 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4338 4339 - QR decomposition of constraints otherwise 4340 */ 4341 if (qr_needed) { 4342 /* space to store Q */ 4343 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4344 /* first we issue queries for optimal work */ 4345 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4346 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4347 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4348 lqr_work = -1; 4349 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4350 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4351 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4352 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4353 lgqr_work = -1; 4354 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4355 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4356 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4357 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4358 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4359 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4360 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4361 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4362 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4363 /* array to store scaling factors for reflectors */ 4364 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4365 /* array to store rhs and solution of triangular solver */ 4366 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4367 /* allocating workspace for check */ 4368 if (pcbddc->dbg_flag) { 4369 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4370 } 4371 } 4372 /* array to store whether a node is primal or not */ 4373 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4374 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4375 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4376 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4377 for (i=0;i<total_primal_vertices;i++) { 4378 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4379 } 4380 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4381 4382 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4383 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4384 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4385 if (PetscBTLookup(change_basis,total_counts)) { 4386 /* get constraint info */ 4387 primal_dofs = constraints_n[total_counts]; 4388 dual_dofs = size_of_constraint-primal_dofs; 4389 4390 if (pcbddc->dbg_flag) { 4391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4392 } 4393 4394 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4395 4396 /* copy quadrature constraints for change of basis check */ 4397 if (pcbddc->dbg_flag) { 4398 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4399 } 4400 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4401 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4402 4403 /* compute QR decomposition of constraints */ 4404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4405 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4406 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4407 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4408 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4409 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4410 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4411 4412 /* explictly compute R^-T */ 4413 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4414 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4415 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4417 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4418 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4419 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4420 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4421 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4422 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4423 4424 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4425 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4427 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4428 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4429 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4430 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4431 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4432 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4433 4434 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4435 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4436 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4438 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4439 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4440 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4441 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4442 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4443 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4444 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4445 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4446 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4447 4448 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4449 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4450 /* insert cols for primal dofs */ 4451 for (j=0;j<primal_dofs;j++) { 4452 start_vals = &qr_basis[j*size_of_constraint]; 4453 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4454 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4455 } 4456 /* insert cols for dual dofs */ 4457 for (j=0,k=0;j<dual_dofs;k++) { 4458 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4459 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4460 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4461 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4462 j++; 4463 } 4464 } 4465 4466 /* check change of basis */ 4467 if (pcbddc->dbg_flag) { 4468 PetscInt ii,jj; 4469 PetscBool valid_qr=PETSC_TRUE; 4470 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4472 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4474 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4475 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4476 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4477 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4478 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4479 for (jj=0;jj<size_of_constraint;jj++) { 4480 for (ii=0;ii<primal_dofs;ii++) { 4481 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4482 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4483 } 4484 } 4485 if (!valid_qr) { 4486 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4487 for (jj=0;jj<size_of_constraint;jj++) { 4488 for (ii=0;ii<primal_dofs;ii++) { 4489 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4490 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4491 } 4492 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4493 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4494 } 4495 } 4496 } 4497 } else { 4498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4499 } 4500 } 4501 } else { /* simple transformation block */ 4502 PetscInt row,col; 4503 PetscScalar val,norm; 4504 4505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4506 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4507 for (j=0;j<size_of_constraint;j++) { 4508 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4509 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4510 if (!PetscBTLookup(is_primal,row_B)) { 4511 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4512 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4513 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4514 } else { 4515 for (k=0;k<size_of_constraint;k++) { 4516 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4517 if (row != col) { 4518 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4519 } else { 4520 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4521 } 4522 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4523 } 4524 } 4525 } 4526 if (pcbddc->dbg_flag) { 4527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4528 } 4529 } 4530 } else { 4531 if (pcbddc->dbg_flag) { 4532 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4533 } 4534 } 4535 } 4536 4537 /* free workspace */ 4538 if (qr_needed) { 4539 if (pcbddc->dbg_flag) { 4540 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4541 } 4542 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4543 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4544 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4545 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4546 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4547 } 4548 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4549 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4550 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4551 4552 /* assembling of global change of variable */ 4553 if (!pcbddc->fake_change) { 4554 Mat tmat; 4555 PetscInt bs; 4556 4557 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4558 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4559 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4560 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4561 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4562 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4563 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4564 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4565 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4566 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4567 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4568 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4569 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4570 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4571 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4572 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4573 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4574 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4575 4576 /* check */ 4577 if (pcbddc->dbg_flag) { 4578 PetscReal error; 4579 Vec x,x_change; 4580 4581 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4582 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4583 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4584 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4585 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4586 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4587 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4588 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4589 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4590 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4591 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4592 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4595 ierr = VecDestroy(&x);CHKERRQ(ierr); 4596 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4597 } 4598 /* adapt sub_schurs computed (if any) */ 4599 if (pcbddc->use_deluxe_scaling) { 4600 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4601 4602 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4603 if (sub_schurs && sub_schurs->S_Ej_all) { 4604 Mat S_new,tmat; 4605 IS is_all_N,is_V_Sall = NULL; 4606 4607 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4608 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4609 if (pcbddc->deluxe_zerorows) { 4610 ISLocalToGlobalMapping NtoSall; 4611 IS is_V; 4612 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4613 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4614 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4615 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4616 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4617 } 4618 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4619 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4620 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4621 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4622 if (pcbddc->deluxe_zerorows) { 4623 const PetscScalar *array; 4624 const PetscInt *idxs_V,*idxs_all; 4625 PetscInt i,n_V; 4626 4627 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4628 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4629 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4630 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4631 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4632 for (i=0;i<n_V;i++) { 4633 PetscScalar val; 4634 PetscInt idx; 4635 4636 idx = idxs_V[i]; 4637 val = array[idxs_all[idxs_V[i]]]; 4638 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4639 } 4640 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4641 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4642 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4643 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4644 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4645 } 4646 sub_schurs->S_Ej_all = S_new; 4647 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4648 if (sub_schurs->sum_S_Ej_all) { 4649 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4650 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4651 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4652 if (pcbddc->deluxe_zerorows) { 4653 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4654 } 4655 sub_schurs->sum_S_Ej_all = S_new; 4656 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4657 } 4658 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4659 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4660 } 4661 /* destroy any change of basis context in sub_schurs */ 4662 if (sub_schurs && sub_schurs->change) { 4663 PetscInt i; 4664 4665 for (i=0;i<sub_schurs->n_subs;i++) { 4666 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4667 } 4668 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4669 } 4670 } 4671 if (pcbddc->switch_static) { /* need to save the local change */ 4672 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4673 } else { 4674 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4675 } 4676 /* determine if any process has changed the pressures locally */ 4677 pcbddc->change_interior = pcbddc->benign_have_null; 4678 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4679 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4680 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4681 pcbddc->use_qr_single = qr_needed; 4682 } 4683 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4684 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4685 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4686 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4687 } else { 4688 Mat benign_global = NULL; 4689 if (pcbddc->benign_have_null) { 4690 Mat tmat; 4691 4692 pcbddc->change_interior = PETSC_TRUE; 4693 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4694 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4695 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4696 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4697 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4698 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4699 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4700 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4701 if (pcbddc->benign_change) { 4702 Mat M; 4703 4704 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4705 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4706 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4707 ierr = MatDestroy(&M);CHKERRQ(ierr); 4708 } else { 4709 Mat eye; 4710 PetscScalar *array; 4711 4712 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4713 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4714 for (i=0;i<pcis->n;i++) { 4715 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4716 } 4717 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4718 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4719 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4720 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4721 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4722 } 4723 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4724 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4725 } 4726 if (pcbddc->user_ChangeOfBasisMatrix) { 4727 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4728 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4729 } else if (pcbddc->benign_have_null) { 4730 pcbddc->ChangeOfBasisMatrix = benign_global; 4731 } 4732 } 4733 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4734 IS is_global; 4735 const PetscInt *gidxs; 4736 4737 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4738 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4739 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4740 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4741 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4742 } 4743 } 4744 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4745 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4746 } 4747 4748 if (!pcbddc->fake_change) { 4749 /* add pressure dofs to set of primal nodes for numbering purposes */ 4750 for (i=0;i<pcbddc->benign_n;i++) { 4751 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4752 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4753 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4754 pcbddc->local_primal_size_cc++; 4755 pcbddc->local_primal_size++; 4756 } 4757 4758 /* check if a new primal space has been introduced (also take into account benign trick) */ 4759 pcbddc->new_primal_space_local = PETSC_TRUE; 4760 if (olocal_primal_size == pcbddc->local_primal_size) { 4761 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4762 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4763 if (!pcbddc->new_primal_space_local) { 4764 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4765 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4766 } 4767 } 4768 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4769 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4770 } 4771 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4772 4773 /* flush dbg viewer */ 4774 if (pcbddc->dbg_flag) { 4775 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4776 } 4777 4778 /* free workspace */ 4779 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4780 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4781 if (!pcbddc->adaptive_selection) { 4782 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4783 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4784 } else { 4785 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4786 pcbddc->adaptive_constraints_idxs_ptr, 4787 pcbddc->adaptive_constraints_data_ptr, 4788 pcbddc->adaptive_constraints_idxs, 4789 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4790 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4791 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4792 } 4793 PetscFunctionReturn(0); 4794 } 4795 4796 #undef __FUNCT__ 4797 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4798 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4799 { 4800 ISLocalToGlobalMapping map; 4801 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4802 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4803 PetscInt ierr,i,N; 4804 4805 PetscFunctionBegin; 4806 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 4807 /* Reset previously computed graph */ 4808 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4809 /* Init local Graph struct */ 4810 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4811 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 4812 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N);CHKERRQ(ierr); 4813 4814 /* Check validity of the csr graph passed in by the user */ 4815 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4816 4817 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4818 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4819 PetscInt *xadj,*adjncy; 4820 PetscInt nvtxs; 4821 PetscBool flg_row=PETSC_FALSE; 4822 4823 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4824 if (flg_row) { 4825 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4826 pcbddc->computed_rowadj = PETSC_TRUE; 4827 } 4828 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4829 } 4830 if (pcbddc->dbg_flag) { 4831 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4832 } 4833 4834 /* Setup of Graph */ 4835 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4836 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4837 4838 /* attach info on disconnected subdomains if present */ 4839 if (pcbddc->n_local_subs) { 4840 PetscInt *local_subs; 4841 4842 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4843 for (i=0;i<pcbddc->n_local_subs;i++) { 4844 const PetscInt *idxs; 4845 PetscInt nl,j; 4846 4847 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4848 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4849 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 4850 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4851 } 4852 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4853 pcbddc->mat_graph->local_subs = local_subs; 4854 } 4855 4856 /* Graph's connected components analysis */ 4857 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4858 4859 /* set flag indicating analysis has been done */ 4860 pcbddc->graphanalyzed = PETSC_TRUE; 4861 PetscFunctionReturn(0); 4862 } 4863 4864 #undef __FUNCT__ 4865 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4866 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4867 { 4868 PetscInt i,j; 4869 PetscScalar *alphas; 4870 PetscErrorCode ierr; 4871 4872 PetscFunctionBegin; 4873 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4874 for (i=0;i<n;i++) { 4875 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4876 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4877 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4878 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4879 } 4880 ierr = PetscFree(alphas);CHKERRQ(ierr); 4881 PetscFunctionReturn(0); 4882 } 4883 4884 #undef __FUNCT__ 4885 #define __FUNCT__ "MatISGetSubassemblingPattern" 4886 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4887 { 4888 Mat A; 4889 PetscInt n_neighs,*neighs,*n_shared,**shared; 4890 PetscMPIInt size,rank,color; 4891 PetscInt *xadj,*adjncy; 4892 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4893 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4894 PetscInt void_procs,*procs_candidates = NULL; 4895 PetscInt xadj_count, *count; 4896 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4897 PetscSubcomm psubcomm; 4898 MPI_Comm subcomm; 4899 PetscErrorCode ierr; 4900 4901 PetscFunctionBegin; 4902 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4903 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4904 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4905 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4906 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4907 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4908 4909 if (have_void) *have_void = PETSC_FALSE; 4910 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4911 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4912 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4913 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4914 im_active = !!(n); 4915 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4916 void_procs = size - active_procs; 4917 /* get ranks of of non-active processes in mat communicator */ 4918 if (void_procs) { 4919 PetscInt ncand; 4920 4921 if (have_void) *have_void = PETSC_TRUE; 4922 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4923 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4924 for (i=0,ncand=0;i<size;i++) { 4925 if (!procs_candidates[i]) { 4926 procs_candidates[ncand++] = i; 4927 } 4928 } 4929 /* force n_subdomains to be not greater that the number of non-active processes */ 4930 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4931 } 4932 4933 /* number of subdomains requested greater than active processes -> just shift the matrix 4934 number of subdomains requested 1 -> send to master or first candidate in voids */ 4935 if (active_procs < *n_subdomains || *n_subdomains == 1) { 4936 PetscInt issize,isidx,dest; 4937 if (*n_subdomains == 1) dest = 0; 4938 else dest = rank; 4939 if (im_active) { 4940 issize = 1; 4941 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4942 isidx = procs_candidates[dest]; 4943 } else { 4944 isidx = dest; 4945 } 4946 } else { 4947 issize = 0; 4948 isidx = -1; 4949 } 4950 *n_subdomains = active_procs; 4951 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4952 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4953 PetscFunctionReturn(0); 4954 } 4955 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4956 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4957 threshold = PetscMax(threshold,2); 4958 4959 /* Get info on mapping */ 4960 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4961 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4962 4963 /* build local CSR graph of subdomains' connectivity */ 4964 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4965 xadj[0] = 0; 4966 xadj[1] = PetscMax(n_neighs-1,0); 4967 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4968 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4969 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4970 for (i=1;i<n_neighs;i++) 4971 for (j=0;j<n_shared[i];j++) 4972 count[shared[i][j]] += 1; 4973 4974 xadj_count = 0; 4975 for (i=1;i<n_neighs;i++) { 4976 for (j=0;j<n_shared[i];j++) { 4977 if (count[shared[i][j]] < threshold) { 4978 adjncy[xadj_count] = neighs[i]; 4979 adjncy_wgt[xadj_count] = n_shared[i]; 4980 xadj_count++; 4981 break; 4982 } 4983 } 4984 } 4985 xadj[1] = xadj_count; 4986 ierr = PetscFree(count);CHKERRQ(ierr); 4987 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4988 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4989 4990 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4991 4992 /* Restrict work on active processes only */ 4993 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4994 if (void_procs) { 4995 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4996 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4997 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4998 subcomm = PetscSubcommChild(psubcomm); 4999 } else { 5000 psubcomm = NULL; 5001 subcomm = PetscObjectComm((PetscObject)mat); 5002 } 5003 5004 v_wgt = NULL; 5005 if (!color) { 5006 ierr = PetscFree(xadj);CHKERRQ(ierr); 5007 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5008 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5009 } else { 5010 Mat subdomain_adj; 5011 IS new_ranks,new_ranks_contig; 5012 MatPartitioning partitioner; 5013 PetscInt rstart=0,rend=0; 5014 PetscInt *is_indices,*oldranks; 5015 PetscMPIInt size; 5016 PetscBool aggregate; 5017 5018 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5019 if (void_procs) { 5020 PetscInt prank = rank; 5021 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5022 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5023 for (i=0;i<xadj[1];i++) { 5024 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5025 } 5026 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5027 } else { 5028 oldranks = NULL; 5029 } 5030 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5031 if (aggregate) { /* TODO: all this part could be made more efficient */ 5032 PetscInt lrows,row,ncols,*cols; 5033 PetscMPIInt nrank; 5034 PetscScalar *vals; 5035 5036 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5037 lrows = 0; 5038 if (nrank<redprocs) { 5039 lrows = size/redprocs; 5040 if (nrank<size%redprocs) lrows++; 5041 } 5042 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5043 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5044 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5045 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5046 row = nrank; 5047 ncols = xadj[1]-xadj[0]; 5048 cols = adjncy; 5049 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5050 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5051 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5052 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5053 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5054 ierr = PetscFree(xadj);CHKERRQ(ierr); 5055 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5056 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5057 ierr = PetscFree(vals);CHKERRQ(ierr); 5058 if (use_vwgt) { 5059 Vec v; 5060 const PetscScalar *array; 5061 PetscInt nl; 5062 5063 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5064 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5065 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5066 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5067 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5068 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5069 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5070 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5071 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5072 ierr = VecDestroy(&v);CHKERRQ(ierr); 5073 } 5074 } else { 5075 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5076 if (use_vwgt) { 5077 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5078 v_wgt[0] = local_size; 5079 } 5080 } 5081 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5082 5083 /* Partition */ 5084 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5085 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5086 if (v_wgt) { 5087 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5088 } 5089 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5090 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5091 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5092 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5093 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5094 5095 /* renumber new_ranks to avoid "holes" in new set of processors */ 5096 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5097 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5098 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5099 if (!aggregate) { 5100 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5101 #if defined(PETSC_USE_DEBUG) 5102 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5103 #endif 5104 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5105 } else if (oldranks) { 5106 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5107 } else { 5108 ranks_send_to_idx[0] = is_indices[0]; 5109 } 5110 } else { 5111 PetscInt idxs[1]; 5112 PetscMPIInt tag; 5113 MPI_Request *reqs; 5114 5115 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5116 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5117 for (i=rstart;i<rend;i++) { 5118 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5119 } 5120 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5121 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5122 ierr = PetscFree(reqs);CHKERRQ(ierr); 5123 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5124 #if defined(PETSC_USE_DEBUG) 5125 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5126 #endif 5127 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5128 } else if (oldranks) { 5129 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5130 } else { 5131 ranks_send_to_idx[0] = idxs[0]; 5132 } 5133 } 5134 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5135 /* clean up */ 5136 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5137 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5138 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5139 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5140 } 5141 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5142 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5143 5144 /* assemble parallel IS for sends */ 5145 i = 1; 5146 if (!color) i=0; 5147 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5148 PetscFunctionReturn(0); 5149 } 5150 5151 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5152 5153 #undef __FUNCT__ 5154 #define __FUNCT__ "MatISSubassemble" 5155 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5156 { 5157 Mat local_mat; 5158 IS is_sends_internal; 5159 PetscInt rows,cols,new_local_rows; 5160 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5161 PetscBool ismatis,isdense,newisdense,destroy_mat; 5162 ISLocalToGlobalMapping l2gmap; 5163 PetscInt* l2gmap_indices; 5164 const PetscInt* is_indices; 5165 MatType new_local_type; 5166 /* buffers */ 5167 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5168 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5169 PetscInt *recv_buffer_idxs_local; 5170 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5171 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5172 /* MPI */ 5173 MPI_Comm comm,comm_n; 5174 PetscSubcomm subcomm; 5175 PetscMPIInt n_sends,n_recvs,commsize; 5176 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5177 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5178 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5179 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5180 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5181 PetscErrorCode ierr; 5182 5183 PetscFunctionBegin; 5184 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5185 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5186 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5187 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5188 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5189 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5190 PetscValidLogicalCollectiveBool(mat,reuse,6); 5191 PetscValidLogicalCollectiveInt(mat,nis,8); 5192 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5193 if (nvecs) { 5194 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5195 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5196 } 5197 /* further checks */ 5198 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5199 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5200 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5201 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5202 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5203 if (reuse && *mat_n) { 5204 PetscInt mrows,mcols,mnrows,mncols; 5205 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5206 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5207 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5208 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5209 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5210 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5211 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5212 } 5213 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5214 PetscValidLogicalCollectiveInt(mat,bs,0); 5215 5216 /* prepare IS for sending if not provided */ 5217 if (!is_sends) { 5218 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5219 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5220 } else { 5221 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5222 is_sends_internal = is_sends; 5223 } 5224 5225 /* get comm */ 5226 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5227 5228 /* compute number of sends */ 5229 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5230 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5231 5232 /* compute number of receives */ 5233 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5234 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5235 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5236 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5237 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5238 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5239 ierr = PetscFree(iflags);CHKERRQ(ierr); 5240 5241 /* restrict comm if requested */ 5242 subcomm = 0; 5243 destroy_mat = PETSC_FALSE; 5244 if (restrict_comm) { 5245 PetscMPIInt color,subcommsize; 5246 5247 color = 0; 5248 if (restrict_full) { 5249 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5250 } else { 5251 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5252 } 5253 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5254 subcommsize = commsize - subcommsize; 5255 /* check if reuse has been requested */ 5256 if (reuse) { 5257 if (*mat_n) { 5258 PetscMPIInt subcommsize2; 5259 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5260 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5261 comm_n = PetscObjectComm((PetscObject)*mat_n); 5262 } else { 5263 comm_n = PETSC_COMM_SELF; 5264 } 5265 } else { /* MAT_INITIAL_MATRIX */ 5266 PetscMPIInt rank; 5267 5268 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5269 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5270 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5271 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5272 comm_n = PetscSubcommChild(subcomm); 5273 } 5274 /* flag to destroy *mat_n if not significative */ 5275 if (color) destroy_mat = PETSC_TRUE; 5276 } else { 5277 comm_n = comm; 5278 } 5279 5280 /* prepare send/receive buffers */ 5281 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5282 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5283 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5284 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5285 if (nis) { 5286 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5287 } 5288 5289 /* Get data from local matrices */ 5290 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5291 /* TODO: See below some guidelines on how to prepare the local buffers */ 5292 /* 5293 send_buffer_vals should contain the raw values of the local matrix 5294 send_buffer_idxs should contain: 5295 - MatType_PRIVATE type 5296 - PetscInt size_of_l2gmap 5297 - PetscInt global_row_indices[size_of_l2gmap] 5298 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5299 */ 5300 else { 5301 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5302 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5303 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5304 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5305 send_buffer_idxs[1] = i; 5306 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5307 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5308 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5309 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5310 for (i=0;i<n_sends;i++) { 5311 ilengths_vals[is_indices[i]] = len*len; 5312 ilengths_idxs[is_indices[i]] = len+2; 5313 } 5314 } 5315 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5316 /* additional is (if any) */ 5317 if (nis) { 5318 PetscMPIInt psum; 5319 PetscInt j; 5320 for (j=0,psum=0;j<nis;j++) { 5321 PetscInt plen; 5322 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5323 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5324 psum += len+1; /* indices + lenght */ 5325 } 5326 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5327 for (j=0,psum=0;j<nis;j++) { 5328 PetscInt plen; 5329 const PetscInt *is_array_idxs; 5330 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5331 send_buffer_idxs_is[psum] = plen; 5332 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5333 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5334 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5335 psum += plen+1; /* indices + lenght */ 5336 } 5337 for (i=0;i<n_sends;i++) { 5338 ilengths_idxs_is[is_indices[i]] = psum; 5339 } 5340 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5341 } 5342 5343 buf_size_idxs = 0; 5344 buf_size_vals = 0; 5345 buf_size_idxs_is = 0; 5346 buf_size_vecs = 0; 5347 for (i=0;i<n_recvs;i++) { 5348 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5349 buf_size_vals += (PetscInt)olengths_vals[i]; 5350 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5351 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5352 } 5353 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5354 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5355 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5356 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5357 5358 /* get new tags for clean communications */ 5359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5360 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5361 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5362 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5363 5364 /* allocate for requests */ 5365 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5366 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5367 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5368 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5369 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5370 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5371 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5372 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5373 5374 /* communications */ 5375 ptr_idxs = recv_buffer_idxs; 5376 ptr_vals = recv_buffer_vals; 5377 ptr_idxs_is = recv_buffer_idxs_is; 5378 ptr_vecs = recv_buffer_vecs; 5379 for (i=0;i<n_recvs;i++) { 5380 source_dest = onodes[i]; 5381 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5382 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5383 ptr_idxs += olengths_idxs[i]; 5384 ptr_vals += olengths_vals[i]; 5385 if (nis) { 5386 source_dest = onodes_is[i]; 5387 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5388 ptr_idxs_is += olengths_idxs_is[i]; 5389 } 5390 if (nvecs) { 5391 source_dest = onodes[i]; 5392 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5393 ptr_vecs += olengths_idxs[i]-2; 5394 } 5395 } 5396 for (i=0;i<n_sends;i++) { 5397 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5398 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5399 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5400 if (nis) { 5401 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5402 } 5403 if (nvecs) { 5404 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5405 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5406 } 5407 } 5408 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5409 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5410 5411 /* assemble new l2g map */ 5412 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5413 ptr_idxs = recv_buffer_idxs; 5414 new_local_rows = 0; 5415 for (i=0;i<n_recvs;i++) { 5416 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5417 ptr_idxs += olengths_idxs[i]; 5418 } 5419 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5420 ptr_idxs = recv_buffer_idxs; 5421 new_local_rows = 0; 5422 for (i=0;i<n_recvs;i++) { 5423 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5424 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5425 ptr_idxs += olengths_idxs[i]; 5426 } 5427 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5428 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5429 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5430 5431 /* infer new local matrix type from received local matrices type */ 5432 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5433 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5434 if (n_recvs) { 5435 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5436 ptr_idxs = recv_buffer_idxs; 5437 for (i=0;i<n_recvs;i++) { 5438 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5439 new_local_type_private = MATAIJ_PRIVATE; 5440 break; 5441 } 5442 ptr_idxs += olengths_idxs[i]; 5443 } 5444 switch (new_local_type_private) { 5445 case MATDENSE_PRIVATE: 5446 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5447 new_local_type = MATSEQAIJ; 5448 bs = 1; 5449 } else { /* if I receive only 1 dense matrix */ 5450 new_local_type = MATSEQDENSE; 5451 bs = 1; 5452 } 5453 break; 5454 case MATAIJ_PRIVATE: 5455 new_local_type = MATSEQAIJ; 5456 bs = 1; 5457 break; 5458 case MATBAIJ_PRIVATE: 5459 new_local_type = MATSEQBAIJ; 5460 break; 5461 case MATSBAIJ_PRIVATE: 5462 new_local_type = MATSEQSBAIJ; 5463 break; 5464 default: 5465 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5466 break; 5467 } 5468 } else { /* by default, new_local_type is seqdense */ 5469 new_local_type = MATSEQDENSE; 5470 bs = 1; 5471 } 5472 5473 /* create MATIS object if needed */ 5474 if (!reuse) { 5475 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5476 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5477 } else { 5478 /* it also destroys the local matrices */ 5479 if (*mat_n) { 5480 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5481 } else { /* this is a fake object */ 5482 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5483 } 5484 } 5485 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5486 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5487 5488 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5489 5490 /* Global to local map of received indices */ 5491 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5492 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5493 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5494 5495 /* restore attributes -> type of incoming data and its size */ 5496 buf_size_idxs = 0; 5497 for (i=0;i<n_recvs;i++) { 5498 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5499 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5500 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5501 } 5502 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5503 5504 /* set preallocation */ 5505 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5506 if (!newisdense) { 5507 PetscInt *new_local_nnz=0; 5508 5509 ptr_idxs = recv_buffer_idxs_local; 5510 if (n_recvs) { 5511 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5512 } 5513 for (i=0;i<n_recvs;i++) { 5514 PetscInt j; 5515 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5516 for (j=0;j<*(ptr_idxs+1);j++) { 5517 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5518 } 5519 } else { 5520 /* TODO */ 5521 } 5522 ptr_idxs += olengths_idxs[i]; 5523 } 5524 if (new_local_nnz) { 5525 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5526 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5527 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5528 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5529 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5530 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5531 } else { 5532 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5533 } 5534 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5535 } else { 5536 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5537 } 5538 5539 /* set values */ 5540 ptr_vals = recv_buffer_vals; 5541 ptr_idxs = recv_buffer_idxs_local; 5542 for (i=0;i<n_recvs;i++) { 5543 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5544 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5545 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5546 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5547 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5548 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5549 } else { 5550 /* TODO */ 5551 } 5552 ptr_idxs += olengths_idxs[i]; 5553 ptr_vals += olengths_vals[i]; 5554 } 5555 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5556 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5557 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5558 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5559 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5560 5561 #if 0 5562 if (!restrict_comm) { /* check */ 5563 Vec lvec,rvec; 5564 PetscReal infty_error; 5565 5566 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5567 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5568 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5569 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5570 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5571 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5572 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5573 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5574 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5575 } 5576 #endif 5577 5578 /* assemble new additional is (if any) */ 5579 if (nis) { 5580 PetscInt **temp_idxs,*count_is,j,psum; 5581 5582 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5583 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5584 ptr_idxs = recv_buffer_idxs_is; 5585 psum = 0; 5586 for (i=0;i<n_recvs;i++) { 5587 for (j=0;j<nis;j++) { 5588 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5589 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5590 psum += plen; 5591 ptr_idxs += plen+1; /* shift pointer to received data */ 5592 } 5593 } 5594 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5595 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5596 for (i=1;i<nis;i++) { 5597 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5598 } 5599 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5600 ptr_idxs = recv_buffer_idxs_is; 5601 for (i=0;i<n_recvs;i++) { 5602 for (j=0;j<nis;j++) { 5603 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5604 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5605 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5606 ptr_idxs += plen+1; /* shift pointer to received data */ 5607 } 5608 } 5609 for (i=0;i<nis;i++) { 5610 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5611 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5612 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5613 } 5614 ierr = PetscFree(count_is);CHKERRQ(ierr); 5615 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5616 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5617 } 5618 /* free workspace */ 5619 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5620 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5621 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5622 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5623 if (isdense) { 5624 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5625 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5626 } else { 5627 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5628 } 5629 if (nis) { 5630 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5631 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5632 } 5633 5634 if (nvecs) { 5635 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5636 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5637 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5638 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5639 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5640 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5641 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5642 /* set values */ 5643 ptr_vals = recv_buffer_vecs; 5644 ptr_idxs = recv_buffer_idxs_local; 5645 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5646 for (i=0;i<n_recvs;i++) { 5647 PetscInt j; 5648 for (j=0;j<*(ptr_idxs+1);j++) { 5649 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5650 } 5651 ptr_idxs += olengths_idxs[i]; 5652 ptr_vals += olengths_idxs[i]-2; 5653 } 5654 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5655 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5656 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5657 } 5658 5659 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5660 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5661 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5662 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5663 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5664 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5665 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5666 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5667 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5668 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5669 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5670 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5671 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5672 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5673 ierr = PetscFree(onodes);CHKERRQ(ierr); 5674 if (nis) { 5675 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5676 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5677 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5678 } 5679 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5680 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5681 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5682 for (i=0;i<nis;i++) { 5683 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5684 } 5685 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5686 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5687 } 5688 *mat_n = NULL; 5689 } 5690 PetscFunctionReturn(0); 5691 } 5692 5693 /* temporary hack into ksp private data structure */ 5694 #include <petsc/private/kspimpl.h> 5695 5696 #undef __FUNCT__ 5697 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5698 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5699 { 5700 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5701 PC_IS *pcis = (PC_IS*)pc->data; 5702 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5703 Mat coarsedivudotp = NULL; 5704 MatNullSpace CoarseNullSpace = NULL; 5705 ISLocalToGlobalMapping coarse_islg; 5706 IS coarse_is,*isarray; 5707 PetscInt i,im_active=-1,active_procs=-1; 5708 PetscInt nis,nisdofs,nisneu,nisvert; 5709 PC pc_temp; 5710 PCType coarse_pc_type; 5711 KSPType coarse_ksp_type; 5712 PetscBool multilevel_requested,multilevel_allowed; 5713 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5714 Mat t_coarse_mat_is; 5715 PetscInt ncoarse; 5716 PetscBool compute_vecs = PETSC_FALSE; 5717 PetscScalar *array; 5718 MatReuse coarse_mat_reuse; 5719 PetscBool restr, full_restr, have_void; 5720 PetscErrorCode ierr; 5721 5722 PetscFunctionBegin; 5723 /* Assign global numbering to coarse dofs */ 5724 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5725 PetscInt ocoarse_size; 5726 compute_vecs = PETSC_TRUE; 5727 ocoarse_size = pcbddc->coarse_size; 5728 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5729 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5730 /* see if we can avoid some work */ 5731 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5732 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5733 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5734 PC pc; 5735 PetscBool isbddc; 5736 5737 /* temporary workaround since PCBDDC does not have a reset method so far */ 5738 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5739 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5740 if (isbddc) { 5741 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5742 } else { 5743 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5744 } 5745 coarse_reuse = PETSC_FALSE; 5746 } else { /* we can safely reuse already computed coarse matrix */ 5747 coarse_reuse = PETSC_TRUE; 5748 } 5749 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5750 coarse_reuse = PETSC_FALSE; 5751 } 5752 /* reset any subassembling information */ 5753 if (!coarse_reuse || pcbddc->recompute_topography) { 5754 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5755 } 5756 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5757 coarse_reuse = PETSC_TRUE; 5758 } 5759 /* assemble coarse matrix */ 5760 if (coarse_reuse && pcbddc->coarse_ksp) { 5761 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5762 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5763 coarse_mat_reuse = MAT_REUSE_MATRIX; 5764 } else { 5765 coarse_mat = NULL; 5766 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5767 } 5768 5769 /* creates temporary l2gmap and IS for coarse indexes */ 5770 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5771 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5772 5773 /* creates temporary MATIS object for coarse matrix */ 5774 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5775 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5776 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5777 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5778 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5779 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5780 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5782 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5783 5784 /* count "active" (i.e. with positive local size) and "void" processes */ 5785 im_active = !!(pcis->n); 5786 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5787 5788 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 5789 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5790 /* full_restr : just use the receivers from the subassembling pattern */ 5791 coarse_mat_is = NULL; 5792 multilevel_allowed = PETSC_FALSE; 5793 multilevel_requested = PETSC_FALSE; 5794 full_restr = PETSC_TRUE; 5795 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5796 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5797 if (multilevel_requested) { 5798 ncoarse = active_procs/pcbddc->coarsening_ratio; 5799 restr = PETSC_FALSE; 5800 full_restr = PETSC_FALSE; 5801 } else { 5802 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5803 restr = PETSC_TRUE; 5804 full_restr = PETSC_TRUE; 5805 } 5806 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5807 ncoarse = PetscMax(1,ncoarse); 5808 if (!pcbddc->coarse_subassembling) { 5809 if (pcbddc->coarsening_ratio > 1) { 5810 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5811 } else { 5812 PetscMPIInt size,rank; 5813 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5814 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5815 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5816 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5817 } 5818 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5819 PetscInt psum; 5820 PetscMPIInt size; 5821 if (pcbddc->coarse_ksp) psum = 1; 5822 else psum = 0; 5823 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5824 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5825 if (ncoarse < size) have_void = PETSC_TRUE; 5826 } 5827 /* determine if we can go multilevel */ 5828 if (multilevel_requested) { 5829 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5830 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5831 } 5832 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5833 5834 /* dump subassembling pattern */ 5835 if (pcbddc->dbg_flag && multilevel_allowed) { 5836 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5837 } 5838 5839 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5840 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5841 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5842 const PetscInt *idxs; 5843 ISLocalToGlobalMapping tmap; 5844 5845 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5846 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5847 /* allocate space for temporary storage */ 5848 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5849 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5850 /* allocate for IS array */ 5851 nisdofs = pcbddc->n_ISForDofsLocal; 5852 nisneu = !!pcbddc->NeumannBoundariesLocal; 5853 nisvert = 0; /* nisvert is not used */ 5854 nis = nisdofs + nisneu + nisvert; 5855 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5856 /* dofs splitting */ 5857 for (i=0;i<nisdofs;i++) { 5858 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5859 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5860 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5861 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5862 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5863 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5864 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5865 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5866 } 5867 /* neumann boundaries */ 5868 if (pcbddc->NeumannBoundariesLocal) { 5869 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5870 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5871 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5872 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5873 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5874 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5875 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5876 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5877 } 5878 /* free memory */ 5879 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5880 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5881 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5882 } else { 5883 nis = 0; 5884 nisdofs = 0; 5885 nisneu = 0; 5886 nisvert = 0; 5887 isarray = NULL; 5888 } 5889 /* destroy no longer needed map */ 5890 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5891 5892 /* subassemble */ 5893 if (multilevel_allowed) { 5894 Vec vp[1]; 5895 PetscInt nvecs = 0; 5896 PetscBool reuse,reuser; 5897 5898 if (coarse_mat) reuse = PETSC_TRUE; 5899 else reuse = PETSC_FALSE; 5900 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5901 vp[0] = NULL; 5902 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5903 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5904 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5905 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5906 nvecs = 1; 5907 5908 if (pcbddc->divudotp) { 5909 Mat B,loc_divudotp; 5910 Vec v,p; 5911 IS dummy; 5912 PetscInt np; 5913 5914 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5915 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5916 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5917 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5918 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5919 ierr = VecSet(p,1.);CHKERRQ(ierr); 5920 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5921 ierr = VecDestroy(&p);CHKERRQ(ierr); 5922 ierr = MatDestroy(&B);CHKERRQ(ierr); 5923 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5924 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5925 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5926 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5927 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5928 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5929 ierr = VecDestroy(&v);CHKERRQ(ierr); 5930 } 5931 } 5932 if (reuser) { 5933 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5934 } else { 5935 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5936 } 5937 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5938 PetscScalar *arraym,*arrayv; 5939 PetscInt nl; 5940 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5942 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5943 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5944 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5945 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5946 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5947 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5948 } else { 5949 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5950 } 5951 } else { 5952 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5953 } 5954 if (coarse_mat_is || coarse_mat) { 5955 PetscMPIInt size; 5956 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5957 if (!multilevel_allowed) { 5958 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5959 } else { 5960 Mat A; 5961 5962 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5963 if (coarse_mat_is) { 5964 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5965 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5966 coarse_mat = coarse_mat_is; 5967 } 5968 /* be sure we don't have MatSeqDENSE as local mat */ 5969 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5970 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5971 } 5972 } 5973 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5974 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5975 5976 /* create local to global scatters for coarse problem */ 5977 if (compute_vecs) { 5978 PetscInt lrows; 5979 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5980 if (coarse_mat) { 5981 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5982 } else { 5983 lrows = 0; 5984 } 5985 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5986 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5987 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5988 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5989 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5990 } 5991 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5992 5993 /* set defaults for coarse KSP and PC */ 5994 if (multilevel_allowed) { 5995 coarse_ksp_type = KSPRICHARDSON; 5996 coarse_pc_type = PCBDDC; 5997 } else { 5998 coarse_ksp_type = KSPPREONLY; 5999 coarse_pc_type = PCREDUNDANT; 6000 } 6001 6002 /* print some info if requested */ 6003 if (pcbddc->dbg_flag) { 6004 if (!multilevel_allowed) { 6005 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6006 if (multilevel_requested) { 6007 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6008 } else if (pcbddc->max_levels) { 6009 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6010 } 6011 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6012 } 6013 } 6014 6015 /* create the coarse KSP object only once with defaults */ 6016 if (coarse_mat) { 6017 PetscViewer dbg_viewer = NULL; 6018 if (pcbddc->dbg_flag) { 6019 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6020 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6021 } 6022 if (!pcbddc->coarse_ksp) { 6023 char prefix[256],str_level[16]; 6024 size_t len; 6025 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6026 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6027 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6028 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6029 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6030 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6031 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6032 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6033 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6034 /* prefix */ 6035 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6036 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6037 if (!pcbddc->current_level) { 6038 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6039 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6040 } else { 6041 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6042 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6043 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6044 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6045 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6046 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6047 } 6048 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6049 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6050 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6051 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6052 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6053 /* allow user customization */ 6054 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6055 } 6056 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6057 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6058 if (nisdofs) { 6059 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6060 for (i=0;i<nisdofs;i++) { 6061 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6062 } 6063 } 6064 if (nisneu) { 6065 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6066 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6067 } 6068 if (nisvert) { 6069 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6070 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6071 } 6072 6073 /* get some info after set from options */ 6074 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6075 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6076 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6077 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6078 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6079 isbddc = PETSC_FALSE; 6080 } 6081 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6082 if (isredundant) { 6083 KSP inner_ksp; 6084 PC inner_pc; 6085 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6086 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6087 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6088 } 6089 6090 /* parameters which miss an API */ 6091 if (isbddc) { 6092 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6093 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6094 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6095 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6096 if (pcbddc_coarse->benign_saddle_point) { 6097 Mat coarsedivudotp_is; 6098 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6099 IS row,col; 6100 const PetscInt *gidxs; 6101 PetscInt n,st,M,N; 6102 6103 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6104 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6105 st = st-n; 6106 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6107 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6108 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6109 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6110 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6111 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6112 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6113 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6114 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6115 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6116 ierr = ISDestroy(&row);CHKERRQ(ierr); 6117 ierr = ISDestroy(&col);CHKERRQ(ierr); 6118 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6119 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6120 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6121 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6122 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6123 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6124 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6125 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6126 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6127 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6128 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6129 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6130 } 6131 } 6132 6133 /* propagate symmetry info of coarse matrix */ 6134 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6135 if (pc->pmat->symmetric_set) { 6136 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6137 } 6138 if (pc->pmat->hermitian_set) { 6139 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6140 } 6141 if (pc->pmat->spd_set) { 6142 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6143 } 6144 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6145 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6146 } 6147 /* set operators */ 6148 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6149 if (pcbddc->dbg_flag) { 6150 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6151 } 6152 } 6153 ierr = PetscFree(isarray);CHKERRQ(ierr); 6154 #if 0 6155 { 6156 PetscViewer viewer; 6157 char filename[256]; 6158 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6159 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6160 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6161 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6162 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6163 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6164 } 6165 #endif 6166 6167 if (pcbddc->coarse_ksp) { 6168 Vec crhs,csol; 6169 6170 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6171 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6172 if (!csol) { 6173 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6174 } 6175 if (!crhs) { 6176 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6177 } 6178 } 6179 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6180 6181 /* compute null space for coarse solver if the benign trick has been requested */ 6182 if (pcbddc->benign_null) { 6183 6184 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6185 for (i=0;i<pcbddc->benign_n;i++) { 6186 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6187 } 6188 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6189 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6190 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6191 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6192 if (coarse_mat) { 6193 Vec nullv; 6194 PetscScalar *array,*array2; 6195 PetscInt nl; 6196 6197 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6198 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6199 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6200 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6201 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6202 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6203 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6204 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6205 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6206 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6207 } 6208 } 6209 6210 if (pcbddc->coarse_ksp) { 6211 PetscBool ispreonly; 6212 6213 if (CoarseNullSpace) { 6214 PetscBool isnull; 6215 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6216 if (isnull) { 6217 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6218 } 6219 /* TODO: add local nullspaces (if any) */ 6220 } 6221 /* setup coarse ksp */ 6222 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6223 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6224 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6225 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6226 KSP check_ksp; 6227 KSPType check_ksp_type; 6228 PC check_pc; 6229 Vec check_vec,coarse_vec; 6230 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6231 PetscInt its; 6232 PetscBool compute_eigs; 6233 PetscReal *eigs_r,*eigs_c; 6234 PetscInt neigs; 6235 const char *prefix; 6236 6237 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6238 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6239 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6240 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6241 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6242 /* prevent from setup unneeded object */ 6243 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6244 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6245 if (ispreonly) { 6246 check_ksp_type = KSPPREONLY; 6247 compute_eigs = PETSC_FALSE; 6248 } else { 6249 check_ksp_type = KSPGMRES; 6250 compute_eigs = PETSC_TRUE; 6251 } 6252 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6253 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6254 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6255 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6256 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6257 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6258 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6259 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6260 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6261 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6262 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6263 /* create random vec */ 6264 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6265 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6266 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6267 /* solve coarse problem */ 6268 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6269 /* set eigenvalue estimation if preonly has not been requested */ 6270 if (compute_eigs) { 6271 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6272 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6273 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6274 if (neigs) { 6275 lambda_max = eigs_r[neigs-1]; 6276 lambda_min = eigs_r[0]; 6277 if (pcbddc->use_coarse_estimates) { 6278 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6279 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6280 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6281 } 6282 } 6283 } 6284 } 6285 6286 /* check coarse problem residual error */ 6287 if (pcbddc->dbg_flag) { 6288 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6289 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6290 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6291 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6292 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6293 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6294 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6295 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6296 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6297 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6298 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6299 if (CoarseNullSpace) { 6300 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6301 } 6302 if (compute_eigs) { 6303 PetscReal lambda_max_s,lambda_min_s; 6304 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6305 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6306 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6307 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6308 for (i=0;i<neigs;i++) { 6309 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6310 } 6311 } 6312 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6313 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6314 } 6315 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6316 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6317 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6318 if (compute_eigs) { 6319 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6320 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6321 } 6322 } 6323 } 6324 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6325 /* print additional info */ 6326 if (pcbddc->dbg_flag) { 6327 /* waits until all processes reaches this point */ 6328 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6329 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6331 } 6332 6333 /* free memory */ 6334 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6335 PetscFunctionReturn(0); 6336 } 6337 6338 #undef __FUNCT__ 6339 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6340 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6341 { 6342 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6343 PC_IS* pcis = (PC_IS*)pc->data; 6344 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6345 IS subset,subset_mult,subset_n; 6346 PetscInt local_size,coarse_size=0; 6347 PetscInt *local_primal_indices=NULL; 6348 const PetscInt *t_local_primal_indices; 6349 PetscErrorCode ierr; 6350 6351 PetscFunctionBegin; 6352 /* Compute global number of coarse dofs */ 6353 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6354 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6355 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6356 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6357 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6358 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6359 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6360 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6361 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6362 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6363 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6364 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6365 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6366 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6367 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6368 6369 /* check numbering */ 6370 if (pcbddc->dbg_flag) { 6371 PetscScalar coarsesum,*array,*array2; 6372 PetscInt i; 6373 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6374 6375 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6376 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6377 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6378 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6379 /* counter */ 6380 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6381 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6382 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6383 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6384 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6385 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6386 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6387 for (i=0;i<pcbddc->local_primal_size;i++) { 6388 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6389 } 6390 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6391 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6392 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6393 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6394 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6395 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6396 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6397 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6398 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6399 for (i=0;i<pcis->n;i++) { 6400 if (array[i] != 0.0 && array[i] != array2[i]) { 6401 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6402 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6403 set_error = PETSC_TRUE; 6404 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6406 } 6407 } 6408 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6409 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6411 for (i=0;i<pcis->n;i++) { 6412 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6413 } 6414 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6415 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6416 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6417 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6418 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6419 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6420 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6421 PetscInt *gidxs; 6422 6423 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6424 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6425 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6426 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6427 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6428 for (i=0;i<pcbddc->local_primal_size;i++) { 6429 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6430 } 6431 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6432 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6433 } 6434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6435 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6436 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6437 } 6438 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6439 /* get back data */ 6440 *coarse_size_n = coarse_size; 6441 *local_primal_indices_n = local_primal_indices; 6442 PetscFunctionReturn(0); 6443 } 6444 6445 #undef __FUNCT__ 6446 #define __FUNCT__ "PCBDDCGlobalToLocal" 6447 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6448 { 6449 IS localis_t; 6450 PetscInt i,lsize,*idxs,n; 6451 PetscScalar *vals; 6452 PetscErrorCode ierr; 6453 6454 PetscFunctionBegin; 6455 /* get indices in local ordering exploiting local to global map */ 6456 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6457 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6458 for (i=0;i<lsize;i++) vals[i] = 1.0; 6459 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6460 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6461 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6462 if (idxs) { /* multilevel guard */ 6463 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6464 } 6465 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6466 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6467 ierr = PetscFree(vals);CHKERRQ(ierr); 6468 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6469 /* now compute set in local ordering */ 6470 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6471 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6472 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6473 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6474 for (i=0,lsize=0;i<n;i++) { 6475 if (PetscRealPart(vals[i]) > 0.5) { 6476 lsize++; 6477 } 6478 } 6479 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6480 for (i=0,lsize=0;i<n;i++) { 6481 if (PetscRealPart(vals[i]) > 0.5) { 6482 idxs[lsize++] = i; 6483 } 6484 } 6485 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6486 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6487 *localis = localis_t; 6488 PetscFunctionReturn(0); 6489 } 6490 6491 #undef __FUNCT__ 6492 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6493 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6494 { 6495 PC_IS *pcis=(PC_IS*)pc->data; 6496 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6497 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6498 Mat S_j; 6499 PetscInt *used_xadj,*used_adjncy; 6500 PetscBool free_used_adj; 6501 PetscErrorCode ierr; 6502 6503 PetscFunctionBegin; 6504 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6505 free_used_adj = PETSC_FALSE; 6506 if (pcbddc->sub_schurs_layers == -1) { 6507 used_xadj = NULL; 6508 used_adjncy = NULL; 6509 } else { 6510 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6511 used_xadj = pcbddc->mat_graph->xadj; 6512 used_adjncy = pcbddc->mat_graph->adjncy; 6513 } else if (pcbddc->computed_rowadj) { 6514 used_xadj = pcbddc->mat_graph->xadj; 6515 used_adjncy = pcbddc->mat_graph->adjncy; 6516 } else { 6517 PetscBool flg_row=PETSC_FALSE; 6518 const PetscInt *xadj,*adjncy; 6519 PetscInt nvtxs; 6520 6521 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6522 if (flg_row) { 6523 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6524 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6525 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6526 free_used_adj = PETSC_TRUE; 6527 } else { 6528 pcbddc->sub_schurs_layers = -1; 6529 used_xadj = NULL; 6530 used_adjncy = NULL; 6531 } 6532 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6533 } 6534 } 6535 6536 /* setup sub_schurs data */ 6537 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6538 if (!sub_schurs->schur_explicit) { 6539 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6540 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6541 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6542 } else { 6543 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6544 PetscBool isseqaij,need_change = PETSC_FALSE; 6545 PetscInt benign_n; 6546 Mat change = NULL; 6547 Vec scaling = NULL; 6548 IS change_primal = NULL; 6549 6550 if (!pcbddc->use_vertices && reuse_solvers) { 6551 PetscInt n_vertices; 6552 6553 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6554 reuse_solvers = (PetscBool)!n_vertices; 6555 } 6556 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6557 if (!isseqaij) { 6558 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6559 if (matis->A == pcbddc->local_mat) { 6560 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6561 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6562 } else { 6563 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6564 } 6565 } 6566 if (!pcbddc->benign_change_explicit) { 6567 benign_n = pcbddc->benign_n; 6568 } else { 6569 benign_n = 0; 6570 } 6571 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6572 We need a global reduction to avoid possible deadlocks. 6573 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6574 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6575 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6576 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6577 need_change = (PetscBool)(!need_change); 6578 } 6579 /* If the user defines additional constraints, we import them here. 6580 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6581 if (need_change) { 6582 PC_IS *pcisf; 6583 PC_BDDC *pcbddcf; 6584 PC pcf; 6585 6586 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6587 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6588 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6589 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6590 /* hacks */ 6591 pcisf = (PC_IS*)pcf->data; 6592 pcisf->is_B_local = pcis->is_B_local; 6593 pcisf->vec1_N = pcis->vec1_N; 6594 pcisf->BtoNmap = pcis->BtoNmap; 6595 pcisf->n = pcis->n; 6596 pcisf->n_B = pcis->n_B; 6597 pcbddcf = (PC_BDDC*)pcf->data; 6598 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6599 pcbddcf->mat_graph = pcbddc->mat_graph; 6600 pcbddcf->use_faces = PETSC_TRUE; 6601 pcbddcf->use_change_of_basis = PETSC_TRUE; 6602 pcbddcf->use_change_on_faces = PETSC_TRUE; 6603 pcbddcf->use_qr_single = PETSC_TRUE; 6604 pcbddcf->fake_change = PETSC_TRUE; 6605 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6606 /* store information on primal vertices and change of basis (in local numbering) */ 6607 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6608 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6609 change = pcbddcf->ConstraintMatrix; 6610 pcbddcf->ConstraintMatrix = NULL; 6611 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6612 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6613 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6614 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6615 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6616 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6617 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6618 pcf->ops->destroy = NULL; 6619 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6620 } 6621 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6622 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6623 ierr = MatDestroy(&change);CHKERRQ(ierr); 6624 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6625 } 6626 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6627 6628 /* free adjacency */ 6629 if (free_used_adj) { 6630 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6631 } 6632 PetscFunctionReturn(0); 6633 } 6634 6635 #undef __FUNCT__ 6636 #define __FUNCT__ "PCBDDCInitSubSchurs" 6637 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6638 { 6639 PC_IS *pcis=(PC_IS*)pc->data; 6640 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6641 PCBDDCGraph graph; 6642 PetscErrorCode ierr; 6643 6644 PetscFunctionBegin; 6645 /* attach interface graph for determining subsets */ 6646 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6647 IS verticesIS,verticescomm; 6648 PetscInt vsize,*idxs; 6649 6650 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6651 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6652 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6653 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6654 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6655 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6656 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6657 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6658 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6659 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6660 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6661 } else { 6662 graph = pcbddc->mat_graph; 6663 } 6664 /* print some info */ 6665 if (pcbddc->dbg_flag) { 6666 IS vertices; 6667 PetscInt nv,nedges,nfaces; 6668 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6669 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6670 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6671 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6674 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6675 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6676 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6677 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6678 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6679 } 6680 6681 /* sub_schurs init */ 6682 if (!pcbddc->sub_schurs) { 6683 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6684 } 6685 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6686 6687 /* free graph struct */ 6688 if (pcbddc->sub_schurs_rebuild) { 6689 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6690 } 6691 PetscFunctionReturn(0); 6692 } 6693 6694 #undef __FUNCT__ 6695 #define __FUNCT__ "PCBDDCCheckOperator" 6696 PetscErrorCode PCBDDCCheckOperator(PC pc) 6697 { 6698 PC_IS *pcis=(PC_IS*)pc->data; 6699 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6700 PetscErrorCode ierr; 6701 6702 PetscFunctionBegin; 6703 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6704 IS zerodiag = NULL; 6705 Mat S_j,B0_B=NULL; 6706 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6707 PetscScalar *p0_check,*array,*array2; 6708 PetscReal norm; 6709 PetscInt i; 6710 6711 /* B0 and B0_B */ 6712 if (zerodiag) { 6713 IS dummy; 6714 6715 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6716 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6717 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6718 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6719 } 6720 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6721 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6722 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6723 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6724 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6725 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6726 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6727 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6728 /* S_j */ 6729 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6730 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6731 6732 /* mimic vector in \widetilde{W}_\Gamma */ 6733 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6734 /* continuous in primal space */ 6735 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6736 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6737 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6738 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6739 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6740 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6741 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6742 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6743 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6744 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6745 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6746 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6747 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6748 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6749 6750 /* assemble rhs for coarse problem */ 6751 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6752 /* local with Schur */ 6753 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6754 if (zerodiag) { 6755 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6756 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6757 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6758 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6759 } 6760 /* sum on primal nodes the local contributions */ 6761 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6762 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6763 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6764 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6765 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6766 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6767 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6768 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6769 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6770 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6771 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6772 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6773 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6774 /* scale primal nodes (BDDC sums contibutions) */ 6775 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6776 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6777 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6778 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6779 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6780 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6781 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6782 /* global: \widetilde{B0}_B w_\Gamma */ 6783 if (zerodiag) { 6784 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6785 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6786 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6787 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6788 } 6789 /* BDDC */ 6790 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6791 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6792 6793 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6794 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6795 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6796 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6797 for (i=0;i<pcbddc->benign_n;i++) { 6798 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6799 } 6800 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6801 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6802 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6803 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6804 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6805 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6806 } 6807 PetscFunctionReturn(0); 6808 } 6809