1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 9 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 10 { 11 Mat loc_divudotp; 12 Vec p,v,vins,quad_vec,*quad_vecs; 13 ISLocalToGlobalMapping map; 14 IS *faces,*edges; 15 PetscScalar *vals; 16 const PetscScalar *array; 17 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 18 PetscMPIInt rank; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 if (!transpose) { 51 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 52 } else { 53 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 54 } 55 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 56 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 57 for (i=0;i<maxneighs;i++) { 58 PetscInt first,last; 59 60 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 61 /* the near-null space fo BDDC carries information on quadrature weights, 62 and these can be collinear -> so cheat with MatNullSpaceCreate 63 and create a suitable set of basis vectors first */ 64 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 65 if (i>=first && i < last) { 66 PetscScalar *data; 67 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 68 data[i-first] = 1.; 69 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 70 } 71 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 72 } 73 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 74 for (i=0;i<maxneighs;i++) { /* reset vectors */ 75 PetscInt first,last; 76 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 77 if (i>=first && i < last) { 78 PetscScalar *data; 79 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 80 data[i-first] = 0.; 81 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 82 } 83 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 84 } 85 /* compute local quad vec */ 86 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 87 if (!transpose) { 88 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 89 } else { 90 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 91 } 92 ierr = VecSet(p,1.);CHKERRQ(ierr); 93 if (!transpose) { 94 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 95 } else { 96 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 97 } 98 if (vl2l) { 99 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 100 } else { 101 vins = v; 102 } 103 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 104 ierr = VecDestroy(&p);CHKERRQ(ierr); 105 106 /* insert in global quadrature vecs */ 107 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 108 for (i=0;i<nf;i++) { 109 const PetscInt *idxs; 110 PetscInt idx,nn,j; 111 112 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 113 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 114 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 115 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 116 idx = -(idx+1); 117 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 118 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 119 } 120 for (i=0;i<ne;i++) { 121 const PetscInt *idxs; 122 PetscInt idx,nn,j; 123 124 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 125 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 126 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 127 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 128 idx = -(idx+1); 129 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 130 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 131 } 132 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 133 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 134 if (vl2l) { 135 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 136 } 137 ierr = VecDestroy(&v);CHKERRQ(ierr); 138 ierr = PetscFree(vals);CHKERRQ(ierr); 139 140 /* assemble near null space */ 141 for (i=0;i<maxneighs;i++) { 142 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 143 } 144 for (i=0;i<maxneighs;i++) { 145 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 146 } 147 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 152 #undef __FUNCT__ 153 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 154 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 155 { 156 PetscErrorCode ierr; 157 Vec local,global; 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 161 PetscFunctionBegin; 162 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 163 /* need to convert from global to local topology information and remove references to information in global ordering */ 164 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 165 if (pcbddc->user_provided_isfordofs) { 166 if (pcbddc->n_ISForDofs) { 167 PetscInt i; 168 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 169 for (i=0;i<pcbddc->n_ISForDofs;i++) { 170 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 171 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 172 } 173 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 174 pcbddc->n_ISForDofs = 0; 175 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 176 } 177 } else { 178 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 179 PetscInt i, n = matis->A->rmap->n; 180 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 181 if (i > 1) { 182 pcbddc->n_ISForDofsLocal = i; 183 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 184 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 185 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 186 } 187 } 188 } 189 } 190 191 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 192 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 193 } 194 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 195 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 196 } 197 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 198 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 199 } 200 ierr = VecDestroy(&global);CHKERRQ(ierr); 201 ierr = VecDestroy(&local);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 207 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 208 { 209 PC_IS *pcis = (PC_IS*)(pc->data); 210 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 211 PetscErrorCode ierr; 212 213 PetscFunctionBegin; 214 if (!pcbddc->benign_have_null) { 215 PetscFunctionReturn(0); 216 } 217 if (pcbddc->ChangeOfBasisMatrix) { 218 Vec swap; 219 220 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 221 swap = pcbddc->work_change; 222 pcbddc->work_change = r; 223 r = swap; 224 } 225 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 226 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 227 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 228 ierr = VecSet(z,0.);CHKERRQ(ierr); 229 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 230 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 231 if (pcbddc->ChangeOfBasisMatrix) { 232 Vec swap; 233 234 swap = r; 235 r = pcbddc->work_change; 236 pcbddc->work_change = swap; 237 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 238 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 239 } 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 245 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 246 { 247 PCBDDCBenignMatMult_ctx ctx; 248 PetscErrorCode ierr; 249 PetscBool apply_right,apply_left,reset_x; 250 251 PetscFunctionBegin; 252 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 253 if (transpose) { 254 apply_right = ctx->apply_left; 255 apply_left = ctx->apply_right; 256 } else { 257 apply_right = ctx->apply_right; 258 apply_left = ctx->apply_left; 259 } 260 reset_x = PETSC_FALSE; 261 if (apply_right) { 262 const PetscScalar *ax; 263 PetscInt nl,i; 264 265 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 267 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 268 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 269 for (i=0;i<ctx->benign_n;i++) { 270 PetscScalar sum,val; 271 const PetscInt *idxs; 272 PetscInt nz,j; 273 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 274 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 275 sum = 0.; 276 if (ctx->apply_p0) { 277 val = ctx->work[idxs[nz-1]]; 278 for (j=0;j<nz-1;j++) { 279 sum += ctx->work[idxs[j]]; 280 ctx->work[idxs[j]] += val; 281 } 282 } else { 283 for (j=0;j<nz-1;j++) { 284 sum += ctx->work[idxs[j]]; 285 } 286 } 287 ctx->work[idxs[nz-1]] -= sum; 288 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 289 } 290 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 291 reset_x = PETSC_TRUE; 292 } 293 if (transpose) { 294 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 295 } else { 296 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 297 } 298 if (reset_x) { 299 ierr = VecResetArray(x);CHKERRQ(ierr); 300 } 301 if (apply_left) { 302 PetscScalar *ay; 303 PetscInt i; 304 305 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 306 for (i=0;i<ctx->benign_n;i++) { 307 PetscScalar sum,val; 308 const PetscInt *idxs; 309 PetscInt nz,j; 310 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 311 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 312 val = -ay[idxs[nz-1]]; 313 if (ctx->apply_p0) { 314 sum = 0.; 315 for (j=0;j<nz-1;j++) { 316 sum += ay[idxs[j]]; 317 ay[idxs[j]] += val; 318 } 319 ay[idxs[nz-1]] += sum; 320 } else { 321 for (j=0;j<nz-1;j++) { 322 ay[idxs[j]] += val; 323 } 324 ay[idxs[nz-1]] = 0.; 325 } 326 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 327 } 328 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 329 } 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 335 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 336 { 337 PetscErrorCode ierr; 338 339 PetscFunctionBegin; 340 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 346 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 347 { 348 PetscErrorCode ierr; 349 350 PetscFunctionBegin; 351 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCBenignShellMat" 357 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 358 { 359 PC_IS *pcis = (PC_IS*)pc->data; 360 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 361 PCBDDCBenignMatMult_ctx ctx; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 if (!restore) { 366 Mat A_IB,A_BI; 367 PetscScalar *work; 368 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 369 370 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 371 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 372 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 373 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 374 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 375 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 376 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 377 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 378 ierr = PetscNew(&ctx);CHKERRQ(ierr); 379 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 380 ctx->apply_left = PETSC_TRUE; 381 ctx->apply_right = PETSC_FALSE; 382 ctx->apply_p0 = PETSC_FALSE; 383 ctx->benign_n = pcbddc->benign_n; 384 if (reuse) { 385 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 386 ctx->free = PETSC_FALSE; 387 } else { /* TODO: could be optimized for successive solves */ 388 ISLocalToGlobalMapping N_to_D; 389 PetscInt i; 390 391 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 392 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 393 for (i=0;i<pcbddc->benign_n;i++) { 394 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 395 } 396 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 397 ctx->free = PETSC_TRUE; 398 } 399 ctx->A = pcis->A_IB; 400 ctx->work = work; 401 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 402 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 403 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 404 pcis->A_IB = A_IB; 405 406 /* A_BI as A_IB^T */ 407 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 408 pcbddc->benign_original_mat = pcis->A_BI; 409 pcis->A_BI = A_BI; 410 } else { 411 if (!pcbddc->benign_original_mat) { 412 PetscFunctionReturn(0); 413 } 414 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 415 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 416 pcis->A_IB = ctx->A; 417 ctx->A = NULL; 418 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 419 pcis->A_BI = pcbddc->benign_original_mat; 420 pcbddc->benign_original_mat = NULL; 421 if (ctx->free) { 422 PetscInt i; 423 for (i=0;i<ctx->benign_n;i++) { 424 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 425 } 426 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 427 } 428 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 429 ierr = PetscFree(ctx);CHKERRQ(ierr); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 /* used just in bddc debug mode */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "PCBDDCBenignProject" 437 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 438 { 439 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 440 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 441 Mat An; 442 PetscErrorCode ierr; 443 444 PetscFunctionBegin; 445 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 446 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 447 if (is1) { 448 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 449 ierr = MatDestroy(&An);CHKERRQ(ierr); 450 } else { 451 *B = An; 452 } 453 PetscFunctionReturn(0); 454 } 455 456 /* TODO: add reuse flag */ 457 #undef __FUNCT__ 458 #define __FUNCT__ "MatSeqAIJCompress" 459 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 460 { 461 Mat Bt; 462 PetscScalar *a,*bdata; 463 const PetscInt *ii,*ij; 464 PetscInt m,n,i,nnz,*bii,*bij; 465 PetscBool flg_row; 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 470 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 471 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 472 nnz = n; 473 for (i=0;i<ii[n];i++) { 474 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 475 } 476 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 477 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 478 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 479 nnz = 0; 480 bii[0] = 0; 481 for (i=0;i<n;i++) { 482 PetscInt j; 483 for (j=ii[i];j<ii[i+1];j++) { 484 PetscScalar entry = a[j]; 485 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 486 bij[nnz] = ij[j]; 487 bdata[nnz] = entry; 488 nnz++; 489 } 490 } 491 bii[i+1] = nnz; 492 } 493 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 494 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 495 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 496 { 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 498 b->free_a = PETSC_TRUE; 499 b->free_ij = PETSC_TRUE; 500 } 501 *B = Bt; 502 PetscFunctionReturn(0); 503 } 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatDetectDisconnectedComponents" 507 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 508 { 509 Mat B; 510 IS is_dummy,*cc_n; 511 ISLocalToGlobalMapping l2gmap_dummy; 512 PCBDDCGraph graph; 513 PetscInt i,n; 514 PetscInt *xadj,*adjncy; 515 PetscInt *xadj_filtered,*adjncy_filtered; 516 PetscBool flg_row,isseqaij; 517 PetscErrorCode ierr; 518 519 PetscFunctionBegin; 520 if (!A->rmap->N || !A->cmap->N) { 521 *ncc = 0; 522 *cc = NULL; 523 PetscFunctionReturn(0); 524 } 525 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 526 if (!isseqaij && filter) { 527 PetscBool isseqdense; 528 529 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 530 if (!isseqdense) { 531 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 532 } else { /* TODO: rectangular case and LDA */ 533 PetscScalar *array; 534 PetscReal chop=1.e-6; 535 536 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 537 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 538 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 539 for (i=0;i<n;i++) { 540 PetscInt j; 541 for (j=i+1;j<n;j++) { 542 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 543 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 544 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 545 } 546 } 547 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 548 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 549 } 550 } else { 551 B = A; 552 } 553 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 554 555 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 556 if (filter) { 557 PetscScalar *data; 558 PetscInt j,cum; 559 560 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 561 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 562 cum = 0; 563 for (i=0;i<n;i++) { 564 PetscInt t; 565 566 for (j=xadj[i];j<xadj[i+1];j++) { 567 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 568 continue; 569 } 570 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 571 } 572 t = xadj_filtered[i]; 573 xadj_filtered[i] = cum; 574 cum += t; 575 } 576 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 577 } else { 578 xadj_filtered = NULL; 579 adjncy_filtered = NULL; 580 } 581 582 /* compute local connected components using PCBDDCGraph */ 583 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 584 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 585 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 586 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 587 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 588 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 589 if (xadj_filtered) { 590 graph->xadj = xadj_filtered; 591 graph->adjncy = adjncy_filtered; 592 } else { 593 graph->xadj = xadj; 594 graph->adjncy = adjncy; 595 } 596 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 597 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 598 /* partial clean up */ 599 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 601 if (A != B) { 602 ierr = MatDestroy(&B);CHKERRQ(ierr); 603 } 604 605 /* get back data */ 606 if (ncc) *ncc = graph->ncc; 607 if (cc) { 608 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 609 for (i=0;i<graph->ncc;i++) { 610 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 611 } 612 *cc = cc_n; 613 } 614 /* clean up graph */ 615 graph->xadj = 0; 616 graph->adjncy = 0; 617 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 618 PetscFunctionReturn(0); 619 } 620 621 #undef __FUNCT__ 622 #define __FUNCT__ "PCBDDCBenignCheck" 623 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 624 { 625 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 626 PC_IS* pcis = (PC_IS*)(pc->data); 627 IS dirIS = NULL; 628 PetscInt i; 629 PetscErrorCode ierr; 630 631 PetscFunctionBegin; 632 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 633 if (zerodiag) { 634 Mat A; 635 Vec vec3_N; 636 PetscScalar *vals; 637 const PetscInt *idxs; 638 PetscInt nz,*count; 639 640 /* p0 */ 641 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 642 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 643 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 644 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 645 for (i=0;i<nz;i++) vals[i] = 1.; 646 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 647 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 648 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 649 /* v_I */ 650 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 651 for (i=0;i<nz;i++) vals[i] = 0.; 652 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 653 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 654 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 655 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 656 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 657 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 658 if (dirIS) { 659 PetscInt n; 660 661 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 662 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 663 for (i=0;i<n;i++) vals[i] = 0.; 664 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 665 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 666 } 667 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 668 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 669 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 670 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 671 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 672 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 673 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 674 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 675 ierr = PetscFree(vals);CHKERRQ(ierr); 676 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 677 678 /* there should not be any pressure dofs lying on the interface */ 679 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 680 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 681 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 682 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 683 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 684 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 685 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 686 ierr = PetscFree(count);CHKERRQ(ierr); 687 } 688 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 689 690 /* check PCBDDCBenignGetOrSetP0 */ 691 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 692 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 693 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 694 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 695 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 696 for (i=0;i<pcbddc->benign_n;i++) { 697 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 698 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 705 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 706 { 707 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 708 IS pressures,zerodiag,*zerodiag_subs; 709 PetscInt nz,n; 710 PetscInt *interior_dofs,n_interior_dofs; 711 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 712 PetscErrorCode ierr; 713 714 PetscFunctionBegin; 715 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 717 for (n=0;n<pcbddc->benign_n;n++) { 718 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 719 } 720 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 721 pcbddc->benign_n = 0; 722 /* if a local info on dofs is present, assumes that the last field represents "pressures" 723 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 724 Checks if all the pressure dofs in each subdomain have a zero diagonal 725 If not, a change of basis on pressures is not needed 726 since the local Schur complements are already SPD 727 */ 728 has_null_pressures = PETSC_TRUE; 729 have_null = PETSC_TRUE; 730 if (pcbddc->n_ISForDofsLocal) { 731 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 732 733 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 734 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 735 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 736 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 738 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 739 if (!sorted) { 740 ierr = ISSort(pressures);CHKERRQ(ierr); 741 } 742 } else { 743 pressures = NULL; 744 } 745 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 746 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 747 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 748 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 749 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 750 if (!sorted) { 751 ierr = ISSort(zerodiag);CHKERRQ(ierr); 752 } 753 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 754 if (!nz) { 755 if (n) have_null = PETSC_FALSE; 756 has_null_pressures = PETSC_FALSE; 757 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 758 } 759 recompute_zerodiag = PETSC_FALSE; 760 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 761 zerodiag_subs = NULL; 762 pcbddc->benign_n = 0; 763 n_interior_dofs = 0; 764 interior_dofs = NULL; 765 if (pcbddc->current_level) { /* need to compute interior nodes */ 766 PetscInt n,i,j; 767 PetscInt n_neigh,*neigh,*n_shared,**shared; 768 PetscInt *iwork; 769 770 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 771 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 772 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 773 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 774 for (i=1;i<n_neigh;i++) 775 for (j=0;j<n_shared[i];j++) 776 iwork[shared[i][j]] += 1; 777 for (i=0;i<n;i++) 778 if (!iwork[i]) 779 interior_dofs[n_interior_dofs++] = i; 780 ierr = PetscFree(iwork);CHKERRQ(ierr); 781 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 782 } 783 if (has_null_pressures) { 784 IS *subs; 785 PetscInt nsubs,i,j,nl; 786 const PetscInt *idxs; 787 PetscScalar *array; 788 Vec *work; 789 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 790 791 subs = pcbddc->local_subs; 792 nsubs = pcbddc->n_local_subs; 793 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 794 if (pcbddc->current_level) { 795 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 796 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 797 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 798 /* work[0] = 1_p */ 799 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 800 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 801 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 802 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 803 /* work[0] = 1_v */ 804 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 805 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 806 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 807 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 808 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 809 } 810 if (nsubs > 1) { 811 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 812 for (i=0;i<nsubs;i++) { 813 ISLocalToGlobalMapping l2g; 814 IS t_zerodiag_subs; 815 PetscInt nl; 816 817 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 818 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 819 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 820 if (nl) { 821 PetscBool valid = PETSC_TRUE; 822 823 if (pcbddc->current_level) { 824 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 825 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 826 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 827 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 828 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 829 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 830 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 831 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 832 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 833 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 834 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 835 for (j=0;j<n_interior_dofs;j++) { 836 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 837 valid = PETSC_FALSE; 838 break; 839 } 840 } 841 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 842 } 843 if (valid && pcbddc->NeumannBoundariesLocal) { 844 IS t_bc; 845 PetscInt nzb; 846 847 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 848 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 849 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 850 if (nzb) valid = PETSC_FALSE; 851 } 852 if (valid && pressures) { 853 IS t_pressure_subs; 854 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 855 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 856 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 857 } 858 if (valid) { 859 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 860 pcbddc->benign_n++; 861 } else { 862 recompute_zerodiag = PETSC_TRUE; 863 } 864 } 865 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 866 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 867 } 868 } else { /* there's just one subdomain (or zero if they have not been detected */ 869 PetscBool valid = PETSC_TRUE; 870 871 if (pcbddc->NeumannBoundariesLocal) { 872 PetscInt nzb; 873 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 874 if (nzb) valid = PETSC_FALSE; 875 } 876 if (valid && pressures) { 877 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 878 } 879 if (valid && pcbddc->current_level) { 880 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 881 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 883 for (j=0;j<n_interior_dofs;j++) { 884 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 885 valid = PETSC_FALSE; 886 break; 887 } 888 } 889 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 890 } 891 if (valid) { 892 pcbddc->benign_n = 1; 893 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 895 zerodiag_subs[0] = zerodiag; 896 } 897 } 898 if (pcbddc->current_level) { 899 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 900 } 901 } 902 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 903 904 if (!pcbddc->benign_n) { 905 PetscInt n; 906 907 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 908 recompute_zerodiag = PETSC_FALSE; 909 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 910 if (n) { 911 has_null_pressures = PETSC_FALSE; 912 have_null = PETSC_FALSE; 913 } 914 } 915 916 /* final check for null pressures */ 917 if (zerodiag && pressures) { 918 PetscInt nz,np; 919 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 920 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 921 if (nz != np) have_null = PETSC_FALSE; 922 } 923 924 if (recompute_zerodiag) { 925 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 926 if (pcbddc->benign_n == 1) { 927 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 928 zerodiag = zerodiag_subs[0]; 929 } else { 930 PetscInt i,nzn,*new_idxs; 931 932 nzn = 0; 933 for (i=0;i<pcbddc->benign_n;i++) { 934 PetscInt ns; 935 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 936 nzn += ns; 937 } 938 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 939 nzn = 0; 940 for (i=0;i<pcbddc->benign_n;i++) { 941 PetscInt ns,*idxs; 942 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 943 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 944 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 945 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 946 nzn += ns; 947 } 948 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 949 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 950 } 951 have_null = PETSC_FALSE; 952 } 953 954 /* Prepare matrix to compute no-net-flux */ 955 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 956 Mat A,loc_divudotp; 957 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 958 IS row,col,isused = NULL; 959 PetscInt M,N,n,st,n_isused; 960 961 if (pressures) { 962 isused = pressures; 963 } else { 964 isused = zerodiag; 965 } 966 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 967 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 968 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 969 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 970 n_isused = 0; 971 if (isused) { 972 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 973 } 974 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 975 st = st-n_isused; 976 if (n) { 977 const PetscInt *gidxs; 978 979 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 980 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 981 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 982 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 983 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 984 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 985 } else { 986 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 987 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 988 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 989 } 990 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 991 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 992 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 993 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 994 ierr = ISDestroy(&row);CHKERRQ(ierr); 995 ierr = ISDestroy(&col);CHKERRQ(ierr); 996 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 997 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 998 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 999 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1000 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1001 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1002 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1003 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1004 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1006 } 1007 1008 /* change of basis and p0 dofs */ 1009 if (has_null_pressures) { 1010 IS zerodiagc; 1011 const PetscInt *idxs,*idxsc; 1012 PetscInt i,s,*nnz; 1013 1014 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1015 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1016 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1017 /* local change of basis for pressures */ 1018 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1019 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1020 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1021 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1022 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1023 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1024 for (i=0;i<pcbddc->benign_n;i++) { 1025 PetscInt nzs,j; 1026 1027 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1028 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1029 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1030 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1031 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1032 } 1033 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1034 ierr = PetscFree(nnz);CHKERRQ(ierr); 1035 /* set identity on velocities */ 1036 for (i=0;i<n-nz;i++) { 1037 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1038 } 1039 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1040 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1041 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1042 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1043 /* set change on pressures */ 1044 for (s=0;s<pcbddc->benign_n;s++) { 1045 PetscScalar *array; 1046 PetscInt nzs; 1047 1048 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1049 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1050 for (i=0;i<nzs-1;i++) { 1051 PetscScalar vals[2]; 1052 PetscInt cols[2]; 1053 1054 cols[0] = idxs[i]; 1055 cols[1] = idxs[nzs-1]; 1056 vals[0] = 1.; 1057 vals[1] = 1.; 1058 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1059 } 1060 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1061 for (i=0;i<nzs-1;i++) array[i] = -1.; 1062 array[nzs-1] = 1.; 1063 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1064 /* store local idxs for p0 */ 1065 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1066 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1067 ierr = PetscFree(array);CHKERRQ(ierr); 1068 } 1069 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1070 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1071 /* project if needed */ 1072 if (pcbddc->benign_change_explicit) { 1073 Mat M; 1074 1075 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1076 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1077 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1078 ierr = MatDestroy(&M);CHKERRQ(ierr); 1079 } 1080 /* store global idxs for p0 */ 1081 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1082 } 1083 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1084 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1085 1086 /* determines if the coarse solver will be singular or not */ 1087 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1088 /* determines if the problem has subdomains with 0 pressure block */ 1089 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1090 *zerodiaglocal = zerodiag; 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1096 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1097 { 1098 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1099 PetscScalar *array; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 if (!pcbddc->benign_sf) { 1104 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1105 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1106 } 1107 if (get) { 1108 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1109 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1110 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1111 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1112 } else { 1113 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1114 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1115 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1123 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1124 { 1125 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 /* TODO: add error checking 1130 - avoid nested pop (or push) calls. 1131 - cannot push before pop. 1132 - cannot call this if pcbddc->local_mat is NULL 1133 */ 1134 if (!pcbddc->benign_n) { 1135 PetscFunctionReturn(0); 1136 } 1137 if (pop) { 1138 if (pcbddc->benign_change_explicit) { 1139 IS is_p0; 1140 MatReuse reuse; 1141 1142 /* extract B_0 */ 1143 reuse = MAT_INITIAL_MATRIX; 1144 if (pcbddc->benign_B0) { 1145 reuse = MAT_REUSE_MATRIX; 1146 } 1147 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1148 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1149 /* remove rows and cols from local problem */ 1150 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1151 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1152 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1153 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1154 } else { 1155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1156 PetscScalar *vals; 1157 PetscInt i,n,*idxs_ins; 1158 1159 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1160 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1161 if (!pcbddc->benign_B0) { 1162 PetscInt *nnz; 1163 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1164 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1165 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1167 for (i=0;i<pcbddc->benign_n;i++) { 1168 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1169 nnz[i] = n - nnz[i]; 1170 } 1171 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1172 ierr = PetscFree(nnz);CHKERRQ(ierr); 1173 } 1174 1175 for (i=0;i<pcbddc->benign_n;i++) { 1176 PetscScalar *array; 1177 PetscInt *idxs,j,nz,cum; 1178 1179 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1180 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1181 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1182 for (j=0;j<nz;j++) vals[j] = 1.; 1183 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1184 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1185 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1186 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1187 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1188 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1189 cum = 0; 1190 for (j=0;j<n;j++) { 1191 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1192 vals[cum] = array[j]; 1193 idxs_ins[cum] = j; 1194 cum++; 1195 } 1196 } 1197 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1198 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1199 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1200 } 1201 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1202 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1203 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1204 } 1205 } else { /* push */ 1206 if (pcbddc->benign_change_explicit) { 1207 PetscInt i; 1208 1209 for (i=0;i<pcbddc->benign_n;i++) { 1210 PetscScalar *B0_vals; 1211 PetscInt *B0_cols,B0_ncol; 1212 1213 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1214 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1215 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1216 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1217 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1218 } 1219 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1220 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1221 } else { 1222 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1223 } 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1230 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1231 { 1232 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1233 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1234 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1235 PetscBLASInt *B_iwork,*B_ifail; 1236 PetscScalar *work,lwork; 1237 PetscScalar *St,*S,*eigv; 1238 PetscScalar *Sarray,*Starray; 1239 PetscReal *eigs,thresh; 1240 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1241 PetscBool allocated_S_St; 1242 #if defined(PETSC_USE_COMPLEX) 1243 PetscReal *rwork; 1244 #endif 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1249 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1250 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1251 1252 if (pcbddc->dbg_flag) { 1253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1254 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1255 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1256 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 } 1258 1259 if (pcbddc->dbg_flag) { 1260 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1261 } 1262 1263 /* max size of subsets */ 1264 mss = 0; 1265 for (i=0;i<sub_schurs->n_subs;i++) { 1266 PetscInt subset_size; 1267 1268 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1269 mss = PetscMax(mss,subset_size); 1270 } 1271 1272 /* min/max and threshold */ 1273 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1274 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1275 nmax = PetscMax(nmin,nmax); 1276 allocated_S_St = PETSC_FALSE; 1277 if (nmin) { 1278 allocated_S_St = PETSC_TRUE; 1279 } 1280 1281 /* allocate lapack workspace */ 1282 cum = cum2 = 0; 1283 maxneigs = 0; 1284 for (i=0;i<sub_schurs->n_subs;i++) { 1285 PetscInt n,subset_size; 1286 1287 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1288 n = PetscMin(subset_size,nmax); 1289 cum += subset_size; 1290 cum2 += subset_size*n; 1291 maxneigs = PetscMax(maxneigs,n); 1292 } 1293 if (mss) { 1294 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1295 PetscBLASInt B_itype = 1; 1296 PetscBLASInt B_N = mss; 1297 PetscReal zero = 0.0; 1298 PetscReal eps = 0.0; /* dlamch? */ 1299 1300 B_lwork = -1; 1301 S = NULL; 1302 St = NULL; 1303 eigs = NULL; 1304 eigv = NULL; 1305 B_iwork = NULL; 1306 B_ifail = NULL; 1307 #if defined(PETSC_USE_COMPLEX) 1308 rwork = NULL; 1309 #endif 1310 thresh = 1.0; 1311 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1312 #if defined(PETSC_USE_COMPLEX) 1313 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1314 #else 1315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1316 #endif 1317 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1318 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1319 } else { 1320 /* TODO */ 1321 } 1322 } else { 1323 lwork = 0; 1324 } 1325 1326 nv = 0; 1327 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1328 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1329 } 1330 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1331 if (allocated_S_St) { 1332 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1333 } 1334 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1335 #if defined(PETSC_USE_COMPLEX) 1336 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1337 #endif 1338 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1339 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1340 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1341 nv+cum,&pcbddc->adaptive_constraints_idxs, 1342 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1343 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1344 1345 maxneigs = 0; 1346 cum = cumarray = 0; 1347 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1348 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1349 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1350 const PetscInt *idxs; 1351 1352 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1353 for (cum=0;cum<nv;cum++) { 1354 pcbddc->adaptive_constraints_n[cum] = 1; 1355 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1356 pcbddc->adaptive_constraints_data[cum] = 1.0; 1357 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1358 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1359 } 1360 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1361 } 1362 1363 if (mss) { /* multilevel */ 1364 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1365 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1366 } 1367 1368 thresh = pcbddc->adaptive_threshold; 1369 for (i=0;i<sub_schurs->n_subs;i++) { 1370 const PetscInt *idxs; 1371 PetscReal upper,lower; 1372 PetscInt j,subset_size,eigs_start = 0; 1373 PetscBLASInt B_N; 1374 PetscBool same_data = PETSC_FALSE; 1375 1376 if (pcbddc->use_deluxe_scaling) { 1377 upper = PETSC_MAX_REAL; 1378 lower = thresh; 1379 } else { 1380 upper = 1./thresh; 1381 lower = 0.; 1382 } 1383 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1384 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1385 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1386 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1387 if (sub_schurs->is_hermitian) { 1388 PetscInt j,k; 1389 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1390 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1391 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1392 } 1393 for (j=0;j<subset_size;j++) { 1394 for (k=j;k<subset_size;k++) { 1395 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1396 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1397 } 1398 } 1399 } else { 1400 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1401 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1402 } 1403 } else { 1404 S = Sarray + cumarray; 1405 St = Starray + cumarray; 1406 } 1407 /* see if we can save some work */ 1408 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1409 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1410 } 1411 1412 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1413 B_neigs = 0; 1414 } else { 1415 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1416 PetscBLASInt B_itype = 1; 1417 PetscBLASInt B_IL, B_IU; 1418 PetscReal eps = -1.0; /* dlamch? */ 1419 PetscInt nmin_s; 1420 PetscBool compute_range = PETSC_FALSE; 1421 1422 if (pcbddc->dbg_flag) { 1423 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1424 } 1425 1426 compute_range = PETSC_FALSE; 1427 if (thresh > 1.+PETSC_SMALL && !same_data) { 1428 compute_range = PETSC_TRUE; 1429 } 1430 1431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1432 if (compute_range) { 1433 1434 /* ask for eigenvalues larger than thresh */ 1435 #if defined(PETSC_USE_COMPLEX) 1436 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1437 #else 1438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1439 #endif 1440 } else if (!same_data) { 1441 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1442 B_IL = 1; 1443 #if defined(PETSC_USE_COMPLEX) 1444 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1445 #else 1446 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1447 #endif 1448 } else { /* same_data is true, so get the adaptive function requested by the user */ 1449 PetscInt k; 1450 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1451 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1453 nmin = nmax; 1454 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1455 for (k=0;k<nmax;k++) { 1456 eigs[k] = 1./PETSC_SMALL; 1457 eigv[k*(subset_size+1)] = 1.0; 1458 } 1459 } 1460 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1461 if (B_ierr) { 1462 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1463 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1464 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1465 } 1466 1467 if (B_neigs > nmax) { 1468 if (pcbddc->dbg_flag) { 1469 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1470 } 1471 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1472 B_neigs = nmax; 1473 } 1474 1475 nmin_s = PetscMin(nmin,B_N); 1476 if (B_neigs < nmin_s) { 1477 PetscBLASInt B_neigs2; 1478 1479 if (pcbddc->use_deluxe_scaling) { 1480 B_IL = B_N - nmin_s + 1; 1481 B_IU = B_N - B_neigs; 1482 } else { 1483 B_IL = B_neigs + 1; 1484 B_IU = nmin_s; 1485 } 1486 if (pcbddc->dbg_flag) { 1487 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1488 } 1489 if (sub_schurs->is_hermitian) { 1490 PetscInt j,k; 1491 for (j=0;j<subset_size;j++) { 1492 for (k=j;k<subset_size;k++) { 1493 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1494 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1495 } 1496 } 1497 } else { 1498 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1499 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1500 } 1501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1502 #if defined(PETSC_USE_COMPLEX) 1503 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1504 #else 1505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1506 #endif 1507 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1508 B_neigs += B_neigs2; 1509 } 1510 if (B_ierr) { 1511 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1512 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1513 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1514 } 1515 if (pcbddc->dbg_flag) { 1516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1517 for (j=0;j<B_neigs;j++) { 1518 if (eigs[j] == 0.0) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1520 } else { 1521 if (pcbddc->use_deluxe_scaling) { 1522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1523 } else { 1524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 } 1529 } else { 1530 /* TODO */ 1531 } 1532 } 1533 /* change the basis back to the original one */ 1534 if (sub_schurs->change) { 1535 Mat change,phi,phit; 1536 1537 if (pcbddc->dbg_flag > 1) { 1538 PetscInt ii; 1539 for (ii=0;ii<B_neigs;ii++) { 1540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1541 for (j=0;j<B_N;j++) { 1542 #if defined(PETSC_USE_COMPLEX) 1543 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 1544 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 1545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1546 #else 1547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1548 #endif 1549 } 1550 } 1551 } 1552 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1553 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1554 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1555 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1556 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1557 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1558 } 1559 maxneigs = PetscMax(B_neigs,maxneigs); 1560 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1561 if (B_neigs) { 1562 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1563 1564 if (pcbddc->dbg_flag > 1) { 1565 PetscInt ii; 1566 for (ii=0;ii<B_neigs;ii++) { 1567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1568 for (j=0;j<B_N;j++) { 1569 #if defined(PETSC_USE_COMPLEX) 1570 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1571 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1573 #else 1574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1575 #endif 1576 } 1577 } 1578 } 1579 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1580 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1581 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1582 cum++; 1583 } 1584 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1585 /* shift for next computation */ 1586 cumarray += subset_size*subset_size; 1587 } 1588 if (pcbddc->dbg_flag) { 1589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1590 } 1591 1592 if (mss) { 1593 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1594 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1595 /* destroy matrices (junk) */ 1596 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1597 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1598 } 1599 if (allocated_S_St) { 1600 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1601 } 1602 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1603 #if defined(PETSC_USE_COMPLEX) 1604 ierr = PetscFree(rwork);CHKERRQ(ierr); 1605 #endif 1606 if (pcbddc->dbg_flag) { 1607 PetscInt maxneigs_r; 1608 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1609 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "PCBDDCSetUpSolvers" 1616 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1617 { 1618 PetscScalar *coarse_submat_vals; 1619 PetscErrorCode ierr; 1620 1621 PetscFunctionBegin; 1622 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1623 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1624 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1625 1626 /* Setup local neumann solver ksp_R */ 1627 /* PCBDDCSetUpLocalScatters should be called first! */ 1628 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1629 1630 /* 1631 Setup local correction and local part of coarse basis. 1632 Gives back the dense local part of the coarse matrix in column major ordering 1633 */ 1634 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1635 1636 /* Compute total number of coarse nodes and setup coarse solver */ 1637 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1638 1639 /* free */ 1640 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1641 PetscFunctionReturn(0); 1642 } 1643 1644 #undef __FUNCT__ 1645 #define __FUNCT__ "PCBDDCResetCustomization" 1646 PetscErrorCode PCBDDCResetCustomization(PC pc) 1647 { 1648 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1649 PetscErrorCode ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1653 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1654 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1655 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1656 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1657 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1658 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1659 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1660 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1661 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1662 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "PCBDDCResetTopography" 1668 PetscErrorCode PCBDDCResetTopography(PC pc) 1669 { 1670 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1671 PetscInt i; 1672 PetscErrorCode ierr; 1673 1674 PetscFunctionBegin; 1675 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1676 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1677 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1678 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1679 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1681 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1682 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1683 pcbddc->graphanalyzed = PETSC_FALSE; 1684 for (i=0;i<pcbddc->n_local_subs;i++) { 1685 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1686 } 1687 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1688 if (pcbddc->sub_schurs) { 1689 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "PCBDDCResetSolvers" 1696 PetscErrorCode PCBDDCResetSolvers(PC pc) 1697 { 1698 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1703 if (pcbddc->coarse_phi_B) { 1704 PetscScalar *array; 1705 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1706 ierr = PetscFree(array);CHKERRQ(ierr); 1707 } 1708 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1709 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1710 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1711 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1712 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1713 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1714 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1715 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1716 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1717 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1718 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1719 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1720 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1721 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1722 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1723 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1724 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1725 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1726 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1727 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1728 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1729 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1731 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1732 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1733 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1734 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1735 if (pcbddc->benign_zerodiag_subs) { 1736 PetscInt i; 1737 for (i=0;i<pcbddc->benign_n;i++) { 1738 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1739 } 1740 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1741 } 1742 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1748 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1749 { 1750 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1751 PC_IS *pcis = (PC_IS*)pc->data; 1752 VecType impVecType; 1753 PetscInt n_constraints,n_R,old_size; 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1758 /* get sizes */ 1759 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1760 n_R = pcis->n - pcbddc->n_vertices; 1761 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1762 /* local work vectors (try to avoid unneeded work)*/ 1763 /* R nodes */ 1764 old_size = -1; 1765 if (pcbddc->vec1_R) { 1766 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1767 } 1768 if (n_R != old_size) { 1769 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1770 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1771 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1772 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1773 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1774 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1775 } 1776 /* local primal dofs */ 1777 old_size = -1; 1778 if (pcbddc->vec1_P) { 1779 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1780 } 1781 if (pcbddc->local_primal_size != old_size) { 1782 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1783 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1784 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1785 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1786 } 1787 /* local explicit constraints */ 1788 old_size = -1; 1789 if (pcbddc->vec1_C) { 1790 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1791 } 1792 if (n_constraints && n_constraints != old_size) { 1793 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1794 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1795 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1796 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1797 } 1798 PetscFunctionReturn(0); 1799 } 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "PCBDDCSetUpCorrection" 1803 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1804 { 1805 PetscErrorCode ierr; 1806 /* pointers to pcis and pcbddc */ 1807 PC_IS* pcis = (PC_IS*)pc->data; 1808 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1809 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1810 /* submatrices of local problem */ 1811 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1812 /* submatrices of local coarse problem */ 1813 Mat S_VV,S_CV,S_VC,S_CC; 1814 /* working matrices */ 1815 Mat C_CR; 1816 /* additional working stuff */ 1817 PC pc_R; 1818 Mat F; 1819 Vec dummy_vec; 1820 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1821 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1822 PetscScalar *work; 1823 PetscInt *idx_V_B; 1824 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1825 PetscInt i,n_R,n_D,n_B; 1826 1827 /* some shortcuts to scalars */ 1828 PetscScalar one=1.0,m_one=-1.0; 1829 1830 PetscFunctionBegin; 1831 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1832 1833 /* Set Non-overlapping dimensions */ 1834 n_vertices = pcbddc->n_vertices; 1835 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1836 n_B = pcis->n_B; 1837 n_D = pcis->n - n_B; 1838 n_R = pcis->n - n_vertices; 1839 1840 /* vertices in boundary numbering */ 1841 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1842 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1843 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1844 1845 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1846 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1848 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1850 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1851 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1852 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1853 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1854 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1855 1856 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1857 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1858 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1859 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1860 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1861 lda_rhs = n_R; 1862 need_benign_correction = PETSC_FALSE; 1863 if (isLU || isILU || isCHOL) { 1864 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1865 } else if (sub_schurs && sub_schurs->reuse_solver) { 1866 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1867 MatFactorType type; 1868 1869 F = reuse_solver->F; 1870 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1871 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1872 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1873 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1874 } else { 1875 F = NULL; 1876 } 1877 1878 /* allocate workspace */ 1879 n = 0; 1880 if (n_constraints) { 1881 n += lda_rhs*n_constraints; 1882 } 1883 if (n_vertices) { 1884 n = PetscMax(2*lda_rhs*n_vertices,n); 1885 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1886 } 1887 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1888 1889 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1890 dummy_vec = NULL; 1891 if (need_benign_correction && lda_rhs != n_R && F) { 1892 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1893 } 1894 1895 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1896 if (n_constraints) { 1897 Mat M1,M2,M3,C_B; 1898 IS is_aux; 1899 PetscScalar *array,*array2; 1900 1901 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1902 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1903 1904 /* Extract constraints on R nodes: C_{CR} */ 1905 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1906 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1907 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1908 1909 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1910 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1911 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1912 for (i=0;i<n_constraints;i++) { 1913 const PetscScalar *row_cmat_values; 1914 const PetscInt *row_cmat_indices; 1915 PetscInt size_of_constraint,j; 1916 1917 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1918 for (j=0;j<size_of_constraint;j++) { 1919 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1920 } 1921 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1922 } 1923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1924 if (F) { 1925 Mat B; 1926 1927 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1928 if (need_benign_correction) { 1929 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1930 1931 /* rhs is already zero on interior dofs, no need to change the rhs */ 1932 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1933 } 1934 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1935 if (need_benign_correction) { 1936 PetscScalar *marr; 1937 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1938 1939 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1940 if (lda_rhs != n_R) { 1941 for (i=0;i<n_constraints;i++) { 1942 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1943 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1944 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1945 } 1946 } else { 1947 for (i=0;i<n_constraints;i++) { 1948 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1949 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1951 } 1952 } 1953 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1954 } 1955 ierr = MatDestroy(&B);CHKERRQ(ierr); 1956 } else { 1957 PetscScalar *marr; 1958 1959 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1960 for (i=0;i<n_constraints;i++) { 1961 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1962 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1963 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1964 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1965 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1966 } 1967 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1968 } 1969 if (!pcbddc->switch_static) { 1970 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1971 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1972 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1973 for (i=0;i<n_constraints;i++) { 1974 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1975 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1976 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1977 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1978 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1979 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1980 } 1981 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1982 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1983 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1984 } else { 1985 if (lda_rhs != n_R) { 1986 IS dummy; 1987 1988 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1989 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1990 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1991 } else { 1992 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1993 pcbddc->local_auxmat2 = local_auxmat2_R; 1994 } 1995 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1996 } 1997 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1998 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1999 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2000 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2001 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2002 if (isCHOL) { 2003 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2004 } else { 2005 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2006 } 2007 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2008 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2009 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2010 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2011 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2012 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2013 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2014 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2015 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2016 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2017 } 2018 2019 /* Get submatrices from subdomain matrix */ 2020 if (n_vertices) { 2021 IS is_aux; 2022 2023 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2024 IS tis; 2025 2026 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2027 ierr = ISSort(tis);CHKERRQ(ierr); 2028 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2029 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2030 } else { 2031 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2032 } 2033 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2034 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2035 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2036 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2037 } 2038 2039 /* Matrix of coarse basis functions (local) */ 2040 if (pcbddc->coarse_phi_B) { 2041 PetscInt on_B,on_primal,on_D=n_D; 2042 if (pcbddc->coarse_phi_D) { 2043 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2044 } 2045 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2046 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2047 PetscScalar *marray; 2048 2049 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2050 ierr = PetscFree(marray);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2052 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2053 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2054 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2055 } 2056 } 2057 2058 if (!pcbddc->coarse_phi_B) { 2059 PetscScalar *marray; 2060 2061 n = n_B*pcbddc->local_primal_size; 2062 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2063 n += n_D*pcbddc->local_primal_size; 2064 } 2065 if (!pcbddc->symmetric_primal) { 2066 n *= 2; 2067 } 2068 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2070 n = n_B*pcbddc->local_primal_size; 2071 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2072 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2073 n += n_D*pcbddc->local_primal_size; 2074 } 2075 if (!pcbddc->symmetric_primal) { 2076 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2077 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2078 n = n_B*pcbddc->local_primal_size; 2079 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2080 } 2081 } else { 2082 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2083 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2084 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2085 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2086 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2087 } 2088 } 2089 } 2090 2091 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2092 p0_lidx_I = NULL; 2093 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2094 const PetscInt *idxs; 2095 2096 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2097 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2098 for (i=0;i<pcbddc->benign_n;i++) { 2099 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2100 } 2101 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2102 } 2103 2104 /* vertices */ 2105 if (n_vertices) { 2106 2107 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2108 2109 if (n_R) { 2110 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2111 PetscBLASInt B_N,B_one = 1; 2112 PetscScalar *x,*y; 2113 PetscBool isseqaij; 2114 2115 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2116 if (need_benign_correction) { 2117 ISLocalToGlobalMapping RtoN; 2118 IS is_p0; 2119 PetscInt *idxs_p0,n; 2120 2121 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2122 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2123 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2124 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2125 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2126 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2127 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2128 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2129 } 2130 2131 if (lda_rhs == n_R) { 2132 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2133 } else { 2134 PetscScalar *av,*array; 2135 const PetscInt *xadj,*adjncy; 2136 PetscInt n; 2137 PetscBool flg_row; 2138 2139 array = work+lda_rhs*n_vertices; 2140 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2141 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2142 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2143 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2144 for (i=0;i<n;i++) { 2145 PetscInt j; 2146 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2147 } 2148 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2149 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2150 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2151 } 2152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2153 if (need_benign_correction) { 2154 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2155 PetscScalar *marr; 2156 2157 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2158 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2159 2160 | 0 0 0 | (V) 2161 L = | 0 0 -1 | (P-p0) 2162 | 0 0 -1 | (p0) 2163 2164 */ 2165 for (i=0;i<reuse_solver->benign_n;i++) { 2166 const PetscScalar *vals; 2167 const PetscInt *idxs,*idxs_zero; 2168 PetscInt n,j,nz; 2169 2170 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2171 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2172 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2173 for (j=0;j<n;j++) { 2174 PetscScalar val = vals[j]; 2175 PetscInt k,col = idxs[j]; 2176 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2177 } 2178 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2179 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2180 } 2181 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2182 } 2183 if (F) { 2184 /* need to correct the rhs */ 2185 if (need_benign_correction) { 2186 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2187 PetscScalar *marr; 2188 2189 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2190 if (lda_rhs != n_R) { 2191 for (i=0;i<n_vertices;i++) { 2192 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2193 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2194 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2195 } 2196 } else { 2197 for (i=0;i<n_vertices;i++) { 2198 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2199 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2200 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2201 } 2202 } 2203 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2204 } 2205 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2206 /* need to correct the solution */ 2207 if (need_benign_correction) { 2208 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2209 PetscScalar *marr; 2210 2211 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2212 if (lda_rhs != n_R) { 2213 for (i=0;i<n_vertices;i++) { 2214 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2215 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2217 } 2218 } else { 2219 for (i=0;i<n_vertices;i++) { 2220 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2221 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2222 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2223 } 2224 } 2225 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2226 } 2227 } else { 2228 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2229 for (i=0;i<n_vertices;i++) { 2230 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2231 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2232 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2233 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2234 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2235 } 2236 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2237 } 2238 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2239 /* S_VV and S_CV */ 2240 if (n_constraints) { 2241 Mat B; 2242 2243 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2244 for (i=0;i<n_vertices;i++) { 2245 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2246 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2247 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2248 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2249 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2250 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2251 } 2252 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2253 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2254 ierr = MatDestroy(&B);CHKERRQ(ierr); 2255 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2256 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2257 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2258 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2259 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2260 ierr = MatDestroy(&B);CHKERRQ(ierr); 2261 } 2262 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2263 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2264 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2265 } 2266 if (lda_rhs != n_R) { 2267 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2268 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2269 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2270 } 2271 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2272 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2273 if (need_benign_correction) { 2274 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2275 PetscScalar *marr,*sums; 2276 2277 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2278 ierr = MatDenseGetArray(S_VVt,&marr); 2279 for (i=0;i<reuse_solver->benign_n;i++) { 2280 const PetscScalar *vals; 2281 const PetscInt *idxs,*idxs_zero; 2282 PetscInt n,j,nz; 2283 2284 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2285 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2286 for (j=0;j<n_vertices;j++) { 2287 PetscInt k; 2288 sums[j] = 0.; 2289 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2290 } 2291 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2292 for (j=0;j<n;j++) { 2293 PetscScalar val = vals[j]; 2294 PetscInt k; 2295 for (k=0;k<n_vertices;k++) { 2296 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2297 } 2298 } 2299 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2300 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2301 } 2302 ierr = PetscFree(sums);CHKERRQ(ierr); 2303 ierr = MatDenseRestoreArray(S_VVt,&marr); 2304 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2305 } 2306 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2307 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2308 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2309 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2311 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2312 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2313 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2314 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2315 } else { 2316 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2317 } 2318 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2319 2320 /* coarse basis functions */ 2321 for (i=0;i<n_vertices;i++) { 2322 PetscScalar *y; 2323 2324 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2325 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2326 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2327 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2329 y[n_B*i+idx_V_B[i]] = 1.0; 2330 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2331 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2332 2333 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2334 PetscInt j; 2335 2336 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2337 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2338 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2339 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2340 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2341 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2342 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2343 } 2344 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2345 } 2346 /* if n_R == 0 the object is not destroyed */ 2347 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2348 } 2349 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2350 2351 if (n_constraints) { 2352 Mat B; 2353 2354 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2355 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2356 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2357 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2358 if (n_vertices) { 2359 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2360 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2361 } else { 2362 Mat S_VCt; 2363 2364 if (lda_rhs != n_R) { 2365 ierr = MatDestroy(&B);CHKERRQ(ierr); 2366 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2367 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2368 } 2369 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2370 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2371 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2372 } 2373 } 2374 ierr = MatDestroy(&B);CHKERRQ(ierr); 2375 /* coarse basis functions */ 2376 for (i=0;i<n_constraints;i++) { 2377 PetscScalar *y; 2378 2379 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2380 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2381 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2382 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2383 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2384 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2385 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2386 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2387 PetscInt j; 2388 2389 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2390 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2391 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2392 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2393 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2394 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2395 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2396 } 2397 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2398 } 2399 } 2400 if (n_constraints) { 2401 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2402 } 2403 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2404 2405 /* coarse matrix entries relative to B_0 */ 2406 if (pcbddc->benign_n) { 2407 Mat B0_B,B0_BPHI; 2408 IS is_dummy; 2409 PetscScalar *data; 2410 PetscInt j; 2411 2412 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2413 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2414 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2415 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2416 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2417 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2418 for (j=0;j<pcbddc->benign_n;j++) { 2419 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2420 for (i=0;i<pcbddc->local_primal_size;i++) { 2421 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2422 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2423 } 2424 } 2425 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2426 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2427 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2428 } 2429 2430 /* compute other basis functions for non-symmetric problems */ 2431 if (!pcbddc->symmetric_primal) { 2432 Mat B_V=NULL,B_C=NULL; 2433 PetscScalar *marray; 2434 2435 if (n_constraints) { 2436 Mat S_CCT,C_CRT; 2437 2438 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2439 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2440 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2441 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2442 if (n_vertices) { 2443 Mat S_VCT; 2444 2445 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2446 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2447 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2448 } 2449 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2450 } else { 2451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2452 } 2453 if (n_vertices && n_R) { 2454 PetscScalar *av,*marray; 2455 const PetscInt *xadj,*adjncy; 2456 PetscInt n; 2457 PetscBool flg_row; 2458 2459 /* B_V = B_V - A_VR^T */ 2460 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2461 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2462 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2463 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2464 for (i=0;i<n;i++) { 2465 PetscInt j; 2466 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2467 } 2468 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2469 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2470 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2471 } 2472 2473 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2474 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2475 for (i=0;i<n_vertices;i++) { 2476 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2477 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2478 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2479 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2480 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2481 } 2482 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2483 if (B_C) { 2484 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2485 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2486 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2487 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2488 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2489 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2490 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2491 } 2492 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2493 } 2494 /* coarse basis functions */ 2495 for (i=0;i<pcbddc->local_primal_size;i++) { 2496 PetscScalar *y; 2497 2498 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2499 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2500 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2501 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2502 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2503 if (i<n_vertices) { 2504 y[n_B*i+idx_V_B[i]] = 1.0; 2505 } 2506 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2507 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2508 2509 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2510 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2511 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2512 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2513 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2514 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2515 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2516 } 2517 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2518 } 2519 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2520 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2521 } 2522 /* free memory */ 2523 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2524 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2525 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2526 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2527 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2528 ierr = PetscFree(work);CHKERRQ(ierr); 2529 if (n_vertices) { 2530 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2531 } 2532 if (n_constraints) { 2533 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2534 } 2535 /* Checking coarse_sub_mat and coarse basis functios */ 2536 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2537 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2538 if (pcbddc->dbg_flag) { 2539 Mat coarse_sub_mat; 2540 Mat AUXMAT,TM1,TM2,TM3,TM4; 2541 Mat coarse_phi_D,coarse_phi_B; 2542 Mat coarse_psi_D,coarse_psi_B; 2543 Mat A_II,A_BB,A_IB,A_BI; 2544 Mat C_B,CPHI; 2545 IS is_dummy; 2546 Vec mones; 2547 MatType checkmattype=MATSEQAIJ; 2548 PetscReal real_value; 2549 2550 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2551 Mat A; 2552 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2553 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2554 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2555 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2556 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2557 ierr = MatDestroy(&A);CHKERRQ(ierr); 2558 } else { 2559 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2560 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2561 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2562 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2563 } 2564 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2565 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2566 if (!pcbddc->symmetric_primal) { 2567 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2568 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2569 } 2570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2571 2572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2573 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2574 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2575 if (!pcbddc->symmetric_primal) { 2576 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2577 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2579 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2580 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2582 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2583 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2585 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2586 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2588 } else { 2589 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2590 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2591 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2592 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2594 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2595 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2597 } 2598 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2599 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2600 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2601 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2602 if (pcbddc->benign_n) { 2603 Mat B0_B,B0_BPHI; 2604 PetscScalar *data,*data2; 2605 PetscInt j; 2606 2607 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2608 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2609 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2610 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2611 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2612 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2613 for (j=0;j<pcbddc->benign_n;j++) { 2614 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2615 for (i=0;i<pcbddc->local_primal_size;i++) { 2616 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2617 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2618 } 2619 } 2620 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2621 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2622 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2623 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2624 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2625 } 2626 #if 0 2627 { 2628 PetscViewer viewer; 2629 char filename[256]; 2630 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2631 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2632 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2633 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2634 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2635 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2636 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2637 if (save_change) { 2638 Mat phi_B; 2639 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2640 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2641 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2642 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2643 } else { 2644 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2645 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2646 } 2647 if (pcbddc->coarse_phi_D) { 2648 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2649 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2650 } 2651 if (pcbddc->coarse_psi_B) { 2652 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2653 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2654 } 2655 if (pcbddc->coarse_psi_D) { 2656 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2657 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2658 } 2659 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2660 } 2661 #endif 2662 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2663 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2664 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2665 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2666 2667 /* check constraints */ 2668 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2669 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2670 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2671 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2672 } else { 2673 PetscScalar *data; 2674 Mat tmat; 2675 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2676 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2677 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2678 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2679 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2680 } 2681 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2682 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2683 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2684 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2686 if (!pcbddc->symmetric_primal) { 2687 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2688 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2689 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2690 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2692 } 2693 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2694 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2695 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2696 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2697 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2698 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2699 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2700 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2701 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2702 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2703 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2704 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2705 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2706 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2707 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2708 if (!pcbddc->symmetric_primal) { 2709 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2710 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2711 } 2712 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2713 } 2714 /* get back data */ 2715 *coarse_submat_vals_n = coarse_submat_vals; 2716 PetscFunctionReturn(0); 2717 } 2718 2719 #undef __FUNCT__ 2720 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2721 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2722 { 2723 Mat *work_mat; 2724 IS isrow_s,iscol_s; 2725 PetscBool rsorted,csorted; 2726 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2731 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2732 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2733 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2734 2735 if (!rsorted) { 2736 const PetscInt *idxs; 2737 PetscInt *idxs_sorted,i; 2738 2739 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2740 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2741 for (i=0;i<rsize;i++) { 2742 idxs_perm_r[i] = i; 2743 } 2744 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2745 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2746 for (i=0;i<rsize;i++) { 2747 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2748 } 2749 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2750 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2751 } else { 2752 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2753 isrow_s = isrow; 2754 } 2755 2756 if (!csorted) { 2757 if (isrow == iscol) { 2758 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2759 iscol_s = isrow_s; 2760 } else { 2761 const PetscInt *idxs; 2762 PetscInt *idxs_sorted,i; 2763 2764 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2765 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2766 for (i=0;i<csize;i++) { 2767 idxs_perm_c[i] = i; 2768 } 2769 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2770 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2771 for (i=0;i<csize;i++) { 2772 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2773 } 2774 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2775 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2776 } 2777 } else { 2778 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2779 iscol_s = iscol; 2780 } 2781 2782 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2783 2784 if (!rsorted || !csorted) { 2785 Mat new_mat; 2786 IS is_perm_r,is_perm_c; 2787 2788 if (!rsorted) { 2789 PetscInt *idxs_r,i; 2790 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2791 for (i=0;i<rsize;i++) { 2792 idxs_r[idxs_perm_r[i]] = i; 2793 } 2794 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2795 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2796 } else { 2797 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2798 } 2799 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2800 2801 if (!csorted) { 2802 if (isrow_s == iscol_s) { 2803 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2804 is_perm_c = is_perm_r; 2805 } else { 2806 PetscInt *idxs_c,i; 2807 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2808 for (i=0;i<csize;i++) { 2809 idxs_c[idxs_perm_c[i]] = i; 2810 } 2811 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2812 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2813 } 2814 } else { 2815 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2816 } 2817 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2818 2819 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2820 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2821 work_mat[0] = new_mat; 2822 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2823 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2824 } 2825 2826 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2827 *B = work_mat[0]; 2828 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2829 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2830 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2836 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2837 { 2838 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2839 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2840 Mat new_mat; 2841 IS is_local,is_global; 2842 PetscInt local_size; 2843 PetscBool isseqaij; 2844 PetscErrorCode ierr; 2845 2846 PetscFunctionBegin; 2847 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2848 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2849 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2850 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2851 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2852 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2853 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2854 2855 /* check */ 2856 if (pcbddc->dbg_flag) { 2857 Vec x,x_change; 2858 PetscReal error; 2859 2860 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2861 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2862 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2865 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2866 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2867 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2868 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2869 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2871 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2872 ierr = VecDestroy(&x);CHKERRQ(ierr); 2873 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2874 } 2875 2876 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2877 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2878 if (isseqaij) { 2879 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2880 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2881 } else { 2882 Mat work_mat; 2883 2884 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2885 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2886 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2887 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2888 } 2889 if (matis->A->symmetric_set) { 2890 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2891 #if !defined(PETSC_USE_COMPLEX) 2892 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2893 #endif 2894 } 2895 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2896 PetscFunctionReturn(0); 2897 } 2898 2899 #undef __FUNCT__ 2900 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2901 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2902 { 2903 PC_IS* pcis = (PC_IS*)(pc->data); 2904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2905 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2906 PetscInt *idx_R_local=NULL; 2907 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2908 PetscInt vbs,bs; 2909 PetscBT bitmask=NULL; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 /* 2914 No need to setup local scatters if 2915 - primal space is unchanged 2916 AND 2917 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2918 AND 2919 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2920 */ 2921 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2922 PetscFunctionReturn(0); 2923 } 2924 /* destroy old objects */ 2925 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2926 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2927 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2928 /* Set Non-overlapping dimensions */ 2929 n_B = pcis->n_B; 2930 n_D = pcis->n - n_B; 2931 n_vertices = pcbddc->n_vertices; 2932 2933 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2934 2935 /* create auxiliary bitmask and allocate workspace */ 2936 if (!sub_schurs || !sub_schurs->reuse_solver) { 2937 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2938 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2939 for (i=0;i<n_vertices;i++) { 2940 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2941 } 2942 2943 for (i=0, n_R=0; i<pcis->n; i++) { 2944 if (!PetscBTLookup(bitmask,i)) { 2945 idx_R_local[n_R++] = i; 2946 } 2947 } 2948 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2949 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2950 2951 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2952 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2953 } 2954 2955 /* Block code */ 2956 vbs = 1; 2957 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2958 if (bs>1 && !(n_vertices%bs)) { 2959 PetscBool is_blocked = PETSC_TRUE; 2960 PetscInt *vary; 2961 if (!sub_schurs || !sub_schurs->reuse_solver) { 2962 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2963 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2964 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2965 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2966 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2967 for (i=0; i<pcis->n/bs; i++) { 2968 if (vary[i]!=0 && vary[i]!=bs) { 2969 is_blocked = PETSC_FALSE; 2970 break; 2971 } 2972 } 2973 ierr = PetscFree(vary);CHKERRQ(ierr); 2974 } else { 2975 /* Verify directly the R set */ 2976 for (i=0; i<n_R/bs; i++) { 2977 PetscInt j,node=idx_R_local[bs*i]; 2978 for (j=1; j<bs; j++) { 2979 if (node != idx_R_local[bs*i+j]-j) { 2980 is_blocked = PETSC_FALSE; 2981 break; 2982 } 2983 } 2984 } 2985 } 2986 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2987 vbs = bs; 2988 for (i=0;i<n_R/vbs;i++) { 2989 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2990 } 2991 } 2992 } 2993 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2994 if (sub_schurs && sub_schurs->reuse_solver) { 2995 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2996 2997 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2998 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2999 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3000 reuse_solver->is_R = pcbddc->is_R_local; 3001 } else { 3002 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3003 } 3004 3005 /* print some info if requested */ 3006 if (pcbddc->dbg_flag) { 3007 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3008 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3009 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3010 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3012 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3013 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3014 } 3015 3016 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3017 if (!sub_schurs || !sub_schurs->reuse_solver) { 3018 IS is_aux1,is_aux2; 3019 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3020 3021 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3022 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3023 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3024 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3025 for (i=0; i<n_D; i++) { 3026 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3027 } 3028 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3029 for (i=0, j=0; i<n_R; i++) { 3030 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3031 aux_array1[j++] = i; 3032 } 3033 } 3034 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3035 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3036 for (i=0, j=0; i<n_B; i++) { 3037 if (!PetscBTLookup(bitmask,is_indices[i])) { 3038 aux_array2[j++] = i; 3039 } 3040 } 3041 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3042 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3043 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3044 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3045 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3046 3047 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3048 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3049 for (i=0, j=0; i<n_R; i++) { 3050 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3051 aux_array1[j++] = i; 3052 } 3053 } 3054 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3055 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3056 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3057 } 3058 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3059 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3060 } else { 3061 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3062 IS tis; 3063 PetscInt schur_size; 3064 3065 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3066 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3067 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3068 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3069 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3070 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3071 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3072 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3073 } 3074 } 3075 PetscFunctionReturn(0); 3076 } 3077 3078 3079 #undef __FUNCT__ 3080 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3081 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3082 { 3083 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3084 PC_IS *pcis = (PC_IS*)pc->data; 3085 PC pc_temp; 3086 Mat A_RR; 3087 MatReuse reuse; 3088 PetscScalar m_one = -1.0; 3089 PetscReal value; 3090 PetscInt n_D,n_R; 3091 PetscBool check_corr[2],issbaij; 3092 PetscErrorCode ierr; 3093 /* prefixes stuff */ 3094 char dir_prefix[256],neu_prefix[256],str_level[16]; 3095 size_t len; 3096 3097 PetscFunctionBegin; 3098 3099 /* compute prefixes */ 3100 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3101 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3102 if (!pcbddc->current_level) { 3103 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3104 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3105 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3106 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3107 } else { 3108 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3109 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3110 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3111 len -= 15; /* remove "pc_bddc_coarse_" */ 3112 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3113 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3114 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3115 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3116 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3117 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3118 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3119 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3120 } 3121 3122 /* DIRICHLET PROBLEM */ 3123 if (dirichlet) { 3124 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3125 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3126 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3127 if (pcbddc->dbg_flag) { 3128 Mat A_IIn; 3129 3130 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3131 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3132 pcis->A_II = A_IIn; 3133 } 3134 } 3135 if (pcbddc->local_mat->symmetric_set) { 3136 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3137 } 3138 /* Matrix for Dirichlet problem is pcis->A_II */ 3139 n_D = pcis->n - pcis->n_B; 3140 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3141 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3142 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3143 /* default */ 3144 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3145 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3146 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3147 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3148 if (issbaij) { 3149 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3150 } else { 3151 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3152 } 3153 /* Allow user's customization */ 3154 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3155 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3156 } 3157 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3158 if (sub_schurs && sub_schurs->reuse_solver) { 3159 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3160 3161 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3162 } 3163 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3164 if (!n_D) { 3165 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3166 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3167 } 3168 /* Set Up KSP for Dirichlet problem of BDDC */ 3169 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3170 /* set ksp_D into pcis data */ 3171 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3172 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3173 pcis->ksp_D = pcbddc->ksp_D; 3174 } 3175 3176 /* NEUMANN PROBLEM */ 3177 A_RR = 0; 3178 if (neumann) { 3179 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3180 PetscInt ibs,mbs; 3181 PetscBool issbaij; 3182 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3183 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3184 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3185 if (pcbddc->ksp_R) { /* already created ksp */ 3186 PetscInt nn_R; 3187 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3188 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3189 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3190 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3191 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3192 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3193 reuse = MAT_INITIAL_MATRIX; 3194 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3195 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3196 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3197 reuse = MAT_INITIAL_MATRIX; 3198 } else { /* safe to reuse the matrix */ 3199 reuse = MAT_REUSE_MATRIX; 3200 } 3201 } 3202 /* last check */ 3203 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3204 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3205 reuse = MAT_INITIAL_MATRIX; 3206 } 3207 } else { /* first time, so we need to create the matrix */ 3208 reuse = MAT_INITIAL_MATRIX; 3209 } 3210 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3211 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3212 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3213 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3214 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3215 if (matis->A == pcbddc->local_mat) { 3216 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3217 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3218 } else { 3219 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3220 } 3221 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3222 if (matis->A == pcbddc->local_mat) { 3223 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3224 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3225 } else { 3226 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3227 } 3228 } 3229 /* extract A_RR */ 3230 if (sub_schurs && sub_schurs->reuse_solver) { 3231 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3232 3233 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3234 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3235 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3236 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3237 } else { 3238 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3239 } 3240 } else { 3241 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3242 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3243 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3244 } 3245 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3246 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3247 } 3248 if (pcbddc->local_mat->symmetric_set) { 3249 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3250 } 3251 if (!pcbddc->ksp_R) { /* create object if not present */ 3252 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3253 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3254 /* default */ 3255 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3256 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3257 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3258 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3259 if (issbaij) { 3260 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3261 } else { 3262 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3263 } 3264 /* Allow user's customization */ 3265 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3266 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3267 } 3268 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3269 if (!n_R) { 3270 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3271 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3272 } 3273 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3274 /* Reuse solver if it is present */ 3275 if (sub_schurs && sub_schurs->reuse_solver) { 3276 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3277 3278 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3279 } 3280 /* Set Up KSP for Neumann problem of BDDC */ 3281 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3282 } 3283 3284 if (pcbddc->dbg_flag) { 3285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3286 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3287 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3288 } 3289 3290 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3291 check_corr[0] = check_corr[1] = PETSC_FALSE; 3292 if (pcbddc->NullSpace_corr[0]) { 3293 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3294 } 3295 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3296 check_corr[0] = PETSC_TRUE; 3297 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3298 } 3299 if (neumann && pcbddc->NullSpace_corr[2]) { 3300 check_corr[1] = PETSC_TRUE; 3301 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3302 } 3303 3304 /* check Dirichlet and Neumann solvers */ 3305 if (pcbddc->dbg_flag) { 3306 if (dirichlet) { /* Dirichlet */ 3307 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3308 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3309 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3310 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3311 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3312 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3313 if (check_corr[0]) { 3314 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3315 } 3316 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3317 } 3318 if (neumann) { /* Neumann */ 3319 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3320 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3321 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3322 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3323 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3325 if (check_corr[1]) { 3326 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3327 } 3328 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3329 } 3330 } 3331 /* free Neumann problem's matrix */ 3332 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3333 PetscFunctionReturn(0); 3334 } 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3338 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3339 { 3340 PetscErrorCode ierr; 3341 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3342 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3343 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3344 3345 PetscFunctionBegin; 3346 if (!reuse_solver) { 3347 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3348 } 3349 if (!pcbddc->switch_static) { 3350 if (applytranspose && pcbddc->local_auxmat1) { 3351 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3352 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3353 } 3354 if (!reuse_solver) { 3355 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3356 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3357 } else { 3358 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3359 3360 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3361 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3362 } 3363 } else { 3364 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3365 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3366 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3367 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3368 if (applytranspose && pcbddc->local_auxmat1) { 3369 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3370 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3371 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3372 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3373 } 3374 } 3375 if (!reuse_solver || pcbddc->switch_static) { 3376 if (applytranspose) { 3377 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3378 } else { 3379 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3380 } 3381 } else { 3382 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3383 3384 if (applytranspose) { 3385 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3386 } else { 3387 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3388 } 3389 } 3390 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3391 if (!pcbddc->switch_static) { 3392 if (!reuse_solver) { 3393 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3394 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3395 } else { 3396 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3397 3398 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3399 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3400 } 3401 if (!applytranspose && pcbddc->local_auxmat1) { 3402 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3403 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3404 } 3405 } else { 3406 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3407 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3408 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 if (!applytranspose && pcbddc->local_auxmat1) { 3411 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3412 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3413 } 3414 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3416 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3417 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3418 } 3419 PetscFunctionReturn(0); 3420 } 3421 3422 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3423 #undef __FUNCT__ 3424 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3425 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3426 { 3427 PetscErrorCode ierr; 3428 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3429 PC_IS* pcis = (PC_IS*) (pc->data); 3430 const PetscScalar zero = 0.0; 3431 3432 PetscFunctionBegin; 3433 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3434 if (!pcbddc->benign_apply_coarse_only) { 3435 if (applytranspose) { 3436 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3437 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3438 } else { 3439 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3440 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3441 } 3442 } else { 3443 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3444 } 3445 3446 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3447 if (pcbddc->benign_n) { 3448 PetscScalar *array; 3449 PetscInt j; 3450 3451 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3452 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3453 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3454 } 3455 3456 /* start communications from local primal nodes to rhs of coarse solver */ 3457 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3458 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3459 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3460 3461 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3462 if (pcbddc->coarse_ksp) { 3463 Mat coarse_mat; 3464 Vec rhs,sol; 3465 MatNullSpace nullsp; 3466 PetscBool isbddc = PETSC_FALSE; 3467 3468 if (pcbddc->benign_have_null) { 3469 PC coarse_pc; 3470 3471 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3472 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3473 /* we need to propagate to coarser levels the need for a possible benign correction */ 3474 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3475 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3476 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3477 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3478 } 3479 } 3480 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3481 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3482 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3483 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3484 if (nullsp) { 3485 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3486 } 3487 if (applytranspose) { 3488 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3489 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3490 } else { 3491 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3492 PC coarse_pc; 3493 3494 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3495 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3496 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3497 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3498 } else { 3499 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3500 } 3501 } 3502 /* we don't need the benign correction at coarser levels anymore */ 3503 if (pcbddc->benign_have_null && isbddc) { 3504 PC coarse_pc; 3505 PC_BDDC* coarsepcbddc; 3506 3507 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3508 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3509 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3510 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3511 } 3512 if (nullsp) { 3513 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3514 } 3515 } 3516 3517 /* Local solution on R nodes */ 3518 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3519 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3520 } 3521 /* communications from coarse sol to local primal nodes */ 3522 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3523 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3524 3525 /* Sum contributions from the two levels */ 3526 if (!pcbddc->benign_apply_coarse_only) { 3527 if (applytranspose) { 3528 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3529 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3530 } else { 3531 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3532 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3533 } 3534 /* store p0 */ 3535 if (pcbddc->benign_n) { 3536 PetscScalar *array; 3537 PetscInt j; 3538 3539 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3540 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3541 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3542 } 3543 } else { /* expand the coarse solution */ 3544 if (applytranspose) { 3545 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3546 } else { 3547 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3548 } 3549 } 3550 PetscFunctionReturn(0); 3551 } 3552 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3555 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3556 { 3557 PetscErrorCode ierr; 3558 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3559 PetscScalar *array; 3560 Vec from,to; 3561 3562 PetscFunctionBegin; 3563 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3564 from = pcbddc->coarse_vec; 3565 to = pcbddc->vec1_P; 3566 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3567 Vec tvec; 3568 3569 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3570 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3571 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3572 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3573 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3574 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3575 } 3576 } else { /* from local to global -> put data in coarse right hand side */ 3577 from = pcbddc->vec1_P; 3578 to = pcbddc->coarse_vec; 3579 } 3580 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3581 PetscFunctionReturn(0); 3582 } 3583 3584 #undef __FUNCT__ 3585 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3586 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3587 { 3588 PetscErrorCode ierr; 3589 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3590 PetscScalar *array; 3591 Vec from,to; 3592 3593 PetscFunctionBegin; 3594 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3595 from = pcbddc->coarse_vec; 3596 to = pcbddc->vec1_P; 3597 } else { /* from local to global -> put data in coarse right hand side */ 3598 from = pcbddc->vec1_P; 3599 to = pcbddc->coarse_vec; 3600 } 3601 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3602 if (smode == SCATTER_FORWARD) { 3603 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3604 Vec tvec; 3605 3606 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3607 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3608 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3609 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3610 } 3611 } else { 3612 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3613 ierr = VecResetArray(from);CHKERRQ(ierr); 3614 } 3615 } 3616 PetscFunctionReturn(0); 3617 } 3618 3619 /* uncomment for testing purposes */ 3620 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3621 #undef __FUNCT__ 3622 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3623 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3624 { 3625 PetscErrorCode ierr; 3626 PC_IS* pcis = (PC_IS*)(pc->data); 3627 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3628 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3629 /* one and zero */ 3630 PetscScalar one=1.0,zero=0.0; 3631 /* space to store constraints and their local indices */ 3632 PetscScalar *constraints_data; 3633 PetscInt *constraints_idxs,*constraints_idxs_B; 3634 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3635 PetscInt *constraints_n; 3636 /* iterators */ 3637 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3638 /* BLAS integers */ 3639 PetscBLASInt lwork,lierr; 3640 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3641 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3642 /* reuse */ 3643 PetscInt olocal_primal_size,olocal_primal_size_cc; 3644 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3645 /* change of basis */ 3646 PetscBool qr_needed; 3647 PetscBT change_basis,qr_needed_idx; 3648 /* auxiliary stuff */ 3649 PetscInt *nnz,*is_indices; 3650 PetscInt ncc; 3651 /* some quantities */ 3652 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3653 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3654 3655 PetscFunctionBegin; 3656 /* Destroy Mat objects computed previously */ 3657 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3658 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3659 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3660 /* save info on constraints from previous setup (if any) */ 3661 olocal_primal_size = pcbddc->local_primal_size; 3662 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3663 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3664 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3665 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3666 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3667 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3668 3669 if (!pcbddc->adaptive_selection) { 3670 IS ISForVertices,*ISForFaces,*ISForEdges; 3671 MatNullSpace nearnullsp; 3672 const Vec *nearnullvecs; 3673 Vec *localnearnullsp; 3674 PetscScalar *array; 3675 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3676 PetscBool nnsp_has_cnst; 3677 /* LAPACK working arrays for SVD or POD */ 3678 PetscBool skip_lapack,boolforchange; 3679 PetscScalar *work; 3680 PetscReal *singular_vals; 3681 #if defined(PETSC_USE_COMPLEX) 3682 PetscReal *rwork; 3683 #endif 3684 #if defined(PETSC_MISSING_LAPACK_GESVD) 3685 PetscScalar *temp_basis,*correlation_mat; 3686 #else 3687 PetscBLASInt dummy_int=1; 3688 PetscScalar dummy_scalar=1.; 3689 #endif 3690 3691 /* Get index sets for faces, edges and vertices from graph */ 3692 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3693 /* print some info */ 3694 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3695 PetscInt nv; 3696 3697 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3698 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3699 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3701 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3704 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3705 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3706 } 3707 3708 /* free unneeded index sets */ 3709 if (!pcbddc->use_vertices) { 3710 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3711 } 3712 if (!pcbddc->use_edges) { 3713 for (i=0;i<n_ISForEdges;i++) { 3714 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3715 } 3716 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3717 n_ISForEdges = 0; 3718 } 3719 if (!pcbddc->use_faces) { 3720 for (i=0;i<n_ISForFaces;i++) { 3721 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3722 } 3723 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3724 n_ISForFaces = 0; 3725 } 3726 3727 /* check if near null space is attached to global mat */ 3728 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3729 if (nearnullsp) { 3730 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3731 /* remove any stored info */ 3732 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3733 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3734 /* store information for BDDC solver reuse */ 3735 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3736 pcbddc->onearnullspace = nearnullsp; 3737 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3738 for (i=0;i<nnsp_size;i++) { 3739 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3740 } 3741 } else { /* if near null space is not provided BDDC uses constants by default */ 3742 nnsp_size = 0; 3743 nnsp_has_cnst = PETSC_TRUE; 3744 } 3745 /* get max number of constraints on a single cc */ 3746 max_constraints = nnsp_size; 3747 if (nnsp_has_cnst) max_constraints++; 3748 3749 /* 3750 Evaluate maximum storage size needed by the procedure 3751 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3752 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3753 There can be multiple constraints per connected component 3754 */ 3755 n_vertices = 0; 3756 if (ISForVertices) { 3757 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3758 } 3759 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3760 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3761 3762 total_counts = n_ISForFaces+n_ISForEdges; 3763 total_counts *= max_constraints; 3764 total_counts += n_vertices; 3765 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3766 3767 total_counts = 0; 3768 max_size_of_constraint = 0; 3769 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3770 IS used_is; 3771 if (i<n_ISForEdges) { 3772 used_is = ISForEdges[i]; 3773 } else { 3774 used_is = ISForFaces[i-n_ISForEdges]; 3775 } 3776 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3777 total_counts += j; 3778 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3779 } 3780 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3781 3782 /* get local part of global near null space vectors */ 3783 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3784 for (k=0;k<nnsp_size;k++) { 3785 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3786 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3787 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3788 } 3789 3790 /* whether or not to skip lapack calls */ 3791 skip_lapack = PETSC_TRUE; 3792 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3793 3794 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3795 if (!skip_lapack) { 3796 PetscScalar temp_work; 3797 3798 #if defined(PETSC_MISSING_LAPACK_GESVD) 3799 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3800 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3801 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3802 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3803 #if defined(PETSC_USE_COMPLEX) 3804 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3805 #endif 3806 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3807 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3808 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3809 lwork = -1; 3810 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3811 #if !defined(PETSC_USE_COMPLEX) 3812 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3813 #else 3814 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3815 #endif 3816 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3817 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3818 #else /* on missing GESVD */ 3819 /* SVD */ 3820 PetscInt max_n,min_n; 3821 max_n = max_size_of_constraint; 3822 min_n = max_constraints; 3823 if (max_size_of_constraint < max_constraints) { 3824 min_n = max_size_of_constraint; 3825 max_n = max_constraints; 3826 } 3827 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3828 #if defined(PETSC_USE_COMPLEX) 3829 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3830 #endif 3831 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3832 lwork = -1; 3833 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3834 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3835 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3836 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3837 #if !defined(PETSC_USE_COMPLEX) 3838 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3839 #else 3840 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3841 #endif 3842 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3843 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3844 #endif /* on missing GESVD */ 3845 /* Allocate optimal workspace */ 3846 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3847 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3848 } 3849 /* Now we can loop on constraining sets */ 3850 total_counts = 0; 3851 constraints_idxs_ptr[0] = 0; 3852 constraints_data_ptr[0] = 0; 3853 /* vertices */ 3854 if (n_vertices) { 3855 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3856 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3857 for (i=0;i<n_vertices;i++) { 3858 constraints_n[total_counts] = 1; 3859 constraints_data[total_counts] = 1.0; 3860 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3861 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3862 total_counts++; 3863 } 3864 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3865 n_vertices = total_counts; 3866 } 3867 3868 /* edges and faces */ 3869 total_counts_cc = total_counts; 3870 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3871 IS used_is; 3872 PetscBool idxs_copied = PETSC_FALSE; 3873 3874 if (ncc<n_ISForEdges) { 3875 used_is = ISForEdges[ncc]; 3876 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3877 } else { 3878 used_is = ISForFaces[ncc-n_ISForEdges]; 3879 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3880 } 3881 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3882 3883 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3884 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3885 /* change of basis should not be performed on local periodic nodes */ 3886 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3887 if (nnsp_has_cnst) { 3888 PetscScalar quad_value; 3889 3890 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3891 idxs_copied = PETSC_TRUE; 3892 3893 if (!pcbddc->use_nnsp_true) { 3894 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3895 } else { 3896 quad_value = 1.0; 3897 } 3898 for (j=0;j<size_of_constraint;j++) { 3899 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3900 } 3901 temp_constraints++; 3902 total_counts++; 3903 } 3904 for (k=0;k<nnsp_size;k++) { 3905 PetscReal real_value; 3906 PetscScalar *ptr_to_data; 3907 3908 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3909 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3910 for (j=0;j<size_of_constraint;j++) { 3911 ptr_to_data[j] = array[is_indices[j]]; 3912 } 3913 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3914 /* check if array is null on the connected component */ 3915 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3916 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3917 if (real_value > 0.0) { /* keep indices and values */ 3918 temp_constraints++; 3919 total_counts++; 3920 if (!idxs_copied) { 3921 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3922 idxs_copied = PETSC_TRUE; 3923 } 3924 } 3925 } 3926 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3927 valid_constraints = temp_constraints; 3928 if (!pcbddc->use_nnsp_true && temp_constraints) { 3929 if (temp_constraints == 1) { /* just normalize the constraint */ 3930 PetscScalar norm,*ptr_to_data; 3931 3932 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3933 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3934 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3935 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3936 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3937 } else { /* perform SVD */ 3938 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3939 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3940 3941 #if defined(PETSC_MISSING_LAPACK_GESVD) 3942 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3943 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3944 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3945 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3946 from that computed using LAPACKgesvd 3947 -> This is due to a different computation of eigenvectors in LAPACKheev 3948 -> The quality of the POD-computed basis will be the same */ 3949 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3950 /* Store upper triangular part of correlation matrix */ 3951 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3952 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3953 for (j=0;j<temp_constraints;j++) { 3954 for (k=0;k<j+1;k++) { 3955 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3956 } 3957 } 3958 /* compute eigenvalues and eigenvectors of correlation matrix */ 3959 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3960 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3961 #if !defined(PETSC_USE_COMPLEX) 3962 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3963 #else 3964 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3965 #endif 3966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3967 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3968 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3969 j = 0; 3970 while (j < temp_constraints && singular_vals[j] < tol) j++; 3971 total_counts = total_counts-j; 3972 valid_constraints = temp_constraints-j; 3973 /* scale and copy POD basis into used quadrature memory */ 3974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3975 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3976 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3980 if (j<temp_constraints) { 3981 PetscInt ii; 3982 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3983 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3984 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3985 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3986 for (k=0;k<temp_constraints-j;k++) { 3987 for (ii=0;ii<size_of_constraint;ii++) { 3988 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3989 } 3990 } 3991 } 3992 #else /* on missing GESVD */ 3993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3994 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3995 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3996 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3997 #if !defined(PETSC_USE_COMPLEX) 3998 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3999 #else 4000 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4001 #endif 4002 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4003 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4004 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4005 k = temp_constraints; 4006 if (k > size_of_constraint) k = size_of_constraint; 4007 j = 0; 4008 while (j < k && singular_vals[k-j-1] < tol) j++; 4009 valid_constraints = k-j; 4010 total_counts = total_counts-temp_constraints+valid_constraints; 4011 #endif /* on missing GESVD */ 4012 } 4013 } 4014 /* update pointers information */ 4015 if (valid_constraints) { 4016 constraints_n[total_counts_cc] = valid_constraints; 4017 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4018 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4019 /* set change_of_basis flag */ 4020 if (boolforchange) { 4021 PetscBTSet(change_basis,total_counts_cc); 4022 } 4023 total_counts_cc++; 4024 } 4025 } 4026 /* free workspace */ 4027 if (!skip_lapack) { 4028 ierr = PetscFree(work);CHKERRQ(ierr); 4029 #if defined(PETSC_USE_COMPLEX) 4030 ierr = PetscFree(rwork);CHKERRQ(ierr); 4031 #endif 4032 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4033 #if defined(PETSC_MISSING_LAPACK_GESVD) 4034 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4035 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4036 #endif 4037 } 4038 for (k=0;k<nnsp_size;k++) { 4039 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4040 } 4041 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4042 /* free index sets of faces, edges and vertices */ 4043 for (i=0;i<n_ISForFaces;i++) { 4044 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4045 } 4046 if (n_ISForFaces) { 4047 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4048 } 4049 for (i=0;i<n_ISForEdges;i++) { 4050 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4051 } 4052 if (n_ISForEdges) { 4053 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4054 } 4055 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4056 } else { 4057 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4058 4059 total_counts = 0; 4060 n_vertices = 0; 4061 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4062 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4063 } 4064 max_constraints = 0; 4065 total_counts_cc = 0; 4066 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4067 total_counts += pcbddc->adaptive_constraints_n[i]; 4068 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4069 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4070 } 4071 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4072 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4073 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4074 constraints_data = pcbddc->adaptive_constraints_data; 4075 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4076 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4077 total_counts_cc = 0; 4078 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4079 if (pcbddc->adaptive_constraints_n[i]) { 4080 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4081 } 4082 } 4083 #if 0 4084 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4085 for (i=0;i<total_counts_cc;i++) { 4086 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4087 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4088 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4089 printf(" %d",constraints_idxs[j]); 4090 } 4091 printf("\n"); 4092 printf("number of cc: %d\n",constraints_n[i]); 4093 } 4094 for (i=0;i<n_vertices;i++) { 4095 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4096 } 4097 for (i=0;i<sub_schurs->n_subs;i++) { 4098 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4099 } 4100 #endif 4101 4102 max_size_of_constraint = 0; 4103 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4104 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4105 /* Change of basis */ 4106 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4107 if (pcbddc->use_change_of_basis) { 4108 for (i=0;i<sub_schurs->n_subs;i++) { 4109 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4110 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4111 } 4112 } 4113 } 4114 } 4115 pcbddc->local_primal_size = total_counts; 4116 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4117 4118 /* map constraints_idxs in boundary numbering */ 4119 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4120 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4121 4122 /* Create constraint matrix */ 4123 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4124 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4125 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4126 4127 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4128 /* determine if a QR strategy is needed for change of basis */ 4129 qr_needed = PETSC_FALSE; 4130 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4131 total_primal_vertices=0; 4132 pcbddc->local_primal_size_cc = 0; 4133 for (i=0;i<total_counts_cc;i++) { 4134 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4135 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4136 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4137 pcbddc->local_primal_size_cc += 1; 4138 } else if (PetscBTLookup(change_basis,i)) { 4139 for (k=0;k<constraints_n[i];k++) { 4140 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4141 } 4142 pcbddc->local_primal_size_cc += constraints_n[i]; 4143 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4144 PetscBTSet(qr_needed_idx,i); 4145 qr_needed = PETSC_TRUE; 4146 } 4147 } else { 4148 pcbddc->local_primal_size_cc += 1; 4149 } 4150 } 4151 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4152 pcbddc->n_vertices = total_primal_vertices; 4153 /* permute indices in order to have a sorted set of vertices */ 4154 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4155 4156 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4157 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4158 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4159 4160 /* nonzero structure of constraint matrix */ 4161 /* and get reference dof for local constraints */ 4162 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4163 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4164 4165 j = total_primal_vertices; 4166 total_counts = total_primal_vertices; 4167 cum = total_primal_vertices; 4168 for (i=n_vertices;i<total_counts_cc;i++) { 4169 if (!PetscBTLookup(change_basis,i)) { 4170 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4171 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4172 cum++; 4173 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4174 for (k=0;k<constraints_n[i];k++) { 4175 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4176 nnz[j+k] = size_of_constraint; 4177 } 4178 j += constraints_n[i]; 4179 } 4180 } 4181 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4182 ierr = PetscFree(nnz);CHKERRQ(ierr); 4183 4184 /* set values in constraint matrix */ 4185 for (i=0;i<total_primal_vertices;i++) { 4186 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4187 } 4188 total_counts = total_primal_vertices; 4189 for (i=n_vertices;i<total_counts_cc;i++) { 4190 if (!PetscBTLookup(change_basis,i)) { 4191 PetscInt *cols; 4192 4193 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4194 cols = constraints_idxs+constraints_idxs_ptr[i]; 4195 for (k=0;k<constraints_n[i];k++) { 4196 PetscInt row = total_counts+k; 4197 PetscScalar *vals; 4198 4199 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4200 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4201 } 4202 total_counts += constraints_n[i]; 4203 } 4204 } 4205 /* assembling */ 4206 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4207 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4208 4209 /* 4210 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4211 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4212 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4213 */ 4214 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4215 if (pcbddc->use_change_of_basis) { 4216 /* dual and primal dofs on a single cc */ 4217 PetscInt dual_dofs,primal_dofs; 4218 /* working stuff for GEQRF */ 4219 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4220 PetscBLASInt lqr_work; 4221 /* working stuff for UNGQR */ 4222 PetscScalar *gqr_work,lgqr_work_t; 4223 PetscBLASInt lgqr_work; 4224 /* working stuff for TRTRS */ 4225 PetscScalar *trs_rhs; 4226 PetscBLASInt Blas_NRHS; 4227 /* pointers for values insertion into change of basis matrix */ 4228 PetscInt *start_rows,*start_cols; 4229 PetscScalar *start_vals; 4230 /* working stuff for values insertion */ 4231 PetscBT is_primal; 4232 PetscInt *aux_primal_numbering_B; 4233 /* matrix sizes */ 4234 PetscInt global_size,local_size; 4235 /* temporary change of basis */ 4236 Mat localChangeOfBasisMatrix; 4237 /* extra space for debugging */ 4238 PetscScalar *dbg_work; 4239 4240 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4241 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4242 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4243 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4244 /* nonzeros for local mat */ 4245 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4246 if (!pcbddc->benign_change || pcbddc->fake_change) { 4247 for (i=0;i<pcis->n;i++) nnz[i]=1; 4248 } else { 4249 const PetscInt *ii; 4250 PetscInt n; 4251 PetscBool flg_row; 4252 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4253 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4254 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4255 } 4256 for (i=n_vertices;i<total_counts_cc;i++) { 4257 if (PetscBTLookup(change_basis,i)) { 4258 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4259 if (PetscBTLookup(qr_needed_idx,i)) { 4260 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4261 } else { 4262 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4263 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4264 } 4265 } 4266 } 4267 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4268 ierr = PetscFree(nnz);CHKERRQ(ierr); 4269 /* Set interior change in the matrix */ 4270 if (!pcbddc->benign_change || pcbddc->fake_change) { 4271 for (i=0;i<pcis->n;i++) { 4272 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4273 } 4274 } else { 4275 const PetscInt *ii,*jj; 4276 PetscScalar *aa; 4277 PetscInt n; 4278 PetscBool flg_row; 4279 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4280 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4281 for (i=0;i<n;i++) { 4282 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4283 } 4284 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4285 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4286 } 4287 4288 if (pcbddc->dbg_flag) { 4289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4291 } 4292 4293 4294 /* Now we loop on the constraints which need a change of basis */ 4295 /* 4296 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4297 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4298 4299 Basic blocks of change of basis matrix T computed by 4300 4301 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4302 4303 | 1 0 ... 0 s_1/S | 4304 | 0 1 ... 0 s_2/S | 4305 | ... | 4306 | 0 ... 1 s_{n-1}/S | 4307 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4308 4309 with S = \sum_{i=1}^n s_i^2 4310 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4311 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4312 4313 - QR decomposition of constraints otherwise 4314 */ 4315 if (qr_needed) { 4316 /* space to store Q */ 4317 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4318 /* first we issue queries for optimal work */ 4319 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4320 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4321 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4322 lqr_work = -1; 4323 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4324 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4325 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4326 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4327 lgqr_work = -1; 4328 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4329 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4330 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4332 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4333 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4334 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4335 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4336 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4337 /* array to store scaling factors for reflectors */ 4338 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4339 /* array to store rhs and solution of triangular solver */ 4340 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4341 /* allocating workspace for check */ 4342 if (pcbddc->dbg_flag) { 4343 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4344 } 4345 } 4346 /* array to store whether a node is primal or not */ 4347 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4348 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4349 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4350 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4351 for (i=0;i<total_primal_vertices;i++) { 4352 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4353 } 4354 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4355 4356 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4357 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4358 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4359 if (PetscBTLookup(change_basis,total_counts)) { 4360 /* get constraint info */ 4361 primal_dofs = constraints_n[total_counts]; 4362 dual_dofs = size_of_constraint-primal_dofs; 4363 4364 if (pcbddc->dbg_flag) { 4365 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4366 } 4367 4368 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4369 4370 /* copy quadrature constraints for change of basis check */ 4371 if (pcbddc->dbg_flag) { 4372 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4373 } 4374 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4375 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4376 4377 /* compute QR decomposition of constraints */ 4378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4379 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4381 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4382 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4383 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4384 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4385 4386 /* explictly compute R^-T */ 4387 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4388 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4389 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4390 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4391 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4392 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4393 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4394 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4395 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4396 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4397 4398 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4399 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4400 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4401 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4402 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4403 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4404 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4405 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4406 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4407 4408 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4409 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4410 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4411 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4412 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4413 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4414 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4417 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4418 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4420 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4421 4422 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4423 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4424 /* insert cols for primal dofs */ 4425 for (j=0;j<primal_dofs;j++) { 4426 start_vals = &qr_basis[j*size_of_constraint]; 4427 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4428 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4429 } 4430 /* insert cols for dual dofs */ 4431 for (j=0,k=0;j<dual_dofs;k++) { 4432 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4433 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4434 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4435 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4436 j++; 4437 } 4438 } 4439 4440 /* check change of basis */ 4441 if (pcbddc->dbg_flag) { 4442 PetscInt ii,jj; 4443 PetscBool valid_qr=PETSC_TRUE; 4444 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4445 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4446 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4447 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4449 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4450 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4451 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4452 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4453 for (jj=0;jj<size_of_constraint;jj++) { 4454 for (ii=0;ii<primal_dofs;ii++) { 4455 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4456 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4457 } 4458 } 4459 if (!valid_qr) { 4460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4461 for (jj=0;jj<size_of_constraint;jj++) { 4462 for (ii=0;ii<primal_dofs;ii++) { 4463 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4464 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4465 } 4466 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4467 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4468 } 4469 } 4470 } 4471 } else { 4472 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4473 } 4474 } 4475 } else { /* simple transformation block */ 4476 PetscInt row,col; 4477 PetscScalar val,norm; 4478 4479 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4480 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4481 for (j=0;j<size_of_constraint;j++) { 4482 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4483 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4484 if (!PetscBTLookup(is_primal,row_B)) { 4485 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4486 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4487 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4488 } else { 4489 for (k=0;k<size_of_constraint;k++) { 4490 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4491 if (row != col) { 4492 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4493 } else { 4494 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4495 } 4496 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4497 } 4498 } 4499 } 4500 if (pcbddc->dbg_flag) { 4501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4502 } 4503 } 4504 } else { 4505 if (pcbddc->dbg_flag) { 4506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4507 } 4508 } 4509 } 4510 4511 /* free workspace */ 4512 if (qr_needed) { 4513 if (pcbddc->dbg_flag) { 4514 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4515 } 4516 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4517 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4518 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4519 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4520 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4521 } 4522 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4523 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4524 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 4526 /* assembling of global change of variable */ 4527 if (!pcbddc->fake_change) { 4528 Mat tmat; 4529 PetscInt bs; 4530 4531 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4532 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4533 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4534 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4535 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4536 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4537 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4538 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4539 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4540 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4541 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4542 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4543 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4544 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4545 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4546 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4547 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4548 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4549 4550 /* check */ 4551 if (pcbddc->dbg_flag) { 4552 PetscReal error; 4553 Vec x,x_change; 4554 4555 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4556 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4557 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4558 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4559 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4560 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4561 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4562 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4563 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4564 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4565 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4566 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4568 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4569 ierr = VecDestroy(&x);CHKERRQ(ierr); 4570 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4571 } 4572 /* adapt sub_schurs computed (if any) */ 4573 if (pcbddc->use_deluxe_scaling) { 4574 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4575 4576 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4577 if (sub_schurs && sub_schurs->S_Ej_all) { 4578 Mat S_new,tmat; 4579 IS is_all_N,is_V_Sall = NULL; 4580 4581 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4582 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4583 if (pcbddc->deluxe_zerorows) { 4584 ISLocalToGlobalMapping NtoSall; 4585 IS is_V; 4586 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4587 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4588 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4589 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4590 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4591 } 4592 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4593 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4594 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4595 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4596 if (pcbddc->deluxe_zerorows) { 4597 const PetscScalar *array; 4598 const PetscInt *idxs_V,*idxs_all; 4599 PetscInt i,n_V; 4600 4601 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4602 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4603 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4604 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4605 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4606 for (i=0;i<n_V;i++) { 4607 PetscScalar val; 4608 PetscInt idx; 4609 4610 idx = idxs_V[i]; 4611 val = array[idxs_all[idxs_V[i]]]; 4612 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4613 } 4614 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4615 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4616 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4617 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4618 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4619 } 4620 sub_schurs->S_Ej_all = S_new; 4621 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4622 if (sub_schurs->sum_S_Ej_all) { 4623 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4624 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4625 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4626 if (pcbddc->deluxe_zerorows) { 4627 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4628 } 4629 sub_schurs->sum_S_Ej_all = S_new; 4630 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4631 } 4632 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4633 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4634 } 4635 /* destroy any change of basis context in sub_schurs */ 4636 if (sub_schurs && sub_schurs->change) { 4637 PetscInt i; 4638 4639 for (i=0;i<sub_schurs->n_subs;i++) { 4640 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4641 } 4642 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4643 } 4644 } 4645 if (pcbddc->switch_static) { /* need to save the local change */ 4646 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4647 } else { 4648 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4649 } 4650 /* determine if any process has changed the pressures locally */ 4651 pcbddc->change_interior = pcbddc->benign_have_null; 4652 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4653 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4654 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4655 pcbddc->use_qr_single = qr_needed; 4656 } 4657 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4658 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4659 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4660 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4661 } else { 4662 Mat benign_global = NULL; 4663 if (pcbddc->benign_have_null) { 4664 Mat tmat; 4665 4666 pcbddc->change_interior = PETSC_TRUE; 4667 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4668 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4669 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4670 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4671 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4672 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4673 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4674 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4675 if (pcbddc->benign_change) { 4676 Mat M; 4677 4678 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4679 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4680 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4681 ierr = MatDestroy(&M);CHKERRQ(ierr); 4682 } else { 4683 Mat eye; 4684 PetscScalar *array; 4685 4686 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4687 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4688 for (i=0;i<pcis->n;i++) { 4689 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4690 } 4691 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4692 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4693 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4694 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4695 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4696 } 4697 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4698 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4699 } 4700 if (pcbddc->user_ChangeOfBasisMatrix) { 4701 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4702 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4703 } else if (pcbddc->benign_have_null) { 4704 pcbddc->ChangeOfBasisMatrix = benign_global; 4705 } 4706 } 4707 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4708 IS is_global; 4709 const PetscInt *gidxs; 4710 4711 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4712 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4713 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4714 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4715 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4716 } 4717 } 4718 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4719 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4720 } 4721 4722 if (!pcbddc->fake_change) { 4723 /* add pressure dofs to set of primal nodes for numbering purposes */ 4724 for (i=0;i<pcbddc->benign_n;i++) { 4725 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4726 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4727 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4728 pcbddc->local_primal_size_cc++; 4729 pcbddc->local_primal_size++; 4730 } 4731 4732 /* check if a new primal space has been introduced (also take into account benign trick) */ 4733 pcbddc->new_primal_space_local = PETSC_TRUE; 4734 if (olocal_primal_size == pcbddc->local_primal_size) { 4735 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4736 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4737 if (!pcbddc->new_primal_space_local) { 4738 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4739 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4740 } 4741 } 4742 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4743 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4744 } 4745 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4746 4747 /* flush dbg viewer */ 4748 if (pcbddc->dbg_flag) { 4749 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4750 } 4751 4752 /* free workspace */ 4753 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4754 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4755 if (!pcbddc->adaptive_selection) { 4756 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4757 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4758 } else { 4759 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4760 pcbddc->adaptive_constraints_idxs_ptr, 4761 pcbddc->adaptive_constraints_data_ptr, 4762 pcbddc->adaptive_constraints_idxs, 4763 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4764 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4765 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4766 } 4767 PetscFunctionReturn(0); 4768 } 4769 4770 #undef __FUNCT__ 4771 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4772 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4773 { 4774 ISLocalToGlobalMapping map; 4775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4776 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4777 PetscInt ierr,i,N; 4778 4779 PetscFunctionBegin; 4780 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 4781 /* Reset previously computed graph */ 4782 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4783 /* Init local Graph struct */ 4784 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4785 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 4786 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N);CHKERRQ(ierr); 4787 4788 /* Check validity of the csr graph passed in by the user */ 4789 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4790 4791 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4792 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4793 PetscInt *xadj,*adjncy; 4794 PetscInt nvtxs; 4795 PetscBool flg_row=PETSC_FALSE; 4796 4797 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4798 if (flg_row) { 4799 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4800 pcbddc->computed_rowadj = PETSC_TRUE; 4801 } 4802 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4803 } 4804 if (pcbddc->dbg_flag) { 4805 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4806 } 4807 4808 /* Setup of Graph */ 4809 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4810 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4811 4812 /* attach info on disconnected subdomains if present */ 4813 if (pcbddc->n_local_subs) { 4814 PetscInt *local_subs; 4815 4816 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4817 for (i=0;i<pcbddc->n_local_subs;i++) { 4818 const PetscInt *idxs; 4819 PetscInt nl,j; 4820 4821 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4822 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4823 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 4824 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4825 } 4826 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4827 pcbddc->mat_graph->local_subs = local_subs; 4828 } 4829 4830 /* Graph's connected components analysis */ 4831 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4832 4833 /* set flag indicating analysis has been done */ 4834 pcbddc->graphanalyzed = PETSC_TRUE; 4835 PetscFunctionReturn(0); 4836 } 4837 4838 #undef __FUNCT__ 4839 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4840 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4841 { 4842 PetscInt i,j; 4843 PetscScalar *alphas; 4844 PetscErrorCode ierr; 4845 4846 PetscFunctionBegin; 4847 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4848 for (i=0;i<n;i++) { 4849 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4850 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4851 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4852 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4853 } 4854 ierr = PetscFree(alphas);CHKERRQ(ierr); 4855 PetscFunctionReturn(0); 4856 } 4857 4858 #undef __FUNCT__ 4859 #define __FUNCT__ "MatISGetSubassemblingPattern" 4860 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4861 { 4862 Mat A; 4863 PetscInt n_neighs,*neighs,*n_shared,**shared; 4864 PetscMPIInt size,rank,color; 4865 PetscInt *xadj,*adjncy; 4866 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4867 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4868 PetscInt void_procs,*procs_candidates = NULL; 4869 PetscInt xadj_count, *count; 4870 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4871 PetscSubcomm psubcomm; 4872 MPI_Comm subcomm; 4873 PetscErrorCode ierr; 4874 4875 PetscFunctionBegin; 4876 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4877 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4878 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4879 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4880 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4881 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4882 4883 if (have_void) *have_void = PETSC_FALSE; 4884 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4885 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4886 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4887 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4888 im_active = !!(n); 4889 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4890 void_procs = size - active_procs; 4891 /* get ranks of of non-active processes in mat communicator */ 4892 if (void_procs) { 4893 PetscInt ncand; 4894 4895 if (have_void) *have_void = PETSC_TRUE; 4896 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4897 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4898 for (i=0,ncand=0;i<size;i++) { 4899 if (!procs_candidates[i]) { 4900 procs_candidates[ncand++] = i; 4901 } 4902 } 4903 /* force n_subdomains to be not greater that the number of non-active processes */ 4904 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4905 } 4906 4907 /* number of subdomains requested greater than active processes -> just shift the matrix 4908 number of subdomains requested 1 -> send to master or first candidate in voids */ 4909 if (active_procs < *n_subdomains || *n_subdomains == 1) { 4910 PetscInt issize,isidx,dest; 4911 if (*n_subdomains == 1) dest = 0; 4912 else dest = rank; 4913 if (im_active) { 4914 issize = 1; 4915 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4916 isidx = procs_candidates[dest]; 4917 } else { 4918 isidx = dest; 4919 } 4920 } else { 4921 issize = 0; 4922 isidx = -1; 4923 } 4924 *n_subdomains = active_procs; 4925 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4926 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4927 PetscFunctionReturn(0); 4928 } 4929 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4930 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4931 threshold = PetscMax(threshold,2); 4932 4933 /* Get info on mapping */ 4934 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4935 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4936 4937 /* build local CSR graph of subdomains' connectivity */ 4938 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4939 xadj[0] = 0; 4940 xadj[1] = PetscMax(n_neighs-1,0); 4941 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4942 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4943 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4944 for (i=1;i<n_neighs;i++) 4945 for (j=0;j<n_shared[i];j++) 4946 count[shared[i][j]] += 1; 4947 4948 xadj_count = 0; 4949 for (i=1;i<n_neighs;i++) { 4950 for (j=0;j<n_shared[i];j++) { 4951 if (count[shared[i][j]] < threshold) { 4952 adjncy[xadj_count] = neighs[i]; 4953 adjncy_wgt[xadj_count] = n_shared[i]; 4954 xadj_count++; 4955 break; 4956 } 4957 } 4958 } 4959 xadj[1] = xadj_count; 4960 ierr = PetscFree(count);CHKERRQ(ierr); 4961 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4962 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4963 4964 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4965 4966 /* Restrict work on active processes only */ 4967 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4968 if (void_procs) { 4969 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4970 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4971 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4972 subcomm = PetscSubcommChild(psubcomm); 4973 } else { 4974 psubcomm = NULL; 4975 subcomm = PetscObjectComm((PetscObject)mat); 4976 } 4977 4978 v_wgt = NULL; 4979 if (!color) { 4980 ierr = PetscFree(xadj);CHKERRQ(ierr); 4981 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4982 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4983 } else { 4984 Mat subdomain_adj; 4985 IS new_ranks,new_ranks_contig; 4986 MatPartitioning partitioner; 4987 PetscInt rstart=0,rend=0; 4988 PetscInt *is_indices,*oldranks; 4989 PetscMPIInt size; 4990 PetscBool aggregate; 4991 4992 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4993 if (void_procs) { 4994 PetscInt prank = rank; 4995 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4996 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4997 for (i=0;i<xadj[1];i++) { 4998 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4999 } 5000 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5001 } else { 5002 oldranks = NULL; 5003 } 5004 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5005 if (aggregate) { /* TODO: all this part could be made more efficient */ 5006 PetscInt lrows,row,ncols,*cols; 5007 PetscMPIInt nrank; 5008 PetscScalar *vals; 5009 5010 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5011 lrows = 0; 5012 if (nrank<redprocs) { 5013 lrows = size/redprocs; 5014 if (nrank<size%redprocs) lrows++; 5015 } 5016 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5017 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5018 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5019 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5020 row = nrank; 5021 ncols = xadj[1]-xadj[0]; 5022 cols = adjncy; 5023 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5024 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5025 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5026 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5027 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5028 ierr = PetscFree(xadj);CHKERRQ(ierr); 5029 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5030 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5031 ierr = PetscFree(vals);CHKERRQ(ierr); 5032 if (use_vwgt) { 5033 Vec v; 5034 const PetscScalar *array; 5035 PetscInt nl; 5036 5037 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5038 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5039 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5040 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5041 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5042 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5043 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5044 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5045 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5046 ierr = VecDestroy(&v);CHKERRQ(ierr); 5047 } 5048 } else { 5049 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5050 if (use_vwgt) { 5051 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5052 v_wgt[0] = local_size; 5053 } 5054 } 5055 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5056 5057 /* Partition */ 5058 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5059 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5060 if (v_wgt) { 5061 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5062 } 5063 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5064 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5065 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5066 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5067 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5068 5069 /* renumber new_ranks to avoid "holes" in new set of processors */ 5070 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5071 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5072 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5073 if (!aggregate) { 5074 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5075 #if defined(PETSC_USE_DEBUG) 5076 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5077 #endif 5078 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5079 } else if (oldranks) { 5080 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5081 } else { 5082 ranks_send_to_idx[0] = is_indices[0]; 5083 } 5084 } else { 5085 PetscInt idxs[1]; 5086 PetscMPIInt tag; 5087 MPI_Request *reqs; 5088 5089 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5090 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5091 for (i=rstart;i<rend;i++) { 5092 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5093 } 5094 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5095 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5096 ierr = PetscFree(reqs);CHKERRQ(ierr); 5097 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5098 #if defined(PETSC_USE_DEBUG) 5099 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5100 #endif 5101 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5102 } else if (oldranks) { 5103 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5104 } else { 5105 ranks_send_to_idx[0] = idxs[0]; 5106 } 5107 } 5108 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5109 /* clean up */ 5110 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5111 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5112 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5113 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5114 } 5115 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5116 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5117 5118 /* assemble parallel IS for sends */ 5119 i = 1; 5120 if (!color) i=0; 5121 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 5125 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5126 5127 #undef __FUNCT__ 5128 #define __FUNCT__ "MatISSubassemble" 5129 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5130 { 5131 Mat local_mat; 5132 IS is_sends_internal; 5133 PetscInt rows,cols,new_local_rows; 5134 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5135 PetscBool ismatis,isdense,newisdense,destroy_mat; 5136 ISLocalToGlobalMapping l2gmap; 5137 PetscInt* l2gmap_indices; 5138 const PetscInt* is_indices; 5139 MatType new_local_type; 5140 /* buffers */ 5141 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5142 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5143 PetscInt *recv_buffer_idxs_local; 5144 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5145 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5146 /* MPI */ 5147 MPI_Comm comm,comm_n; 5148 PetscSubcomm subcomm; 5149 PetscMPIInt n_sends,n_recvs,commsize; 5150 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5151 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5152 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5153 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5154 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5155 PetscErrorCode ierr; 5156 5157 PetscFunctionBegin; 5158 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5159 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5160 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5161 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5162 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5163 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5164 PetscValidLogicalCollectiveBool(mat,reuse,6); 5165 PetscValidLogicalCollectiveInt(mat,nis,8); 5166 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5167 if (nvecs) { 5168 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5169 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5170 } 5171 /* further checks */ 5172 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5173 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5174 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5175 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5176 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5177 if (reuse && *mat_n) { 5178 PetscInt mrows,mcols,mnrows,mncols; 5179 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5180 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5181 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5182 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5183 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5184 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5185 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5186 } 5187 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5188 PetscValidLogicalCollectiveInt(mat,bs,0); 5189 5190 /* prepare IS for sending if not provided */ 5191 if (!is_sends) { 5192 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5193 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5194 } else { 5195 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5196 is_sends_internal = is_sends; 5197 } 5198 5199 /* get comm */ 5200 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5201 5202 /* compute number of sends */ 5203 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5204 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5205 5206 /* compute number of receives */ 5207 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5208 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5209 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5210 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5211 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5212 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5213 ierr = PetscFree(iflags);CHKERRQ(ierr); 5214 5215 /* restrict comm if requested */ 5216 subcomm = 0; 5217 destroy_mat = PETSC_FALSE; 5218 if (restrict_comm) { 5219 PetscMPIInt color,subcommsize; 5220 5221 color = 0; 5222 if (restrict_full) { 5223 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5224 } else { 5225 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5226 } 5227 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5228 subcommsize = commsize - subcommsize; 5229 /* check if reuse has been requested */ 5230 if (reuse) { 5231 if (*mat_n) { 5232 PetscMPIInt subcommsize2; 5233 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5234 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5235 comm_n = PetscObjectComm((PetscObject)*mat_n); 5236 } else { 5237 comm_n = PETSC_COMM_SELF; 5238 } 5239 } else { /* MAT_INITIAL_MATRIX */ 5240 PetscMPIInt rank; 5241 5242 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5243 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5244 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5245 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5246 comm_n = PetscSubcommChild(subcomm); 5247 } 5248 /* flag to destroy *mat_n if not significative */ 5249 if (color) destroy_mat = PETSC_TRUE; 5250 } else { 5251 comm_n = comm; 5252 } 5253 5254 /* prepare send/receive buffers */ 5255 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5256 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5257 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5258 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5259 if (nis) { 5260 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5261 } 5262 5263 /* Get data from local matrices */ 5264 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5265 /* TODO: See below some guidelines on how to prepare the local buffers */ 5266 /* 5267 send_buffer_vals should contain the raw values of the local matrix 5268 send_buffer_idxs should contain: 5269 - MatType_PRIVATE type 5270 - PetscInt size_of_l2gmap 5271 - PetscInt global_row_indices[size_of_l2gmap] 5272 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5273 */ 5274 else { 5275 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5276 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5277 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5278 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5279 send_buffer_idxs[1] = i; 5280 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5281 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5282 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5283 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5284 for (i=0;i<n_sends;i++) { 5285 ilengths_vals[is_indices[i]] = len*len; 5286 ilengths_idxs[is_indices[i]] = len+2; 5287 } 5288 } 5289 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5290 /* additional is (if any) */ 5291 if (nis) { 5292 PetscMPIInt psum; 5293 PetscInt j; 5294 for (j=0,psum=0;j<nis;j++) { 5295 PetscInt plen; 5296 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5297 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5298 psum += len+1; /* indices + lenght */ 5299 } 5300 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5301 for (j=0,psum=0;j<nis;j++) { 5302 PetscInt plen; 5303 const PetscInt *is_array_idxs; 5304 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5305 send_buffer_idxs_is[psum] = plen; 5306 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5307 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5308 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5309 psum += plen+1; /* indices + lenght */ 5310 } 5311 for (i=0;i<n_sends;i++) { 5312 ilengths_idxs_is[is_indices[i]] = psum; 5313 } 5314 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5315 } 5316 5317 buf_size_idxs = 0; 5318 buf_size_vals = 0; 5319 buf_size_idxs_is = 0; 5320 buf_size_vecs = 0; 5321 for (i=0;i<n_recvs;i++) { 5322 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5323 buf_size_vals += (PetscInt)olengths_vals[i]; 5324 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5325 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5326 } 5327 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5328 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5329 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5330 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5331 5332 /* get new tags for clean communications */ 5333 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5334 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5335 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5336 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5337 5338 /* allocate for requests */ 5339 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5340 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5341 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5342 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5343 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5344 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5345 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5346 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5347 5348 /* communications */ 5349 ptr_idxs = recv_buffer_idxs; 5350 ptr_vals = recv_buffer_vals; 5351 ptr_idxs_is = recv_buffer_idxs_is; 5352 ptr_vecs = recv_buffer_vecs; 5353 for (i=0;i<n_recvs;i++) { 5354 source_dest = onodes[i]; 5355 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5356 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5357 ptr_idxs += olengths_idxs[i]; 5358 ptr_vals += olengths_vals[i]; 5359 if (nis) { 5360 source_dest = onodes_is[i]; 5361 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5362 ptr_idxs_is += olengths_idxs_is[i]; 5363 } 5364 if (nvecs) { 5365 source_dest = onodes[i]; 5366 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5367 ptr_vecs += olengths_idxs[i]-2; 5368 } 5369 } 5370 for (i=0;i<n_sends;i++) { 5371 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5372 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5373 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5374 if (nis) { 5375 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5376 } 5377 if (nvecs) { 5378 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5379 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5380 } 5381 } 5382 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5383 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5384 5385 /* assemble new l2g map */ 5386 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5387 ptr_idxs = recv_buffer_idxs; 5388 new_local_rows = 0; 5389 for (i=0;i<n_recvs;i++) { 5390 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5391 ptr_idxs += olengths_idxs[i]; 5392 } 5393 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5394 ptr_idxs = recv_buffer_idxs; 5395 new_local_rows = 0; 5396 for (i=0;i<n_recvs;i++) { 5397 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5398 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5399 ptr_idxs += olengths_idxs[i]; 5400 } 5401 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5402 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5403 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5404 5405 /* infer new local matrix type from received local matrices type */ 5406 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5407 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5408 if (n_recvs) { 5409 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5410 ptr_idxs = recv_buffer_idxs; 5411 for (i=0;i<n_recvs;i++) { 5412 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5413 new_local_type_private = MATAIJ_PRIVATE; 5414 break; 5415 } 5416 ptr_idxs += olengths_idxs[i]; 5417 } 5418 switch (new_local_type_private) { 5419 case MATDENSE_PRIVATE: 5420 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5421 new_local_type = MATSEQAIJ; 5422 bs = 1; 5423 } else { /* if I receive only 1 dense matrix */ 5424 new_local_type = MATSEQDENSE; 5425 bs = 1; 5426 } 5427 break; 5428 case MATAIJ_PRIVATE: 5429 new_local_type = MATSEQAIJ; 5430 bs = 1; 5431 break; 5432 case MATBAIJ_PRIVATE: 5433 new_local_type = MATSEQBAIJ; 5434 break; 5435 case MATSBAIJ_PRIVATE: 5436 new_local_type = MATSEQSBAIJ; 5437 break; 5438 default: 5439 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5440 break; 5441 } 5442 } else { /* by default, new_local_type is seqdense */ 5443 new_local_type = MATSEQDENSE; 5444 bs = 1; 5445 } 5446 5447 /* create MATIS object if needed */ 5448 if (!reuse) { 5449 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5450 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5451 } else { 5452 /* it also destroys the local matrices */ 5453 if (*mat_n) { 5454 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5455 } else { /* this is a fake object */ 5456 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5457 } 5458 } 5459 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5460 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5461 5462 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5463 5464 /* Global to local map of received indices */ 5465 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5466 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5467 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5468 5469 /* restore attributes -> type of incoming data and its size */ 5470 buf_size_idxs = 0; 5471 for (i=0;i<n_recvs;i++) { 5472 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5473 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5474 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5475 } 5476 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5477 5478 /* set preallocation */ 5479 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5480 if (!newisdense) { 5481 PetscInt *new_local_nnz=0; 5482 5483 ptr_idxs = recv_buffer_idxs_local; 5484 if (n_recvs) { 5485 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5486 } 5487 for (i=0;i<n_recvs;i++) { 5488 PetscInt j; 5489 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5490 for (j=0;j<*(ptr_idxs+1);j++) { 5491 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5492 } 5493 } else { 5494 /* TODO */ 5495 } 5496 ptr_idxs += olengths_idxs[i]; 5497 } 5498 if (new_local_nnz) { 5499 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5500 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5501 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5502 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5503 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5504 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5505 } else { 5506 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5507 } 5508 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5509 } else { 5510 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5511 } 5512 5513 /* set values */ 5514 ptr_vals = recv_buffer_vals; 5515 ptr_idxs = recv_buffer_idxs_local; 5516 for (i=0;i<n_recvs;i++) { 5517 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5518 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5519 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5520 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5521 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5522 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5523 } else { 5524 /* TODO */ 5525 } 5526 ptr_idxs += olengths_idxs[i]; 5527 ptr_vals += olengths_vals[i]; 5528 } 5529 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5530 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5531 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5532 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5534 5535 #if 0 5536 if (!restrict_comm) { /* check */ 5537 Vec lvec,rvec; 5538 PetscReal infty_error; 5539 5540 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5541 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5542 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5543 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5544 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5545 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5546 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5547 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5548 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5549 } 5550 #endif 5551 5552 /* assemble new additional is (if any) */ 5553 if (nis) { 5554 PetscInt **temp_idxs,*count_is,j,psum; 5555 5556 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5557 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5558 ptr_idxs = recv_buffer_idxs_is; 5559 psum = 0; 5560 for (i=0;i<n_recvs;i++) { 5561 for (j=0;j<nis;j++) { 5562 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5563 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5564 psum += plen; 5565 ptr_idxs += plen+1; /* shift pointer to received data */ 5566 } 5567 } 5568 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5569 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5570 for (i=1;i<nis;i++) { 5571 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5572 } 5573 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5574 ptr_idxs = recv_buffer_idxs_is; 5575 for (i=0;i<n_recvs;i++) { 5576 for (j=0;j<nis;j++) { 5577 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5578 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5579 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5580 ptr_idxs += plen+1; /* shift pointer to received data */ 5581 } 5582 } 5583 for (i=0;i<nis;i++) { 5584 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5585 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5586 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5587 } 5588 ierr = PetscFree(count_is);CHKERRQ(ierr); 5589 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5590 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5591 } 5592 /* free workspace */ 5593 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5594 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5595 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5596 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5597 if (isdense) { 5598 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5599 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5600 } else { 5601 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5602 } 5603 if (nis) { 5604 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5605 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5606 } 5607 5608 if (nvecs) { 5609 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5610 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5611 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5612 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5613 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5614 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5615 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5616 /* set values */ 5617 ptr_vals = recv_buffer_vecs; 5618 ptr_idxs = recv_buffer_idxs_local; 5619 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5620 for (i=0;i<n_recvs;i++) { 5621 PetscInt j; 5622 for (j=0;j<*(ptr_idxs+1);j++) { 5623 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5624 } 5625 ptr_idxs += olengths_idxs[i]; 5626 ptr_vals += olengths_idxs[i]-2; 5627 } 5628 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5629 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5630 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5631 } 5632 5633 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5634 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5635 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5636 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5637 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5638 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5639 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5640 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5641 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5642 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5643 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5644 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5645 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5646 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5647 ierr = PetscFree(onodes);CHKERRQ(ierr); 5648 if (nis) { 5649 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5650 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5651 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5652 } 5653 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5654 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5655 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5656 for (i=0;i<nis;i++) { 5657 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5658 } 5659 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5660 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5661 } 5662 *mat_n = NULL; 5663 } 5664 PetscFunctionReturn(0); 5665 } 5666 5667 /* temporary hack into ksp private data structure */ 5668 #include <petsc/private/kspimpl.h> 5669 5670 #undef __FUNCT__ 5671 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5672 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5673 { 5674 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5675 PC_IS *pcis = (PC_IS*)pc->data; 5676 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5677 Mat coarsedivudotp = NULL; 5678 MatNullSpace CoarseNullSpace = NULL; 5679 ISLocalToGlobalMapping coarse_islg; 5680 IS coarse_is,*isarray; 5681 PetscInt i,im_active=-1,active_procs=-1; 5682 PetscInt nis,nisdofs,nisneu,nisvert; 5683 PC pc_temp; 5684 PCType coarse_pc_type; 5685 KSPType coarse_ksp_type; 5686 PetscBool multilevel_requested,multilevel_allowed; 5687 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5688 Mat t_coarse_mat_is; 5689 PetscInt ncoarse; 5690 PetscBool compute_vecs = PETSC_FALSE; 5691 PetscScalar *array; 5692 MatReuse coarse_mat_reuse; 5693 PetscBool restr, full_restr, have_void; 5694 PetscErrorCode ierr; 5695 5696 PetscFunctionBegin; 5697 /* Assign global numbering to coarse dofs */ 5698 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5699 PetscInt ocoarse_size; 5700 compute_vecs = PETSC_TRUE; 5701 ocoarse_size = pcbddc->coarse_size; 5702 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5703 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5704 /* see if we can avoid some work */ 5705 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5706 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5707 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5708 PC pc; 5709 PetscBool isbddc; 5710 5711 /* temporary workaround since PCBDDC does not have a reset method so far */ 5712 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5713 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5714 if (isbddc) { 5715 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5716 } else { 5717 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5718 } 5719 coarse_reuse = PETSC_FALSE; 5720 } else { /* we can safely reuse already computed coarse matrix */ 5721 coarse_reuse = PETSC_TRUE; 5722 } 5723 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5724 coarse_reuse = PETSC_FALSE; 5725 } 5726 /* reset any subassembling information */ 5727 if (!coarse_reuse || pcbddc->recompute_topography) { 5728 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5729 } 5730 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5731 coarse_reuse = PETSC_TRUE; 5732 } 5733 /* assemble coarse matrix */ 5734 if (coarse_reuse && pcbddc->coarse_ksp) { 5735 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5736 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5737 coarse_mat_reuse = MAT_REUSE_MATRIX; 5738 } else { 5739 coarse_mat = NULL; 5740 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5741 } 5742 5743 /* creates temporary l2gmap and IS for coarse indexes */ 5744 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5745 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5746 5747 /* creates temporary MATIS object for coarse matrix */ 5748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5749 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5750 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5751 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5752 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5753 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5754 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5755 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5757 5758 /* count "active" (i.e. with positive local size) and "void" processes */ 5759 im_active = !!(pcis->n); 5760 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5761 5762 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 5763 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5764 /* full_restr : just use the receivers from the subassembling pattern */ 5765 coarse_mat_is = NULL; 5766 multilevel_allowed = PETSC_FALSE; 5767 multilevel_requested = PETSC_FALSE; 5768 full_restr = PETSC_TRUE; 5769 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5770 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5771 if (multilevel_requested) { 5772 ncoarse = active_procs/pcbddc->coarsening_ratio; 5773 restr = PETSC_FALSE; 5774 full_restr = PETSC_FALSE; 5775 } else { 5776 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5777 restr = PETSC_TRUE; 5778 full_restr = PETSC_TRUE; 5779 } 5780 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5781 ncoarse = PetscMax(1,ncoarse); 5782 if (!pcbddc->coarse_subassembling) { 5783 if (pcbddc->coarsening_ratio > 1) { 5784 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5785 } else { 5786 PetscMPIInt size,rank; 5787 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5788 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5789 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5790 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5791 } 5792 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5793 PetscInt psum; 5794 PetscMPIInt size; 5795 if (pcbddc->coarse_ksp) psum = 1; 5796 else psum = 0; 5797 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5798 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5799 if (ncoarse < size) have_void = PETSC_TRUE; 5800 } 5801 /* determine if we can go multilevel */ 5802 if (multilevel_requested) { 5803 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5804 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5805 } 5806 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5807 5808 /* dump subassembling pattern */ 5809 if (pcbddc->dbg_flag && multilevel_allowed) { 5810 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5811 } 5812 5813 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5814 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5815 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5816 const PetscInt *idxs; 5817 ISLocalToGlobalMapping tmap; 5818 5819 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5820 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5821 /* allocate space for temporary storage */ 5822 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5823 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5824 /* allocate for IS array */ 5825 nisdofs = pcbddc->n_ISForDofsLocal; 5826 nisneu = !!pcbddc->NeumannBoundariesLocal; 5827 nisvert = 0; /* nisvert is not used */ 5828 nis = nisdofs + nisneu + nisvert; 5829 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5830 /* dofs splitting */ 5831 for (i=0;i<nisdofs;i++) { 5832 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5833 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5834 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5835 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5836 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5837 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5838 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5839 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5840 } 5841 /* neumann boundaries */ 5842 if (pcbddc->NeumannBoundariesLocal) { 5843 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5844 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5845 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5846 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5847 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5848 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5850 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5851 } 5852 /* free memory */ 5853 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5854 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5855 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5856 } else { 5857 nis = 0; 5858 nisdofs = 0; 5859 nisneu = 0; 5860 nisvert = 0; 5861 isarray = NULL; 5862 } 5863 /* destroy no longer needed map */ 5864 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5865 5866 /* subassemble */ 5867 if (multilevel_allowed) { 5868 Vec vp[1]; 5869 PetscInt nvecs = 0; 5870 PetscBool reuse,reuser; 5871 5872 if (coarse_mat) reuse = PETSC_TRUE; 5873 else reuse = PETSC_FALSE; 5874 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5875 vp[0] = NULL; 5876 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5877 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5878 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5879 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5880 nvecs = 1; 5881 5882 if (pcbddc->divudotp) { 5883 Mat B,loc_divudotp; 5884 Vec v,p; 5885 IS dummy; 5886 PetscInt np; 5887 5888 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5889 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5890 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5891 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5892 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5893 ierr = VecSet(p,1.);CHKERRQ(ierr); 5894 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5895 ierr = VecDestroy(&p);CHKERRQ(ierr); 5896 ierr = MatDestroy(&B);CHKERRQ(ierr); 5897 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5898 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5899 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5900 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5901 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5902 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5903 ierr = VecDestroy(&v);CHKERRQ(ierr); 5904 } 5905 } 5906 if (reuser) { 5907 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5908 } else { 5909 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5910 } 5911 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5912 PetscScalar *arraym,*arrayv; 5913 PetscInt nl; 5914 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5916 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5917 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5918 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5919 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5920 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5921 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5922 } else { 5923 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5924 } 5925 } else { 5926 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5927 } 5928 if (coarse_mat_is || coarse_mat) { 5929 PetscMPIInt size; 5930 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5931 if (!multilevel_allowed) { 5932 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5933 } else { 5934 Mat A; 5935 5936 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5937 if (coarse_mat_is) { 5938 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5939 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5940 coarse_mat = coarse_mat_is; 5941 } 5942 /* be sure we don't have MatSeqDENSE as local mat */ 5943 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5944 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5945 } 5946 } 5947 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5948 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5949 5950 /* create local to global scatters for coarse problem */ 5951 if (compute_vecs) { 5952 PetscInt lrows; 5953 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5954 if (coarse_mat) { 5955 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5956 } else { 5957 lrows = 0; 5958 } 5959 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5960 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5961 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5962 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5963 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5964 } 5965 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5966 5967 /* set defaults for coarse KSP and PC */ 5968 if (multilevel_allowed) { 5969 coarse_ksp_type = KSPRICHARDSON; 5970 coarse_pc_type = PCBDDC; 5971 } else { 5972 coarse_ksp_type = KSPPREONLY; 5973 coarse_pc_type = PCREDUNDANT; 5974 } 5975 5976 /* print some info if requested */ 5977 if (pcbddc->dbg_flag) { 5978 if (!multilevel_allowed) { 5979 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5980 if (multilevel_requested) { 5981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5982 } else if (pcbddc->max_levels) { 5983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5984 } 5985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5986 } 5987 } 5988 5989 /* create the coarse KSP object only once with defaults */ 5990 if (coarse_mat) { 5991 PetscViewer dbg_viewer = NULL; 5992 if (pcbddc->dbg_flag) { 5993 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5994 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5995 } 5996 if (!pcbddc->coarse_ksp) { 5997 char prefix[256],str_level[16]; 5998 size_t len; 5999 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6000 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6001 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6002 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6003 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6004 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6005 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6006 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6007 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6008 /* prefix */ 6009 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6010 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6011 if (!pcbddc->current_level) { 6012 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6013 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6014 } else { 6015 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6016 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6017 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6018 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6019 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6020 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6021 } 6022 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6023 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6024 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6025 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6026 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6027 /* allow user customization */ 6028 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6029 } 6030 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6031 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6032 if (nisdofs) { 6033 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6034 for (i=0;i<nisdofs;i++) { 6035 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6036 } 6037 } 6038 if (nisneu) { 6039 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6040 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6041 } 6042 if (nisvert) { 6043 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6044 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6045 } 6046 6047 /* get some info after set from options */ 6048 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6049 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6050 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6051 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6052 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6053 isbddc = PETSC_FALSE; 6054 } 6055 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6056 if (isredundant) { 6057 KSP inner_ksp; 6058 PC inner_pc; 6059 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6060 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6061 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6062 } 6063 6064 /* parameters which miss an API */ 6065 if (isbddc) { 6066 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6067 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6068 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6069 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6070 if (pcbddc_coarse->benign_saddle_point) { 6071 Mat coarsedivudotp_is; 6072 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6073 IS row,col; 6074 const PetscInt *gidxs; 6075 PetscInt n,st,M,N; 6076 6077 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6078 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6079 st = st-n; 6080 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6081 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6082 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6083 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6084 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6085 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6086 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6087 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6088 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6089 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6090 ierr = ISDestroy(&row);CHKERRQ(ierr); 6091 ierr = ISDestroy(&col);CHKERRQ(ierr); 6092 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6093 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6094 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6095 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6096 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6097 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6098 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6099 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6100 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6101 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6102 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6103 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6104 } 6105 } 6106 6107 /* propagate symmetry info of coarse matrix */ 6108 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6109 if (pc->pmat->symmetric_set) { 6110 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6111 } 6112 if (pc->pmat->hermitian_set) { 6113 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6114 } 6115 if (pc->pmat->spd_set) { 6116 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6117 } 6118 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6119 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6120 } 6121 /* set operators */ 6122 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6123 if (pcbddc->dbg_flag) { 6124 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6125 } 6126 } 6127 ierr = PetscFree(isarray);CHKERRQ(ierr); 6128 #if 0 6129 { 6130 PetscViewer viewer; 6131 char filename[256]; 6132 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6133 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6134 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6135 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6136 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6137 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6138 } 6139 #endif 6140 6141 if (pcbddc->coarse_ksp) { 6142 Vec crhs,csol; 6143 6144 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6145 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6146 if (!csol) { 6147 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6148 } 6149 if (!crhs) { 6150 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6151 } 6152 } 6153 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6154 6155 /* compute null space for coarse solver if the benign trick has been requested */ 6156 if (pcbddc->benign_null) { 6157 6158 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6159 for (i=0;i<pcbddc->benign_n;i++) { 6160 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6161 } 6162 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6163 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6164 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6165 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6166 if (coarse_mat) { 6167 Vec nullv; 6168 PetscScalar *array,*array2; 6169 PetscInt nl; 6170 6171 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6172 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6173 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6174 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6175 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6176 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6177 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6178 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6179 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6180 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6181 } 6182 } 6183 6184 if (pcbddc->coarse_ksp) { 6185 PetscBool ispreonly; 6186 6187 if (CoarseNullSpace) { 6188 PetscBool isnull; 6189 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6190 if (isnull) { 6191 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6192 } 6193 /* TODO: add local nullspaces (if any) */ 6194 } 6195 /* setup coarse ksp */ 6196 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6197 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6198 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6199 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6200 KSP check_ksp; 6201 KSPType check_ksp_type; 6202 PC check_pc; 6203 Vec check_vec,coarse_vec; 6204 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6205 PetscInt its; 6206 PetscBool compute_eigs; 6207 PetscReal *eigs_r,*eigs_c; 6208 PetscInt neigs; 6209 const char *prefix; 6210 6211 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6212 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6213 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6214 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6215 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6216 /* prevent from setup unneeded object */ 6217 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6218 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6219 if (ispreonly) { 6220 check_ksp_type = KSPPREONLY; 6221 compute_eigs = PETSC_FALSE; 6222 } else { 6223 check_ksp_type = KSPGMRES; 6224 compute_eigs = PETSC_TRUE; 6225 } 6226 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6227 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6228 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6229 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6230 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6231 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6232 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6233 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6234 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6235 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6236 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6237 /* create random vec */ 6238 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6239 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6240 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6241 /* solve coarse problem */ 6242 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6243 /* set eigenvalue estimation if preonly has not been requested */ 6244 if (compute_eigs) { 6245 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6246 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6247 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6248 if (neigs) { 6249 lambda_max = eigs_r[neigs-1]; 6250 lambda_min = eigs_r[0]; 6251 if (pcbddc->use_coarse_estimates) { 6252 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6253 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6254 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6255 } 6256 } 6257 } 6258 } 6259 6260 /* check coarse problem residual error */ 6261 if (pcbddc->dbg_flag) { 6262 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6263 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6264 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6265 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6266 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6267 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6268 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6269 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6270 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6271 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6272 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6273 if (CoarseNullSpace) { 6274 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6275 } 6276 if (compute_eigs) { 6277 PetscReal lambda_max_s,lambda_min_s; 6278 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6279 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6280 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6281 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6282 for (i=0;i<neigs;i++) { 6283 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6284 } 6285 } 6286 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6287 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6288 } 6289 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6290 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6291 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6292 if (compute_eigs) { 6293 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6294 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6295 } 6296 } 6297 } 6298 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6299 /* print additional info */ 6300 if (pcbddc->dbg_flag) { 6301 /* waits until all processes reaches this point */ 6302 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6303 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6304 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6305 } 6306 6307 /* free memory */ 6308 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6309 PetscFunctionReturn(0); 6310 } 6311 6312 #undef __FUNCT__ 6313 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6314 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6315 { 6316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6317 PC_IS* pcis = (PC_IS*)pc->data; 6318 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6319 IS subset,subset_mult,subset_n; 6320 PetscInt local_size,coarse_size=0; 6321 PetscInt *local_primal_indices=NULL; 6322 const PetscInt *t_local_primal_indices; 6323 PetscErrorCode ierr; 6324 6325 PetscFunctionBegin; 6326 /* Compute global number of coarse dofs */ 6327 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6328 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6329 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6330 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6332 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6333 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6334 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6335 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6336 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6337 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6338 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6339 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6340 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6341 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6342 6343 /* check numbering */ 6344 if (pcbddc->dbg_flag) { 6345 PetscScalar coarsesum,*array,*array2; 6346 PetscInt i; 6347 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6348 6349 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6351 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6352 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6353 /* counter */ 6354 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6355 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6356 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6357 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6358 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6359 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6360 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6361 for (i=0;i<pcbddc->local_primal_size;i++) { 6362 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6363 } 6364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6366 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6367 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6368 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6369 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6370 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6371 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6372 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6373 for (i=0;i<pcis->n;i++) { 6374 if (array[i] != 0.0 && array[i] != array2[i]) { 6375 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6376 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6377 set_error = PETSC_TRUE; 6378 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6380 } 6381 } 6382 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6383 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6384 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6385 for (i=0;i<pcis->n;i++) { 6386 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6387 } 6388 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6389 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6390 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6391 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6392 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6394 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6395 PetscInt *gidxs; 6396 6397 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6398 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6400 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6402 for (i=0;i<pcbddc->local_primal_size;i++) { 6403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6404 } 6405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6406 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6407 } 6408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6409 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6410 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6411 } 6412 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6413 /* get back data */ 6414 *coarse_size_n = coarse_size; 6415 *local_primal_indices_n = local_primal_indices; 6416 PetscFunctionReturn(0); 6417 } 6418 6419 #undef __FUNCT__ 6420 #define __FUNCT__ "PCBDDCGlobalToLocal" 6421 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6422 { 6423 IS localis_t; 6424 PetscInt i,lsize,*idxs,n; 6425 PetscScalar *vals; 6426 PetscErrorCode ierr; 6427 6428 PetscFunctionBegin; 6429 /* get indices in local ordering exploiting local to global map */ 6430 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6432 for (i=0;i<lsize;i++) vals[i] = 1.0; 6433 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6434 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6435 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6436 if (idxs) { /* multilevel guard */ 6437 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6438 } 6439 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6440 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6441 ierr = PetscFree(vals);CHKERRQ(ierr); 6442 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6443 /* now compute set in local ordering */ 6444 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6445 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6446 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6447 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6448 for (i=0,lsize=0;i<n;i++) { 6449 if (PetscRealPart(vals[i]) > 0.5) { 6450 lsize++; 6451 } 6452 } 6453 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6454 for (i=0,lsize=0;i<n;i++) { 6455 if (PetscRealPart(vals[i]) > 0.5) { 6456 idxs[lsize++] = i; 6457 } 6458 } 6459 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6460 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6461 *localis = localis_t; 6462 PetscFunctionReturn(0); 6463 } 6464 6465 #undef __FUNCT__ 6466 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6467 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6468 { 6469 PC_IS *pcis=(PC_IS*)pc->data; 6470 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6471 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6472 Mat S_j; 6473 PetscInt *used_xadj,*used_adjncy; 6474 PetscBool free_used_adj; 6475 PetscErrorCode ierr; 6476 6477 PetscFunctionBegin; 6478 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6479 free_used_adj = PETSC_FALSE; 6480 if (pcbddc->sub_schurs_layers == -1) { 6481 used_xadj = NULL; 6482 used_adjncy = NULL; 6483 } else { 6484 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6485 used_xadj = pcbddc->mat_graph->xadj; 6486 used_adjncy = pcbddc->mat_graph->adjncy; 6487 } else if (pcbddc->computed_rowadj) { 6488 used_xadj = pcbddc->mat_graph->xadj; 6489 used_adjncy = pcbddc->mat_graph->adjncy; 6490 } else { 6491 PetscBool flg_row=PETSC_FALSE; 6492 const PetscInt *xadj,*adjncy; 6493 PetscInt nvtxs; 6494 6495 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6496 if (flg_row) { 6497 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6498 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6499 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6500 free_used_adj = PETSC_TRUE; 6501 } else { 6502 pcbddc->sub_schurs_layers = -1; 6503 used_xadj = NULL; 6504 used_adjncy = NULL; 6505 } 6506 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6507 } 6508 } 6509 6510 /* setup sub_schurs data */ 6511 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6512 if (!sub_schurs->schur_explicit) { 6513 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6514 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6515 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6516 } else { 6517 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6518 PetscBool isseqaij,need_change = PETSC_FALSE; 6519 PetscInt benign_n; 6520 Mat change = NULL; 6521 Vec scaling = NULL; 6522 IS change_primal = NULL; 6523 6524 if (!pcbddc->use_vertices && reuse_solvers) { 6525 PetscInt n_vertices; 6526 6527 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6528 reuse_solvers = (PetscBool)!n_vertices; 6529 } 6530 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6531 if (!isseqaij) { 6532 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6533 if (matis->A == pcbddc->local_mat) { 6534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6535 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6536 } else { 6537 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6538 } 6539 } 6540 if (!pcbddc->benign_change_explicit) { 6541 benign_n = pcbddc->benign_n; 6542 } else { 6543 benign_n = 0; 6544 } 6545 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6546 We need a global reduction to avoid possible deadlocks. 6547 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6548 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6549 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6550 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6551 need_change = (PetscBool)(!need_change); 6552 } 6553 /* If the user defines additional constraints, we import them here. 6554 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6555 if (need_change) { 6556 PC_IS *pcisf; 6557 PC_BDDC *pcbddcf; 6558 PC pcf; 6559 6560 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6561 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6562 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6563 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6564 /* hacks */ 6565 pcisf = (PC_IS*)pcf->data; 6566 pcisf->is_B_local = pcis->is_B_local; 6567 pcisf->vec1_N = pcis->vec1_N; 6568 pcisf->BtoNmap = pcis->BtoNmap; 6569 pcisf->n = pcis->n; 6570 pcisf->n_B = pcis->n_B; 6571 pcbddcf = (PC_BDDC*)pcf->data; 6572 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6573 pcbddcf->mat_graph = pcbddc->mat_graph; 6574 pcbddcf->use_faces = PETSC_TRUE; 6575 pcbddcf->use_change_of_basis = PETSC_TRUE; 6576 pcbddcf->use_change_on_faces = PETSC_TRUE; 6577 pcbddcf->use_qr_single = PETSC_TRUE; 6578 pcbddcf->fake_change = PETSC_TRUE; 6579 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6580 /* store information on primal vertices and change of basis (in local numbering) */ 6581 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6582 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6583 change = pcbddcf->ConstraintMatrix; 6584 pcbddcf->ConstraintMatrix = NULL; 6585 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6586 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6587 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6588 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6589 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6590 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6591 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6592 pcf->ops->destroy = NULL; 6593 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6594 } 6595 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6596 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6597 ierr = MatDestroy(&change);CHKERRQ(ierr); 6598 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6599 } 6600 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6601 6602 /* free adjacency */ 6603 if (free_used_adj) { 6604 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6605 } 6606 PetscFunctionReturn(0); 6607 } 6608 6609 #undef __FUNCT__ 6610 #define __FUNCT__ "PCBDDCInitSubSchurs" 6611 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6612 { 6613 PC_IS *pcis=(PC_IS*)pc->data; 6614 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6615 PCBDDCGraph graph; 6616 PetscErrorCode ierr; 6617 6618 PetscFunctionBegin; 6619 /* attach interface graph for determining subsets */ 6620 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6621 IS verticesIS,verticescomm; 6622 PetscInt vsize,*idxs; 6623 6624 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6625 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6626 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6627 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6628 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6629 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6630 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6631 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6632 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6633 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6634 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6635 } else { 6636 graph = pcbddc->mat_graph; 6637 } 6638 /* print some info */ 6639 if (pcbddc->dbg_flag) { 6640 IS vertices; 6641 PetscInt nv,nedges,nfaces; 6642 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6643 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6644 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6645 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6649 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6651 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6652 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6653 } 6654 6655 /* sub_schurs init */ 6656 if (!pcbddc->sub_schurs) { 6657 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6658 } 6659 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6660 6661 /* free graph struct */ 6662 if (pcbddc->sub_schurs_rebuild) { 6663 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6664 } 6665 PetscFunctionReturn(0); 6666 } 6667 6668 #undef __FUNCT__ 6669 #define __FUNCT__ "PCBDDCCheckOperator" 6670 PetscErrorCode PCBDDCCheckOperator(PC pc) 6671 { 6672 PC_IS *pcis=(PC_IS*)pc->data; 6673 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6674 PetscErrorCode ierr; 6675 6676 PetscFunctionBegin; 6677 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6678 IS zerodiag = NULL; 6679 Mat S_j,B0_B=NULL; 6680 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6681 PetscScalar *p0_check,*array,*array2; 6682 PetscReal norm; 6683 PetscInt i; 6684 6685 /* B0 and B0_B */ 6686 if (zerodiag) { 6687 IS dummy; 6688 6689 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6690 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6691 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6692 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6693 } 6694 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6695 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6696 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6697 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6698 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6699 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6700 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6701 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6702 /* S_j */ 6703 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6704 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6705 6706 /* mimic vector in \widetilde{W}_\Gamma */ 6707 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6708 /* continuous in primal space */ 6709 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6710 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6711 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6712 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6713 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6714 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6715 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6716 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6717 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6718 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6719 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6720 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6721 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6722 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6723 6724 /* assemble rhs for coarse problem */ 6725 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6726 /* local with Schur */ 6727 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6728 if (zerodiag) { 6729 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6730 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6731 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6732 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6733 } 6734 /* sum on primal nodes the local contributions */ 6735 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6736 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6737 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6738 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6739 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6740 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6741 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6742 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6743 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6744 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6745 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6746 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6747 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6748 /* scale primal nodes (BDDC sums contibutions) */ 6749 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6750 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6751 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6752 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6753 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6754 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6755 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6756 /* global: \widetilde{B0}_B w_\Gamma */ 6757 if (zerodiag) { 6758 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6759 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6760 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6761 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6762 } 6763 /* BDDC */ 6764 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6765 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6766 6767 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6768 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6769 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6770 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6771 for (i=0;i<pcbddc->benign_n;i++) { 6772 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6773 } 6774 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6775 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6776 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6777 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6778 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6779 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6780 } 6781 PetscFunctionReturn(0); 6782 } 6783