1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 9 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 10 { 11 Mat loc_divudotp; 12 Vec p,v,vins,quad_vec,*quad_vecs; 13 ISLocalToGlobalMapping map; 14 IS *faces,*edges; 15 PetscScalar *vals; 16 const PetscScalar *array; 17 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 18 PetscMPIInt rank; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 if (!transpose) { 51 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 52 } else { 53 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 54 } 55 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 56 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 57 for (i=0;i<maxneighs;i++) { 58 PetscInt first,last; 59 60 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 61 /* the near-null space fo BDDC carries information on quadrature weights, 62 and these can be collinear -> so cheat with MatNullSpaceCreate 63 and create a suitable set of basis vectors first */ 64 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 65 if (i>=first && i < last) { 66 PetscScalar *data; 67 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 68 data[i-first] = 1.; 69 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 70 } 71 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 72 } 73 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 74 for (i=0;i<maxneighs;i++) { /* reset vectors */ 75 PetscInt first,last; 76 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 77 if (i>=first && i < last) { 78 PetscScalar *data; 79 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 80 data[i-first] = 0.; 81 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 82 } 83 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 84 } 85 /* compute local quad vec */ 86 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 87 if (!transpose) { 88 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 89 } else { 90 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 91 } 92 ierr = VecSet(p,1.);CHKERRQ(ierr); 93 if (!transpose) { 94 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 95 } else { 96 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 97 } 98 if (vl2l) { 99 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 100 } else { 101 vins = v; 102 } 103 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 104 ierr = VecDestroy(&p);CHKERRQ(ierr); 105 106 /* insert in global quadrature vecs */ 107 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 108 for (i=0;i<nf;i++) { 109 const PetscInt *idxs; 110 PetscInt idx,nn,j; 111 112 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 113 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 114 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 115 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 116 idx = -(idx+1); 117 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 118 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 119 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 120 } 121 for (i=0;i<ne;i++) { 122 const PetscInt *idxs; 123 PetscInt idx,nn,j; 124 125 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 126 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 127 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 128 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 129 idx = -(idx+1); 130 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 132 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 133 } 134 ierr = PetscFree(edges);CHKERRQ(ierr); 135 ierr = PetscFree(faces);CHKERRQ(ierr); 136 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 137 if (vl2l) { 138 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 139 } 140 ierr = VecDestroy(&v);CHKERRQ(ierr); 141 ierr = PetscFree(vals);CHKERRQ(ierr); 142 143 /* assemble near null space */ 144 for (i=0;i<maxneighs;i++) { 145 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 146 } 147 for (i=0;i<maxneighs;i++) { 148 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 149 } 150 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 157 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 158 { 159 PetscErrorCode ierr; 160 Vec local,global; 161 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 162 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 163 164 PetscFunctionBegin; 165 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 166 /* need to convert from global to local topology information and remove references to information in global ordering */ 167 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 168 if (pcbddc->user_provided_isfordofs) { 169 if (pcbddc->n_ISForDofs) { 170 PetscInt i; 171 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 172 for (i=0;i<pcbddc->n_ISForDofs;i++) { 173 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 174 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 175 } 176 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 177 pcbddc->n_ISForDofs = 0; 178 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 179 } 180 } else { 181 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 182 PetscInt i, n = matis->A->rmap->n; 183 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 184 if (i > 1) { 185 pcbddc->n_ISForDofsLocal = i; 186 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 187 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 188 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 189 } 190 } 191 } 192 } 193 194 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 195 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 196 } 197 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 198 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 199 } 200 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 201 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 202 } 203 ierr = VecDestroy(&global);CHKERRQ(ierr); 204 ierr = VecDestroy(&local);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 210 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 211 { 212 PC_IS *pcis = (PC_IS*)(pc->data); 213 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (!pcbddc->benign_have_null) { 218 PetscFunctionReturn(0); 219 } 220 if (pcbddc->ChangeOfBasisMatrix) { 221 Vec swap; 222 223 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 224 swap = pcbddc->work_change; 225 pcbddc->work_change = r; 226 r = swap; 227 } 228 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 229 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 230 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 231 ierr = VecSet(z,0.);CHKERRQ(ierr); 232 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 233 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 234 if (pcbddc->ChangeOfBasisMatrix) { 235 Vec swap; 236 237 swap = r; 238 r = pcbddc->work_change; 239 pcbddc->work_change = swap; 240 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 241 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 248 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 249 { 250 PCBDDCBenignMatMult_ctx ctx; 251 PetscErrorCode ierr; 252 PetscBool apply_right,apply_left,reset_x; 253 254 PetscFunctionBegin; 255 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 256 if (transpose) { 257 apply_right = ctx->apply_left; 258 apply_left = ctx->apply_right; 259 } else { 260 apply_right = ctx->apply_right; 261 apply_left = ctx->apply_left; 262 } 263 reset_x = PETSC_FALSE; 264 if (apply_right) { 265 const PetscScalar *ax; 266 PetscInt nl,i; 267 268 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 269 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 270 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 272 for (i=0;i<ctx->benign_n;i++) { 273 PetscScalar sum,val; 274 const PetscInt *idxs; 275 PetscInt nz,j; 276 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 277 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 278 sum = 0.; 279 if (ctx->apply_p0) { 280 val = ctx->work[idxs[nz-1]]; 281 for (j=0;j<nz-1;j++) { 282 sum += ctx->work[idxs[j]]; 283 ctx->work[idxs[j]] += val; 284 } 285 } else { 286 for (j=0;j<nz-1;j++) { 287 sum += ctx->work[idxs[j]]; 288 } 289 } 290 ctx->work[idxs[nz-1]] -= sum; 291 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 292 } 293 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 294 reset_x = PETSC_TRUE; 295 } 296 if (transpose) { 297 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 298 } else { 299 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 300 } 301 if (reset_x) { 302 ierr = VecResetArray(x);CHKERRQ(ierr); 303 } 304 if (apply_left) { 305 PetscScalar *ay; 306 PetscInt i; 307 308 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 309 for (i=0;i<ctx->benign_n;i++) { 310 PetscScalar sum,val; 311 const PetscInt *idxs; 312 PetscInt nz,j; 313 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 314 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 315 val = -ay[idxs[nz-1]]; 316 if (ctx->apply_p0) { 317 sum = 0.; 318 for (j=0;j<nz-1;j++) { 319 sum += ay[idxs[j]]; 320 ay[idxs[j]] += val; 321 } 322 ay[idxs[nz-1]] += sum; 323 } else { 324 for (j=0;j<nz-1;j++) { 325 ay[idxs[j]] += val; 326 } 327 ay[idxs[nz-1]] = 0.; 328 } 329 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 330 } 331 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 332 } 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 338 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 339 { 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 349 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 350 { 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "PCBDDCBenignShellMat" 360 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 361 { 362 PC_IS *pcis = (PC_IS*)pc->data; 363 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 364 PCBDDCBenignMatMult_ctx ctx; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (!restore) { 369 Mat A_IB,A_BI; 370 PetscScalar *work; 371 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 372 373 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 374 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 375 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 376 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 377 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 378 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 379 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 380 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 381 ierr = PetscNew(&ctx);CHKERRQ(ierr); 382 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 383 ctx->apply_left = PETSC_TRUE; 384 ctx->apply_right = PETSC_FALSE; 385 ctx->apply_p0 = PETSC_FALSE; 386 ctx->benign_n = pcbddc->benign_n; 387 if (reuse) { 388 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 389 ctx->free = PETSC_FALSE; 390 } else { /* TODO: could be optimized for successive solves */ 391 ISLocalToGlobalMapping N_to_D; 392 PetscInt i; 393 394 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 395 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 396 for (i=0;i<pcbddc->benign_n;i++) { 397 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 398 } 399 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 400 ctx->free = PETSC_TRUE; 401 } 402 ctx->A = pcis->A_IB; 403 ctx->work = work; 404 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 405 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 407 pcis->A_IB = A_IB; 408 409 /* A_BI as A_IB^T */ 410 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 411 pcbddc->benign_original_mat = pcis->A_BI; 412 pcis->A_BI = A_BI; 413 } else { 414 if (!pcbddc->benign_original_mat) { 415 PetscFunctionReturn(0); 416 } 417 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 418 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 419 pcis->A_IB = ctx->A; 420 ctx->A = NULL; 421 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 422 pcis->A_BI = pcbddc->benign_original_mat; 423 pcbddc->benign_original_mat = NULL; 424 if (ctx->free) { 425 PetscInt i; 426 for (i=0;i<ctx->benign_n;i++) { 427 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 428 } 429 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 430 } 431 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 432 ierr = PetscFree(ctx);CHKERRQ(ierr); 433 } 434 PetscFunctionReturn(0); 435 } 436 437 /* used just in bddc debug mode */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCBenignProject" 440 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 441 { 442 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 443 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 444 Mat An; 445 PetscErrorCode ierr; 446 447 PetscFunctionBegin; 448 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 449 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 450 if (is1) { 451 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 452 ierr = MatDestroy(&An);CHKERRQ(ierr); 453 } else { 454 *B = An; 455 } 456 PetscFunctionReturn(0); 457 } 458 459 /* TODO: add reuse flag */ 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSeqAIJCompress" 462 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 463 { 464 Mat Bt; 465 PetscScalar *a,*bdata; 466 const PetscInt *ii,*ij; 467 PetscInt m,n,i,nnz,*bii,*bij; 468 PetscBool flg_row; 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 473 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 474 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 475 nnz = n; 476 for (i=0;i<ii[n];i++) { 477 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 478 } 479 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 480 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 481 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 482 nnz = 0; 483 bii[0] = 0; 484 for (i=0;i<n;i++) { 485 PetscInt j; 486 for (j=ii[i];j<ii[i+1];j++) { 487 PetscScalar entry = a[j]; 488 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 489 bij[nnz] = ij[j]; 490 bdata[nnz] = entry; 491 nnz++; 492 } 493 } 494 bii[i+1] = nnz; 495 } 496 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 497 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 498 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 499 { 500 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 501 b->free_a = PETSC_TRUE; 502 b->free_ij = PETSC_TRUE; 503 } 504 *B = Bt; 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatDetectDisconnectedComponents" 510 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 511 { 512 Mat B; 513 IS is_dummy,*cc_n; 514 ISLocalToGlobalMapping l2gmap_dummy; 515 PCBDDCGraph graph; 516 PetscInt i,n; 517 PetscInt *xadj,*adjncy; 518 PetscInt *xadj_filtered,*adjncy_filtered; 519 PetscBool flg_row,isseqaij; 520 PetscErrorCode ierr; 521 522 PetscFunctionBegin; 523 if (!A->rmap->N || !A->cmap->N) { 524 *ncc = 0; 525 *cc = NULL; 526 PetscFunctionReturn(0); 527 } 528 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 529 if (!isseqaij && filter) { 530 PetscBool isseqdense; 531 532 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 533 if (!isseqdense) { 534 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 535 } else { /* TODO: rectangular case and LDA */ 536 PetscScalar *array; 537 PetscReal chop=1.e-6; 538 539 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 540 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 541 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 542 for (i=0;i<n;i++) { 543 PetscInt j; 544 for (j=i+1;j<n;j++) { 545 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 546 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 547 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 548 } 549 } 550 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 551 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 552 } 553 } else { 554 B = A; 555 } 556 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 557 558 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 559 if (filter) { 560 PetscScalar *data; 561 PetscInt j,cum; 562 563 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 564 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 565 cum = 0; 566 for (i=0;i<n;i++) { 567 PetscInt t; 568 569 for (j=xadj[i];j<xadj[i+1];j++) { 570 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 571 continue; 572 } 573 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 574 } 575 t = xadj_filtered[i]; 576 xadj_filtered[i] = cum; 577 cum += t; 578 } 579 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 580 } else { 581 xadj_filtered = NULL; 582 adjncy_filtered = NULL; 583 } 584 585 /* compute local connected components using PCBDDCGraph */ 586 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 587 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 589 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 590 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 591 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 592 if (xadj_filtered) { 593 graph->xadj = xadj_filtered; 594 graph->adjncy = adjncy_filtered; 595 } else { 596 graph->xadj = xadj; 597 graph->adjncy = adjncy; 598 } 599 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 600 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 601 /* partial clean up */ 602 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 603 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 604 if (A != B) { 605 ierr = MatDestroy(&B);CHKERRQ(ierr); 606 } 607 608 /* get back data */ 609 if (ncc) *ncc = graph->ncc; 610 if (cc) { 611 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 612 for (i=0;i<graph->ncc;i++) { 613 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 614 } 615 *cc = cc_n; 616 } 617 /* clean up graph */ 618 graph->xadj = 0; 619 graph->adjncy = 0; 620 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "PCBDDCBenignCheck" 626 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 627 { 628 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 629 PC_IS* pcis = (PC_IS*)(pc->data); 630 IS dirIS = NULL; 631 PetscInt i; 632 PetscErrorCode ierr; 633 634 PetscFunctionBegin; 635 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 636 if (zerodiag) { 637 Mat A; 638 Vec vec3_N; 639 PetscScalar *vals; 640 const PetscInt *idxs; 641 PetscInt nz,*count; 642 643 /* p0 */ 644 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 645 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 646 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 647 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 648 for (i=0;i<nz;i++) vals[i] = 1.; 649 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 650 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 651 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 652 /* v_I */ 653 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 654 for (i=0;i<nz;i++) vals[i] = 0.; 655 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 657 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 658 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 659 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 661 if (dirIS) { 662 PetscInt n; 663 664 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 665 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 666 for (i=0;i<n;i++) vals[i] = 0.; 667 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 668 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 669 } 670 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 671 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 672 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 673 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 674 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 675 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 676 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 677 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 678 ierr = PetscFree(vals);CHKERRQ(ierr); 679 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 680 681 /* there should not be any pressure dofs lying on the interface */ 682 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 683 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 684 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 685 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 686 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 687 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 688 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 689 ierr = PetscFree(count);CHKERRQ(ierr); 690 } 691 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 692 693 /* check PCBDDCBenignGetOrSetP0 */ 694 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 695 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 696 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 697 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 698 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 699 for (i=0;i<pcbddc->benign_n;i++) if ((PetscInt)(PetscRealPart(pcbddc->benign_p0[i])) != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 705 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 706 { 707 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 708 IS pressures,zerodiag,*zerodiag_subs; 709 PetscInt nz,n; 710 PetscInt *interior_dofs,n_interior_dofs; 711 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 712 PetscErrorCode ierr; 713 714 PetscFunctionBegin; 715 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 717 for (n=0;n<pcbddc->benign_n;n++) { 718 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 719 } 720 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 721 pcbddc->benign_n = 0; 722 /* if a local info on dofs is present, assumes that the last field represents "pressures" 723 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 724 Checks if all the pressure dofs in each subdomain have a zero diagonal 725 If not, a change of basis on pressures is not needed 726 since the local Schur complements are already SPD 727 */ 728 has_null_pressures = PETSC_TRUE; 729 have_null = PETSC_TRUE; 730 if (pcbddc->n_ISForDofsLocal) { 731 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 732 733 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 734 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 735 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 736 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 738 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 739 if (!sorted) { 740 ierr = ISSort(pressures);CHKERRQ(ierr); 741 } 742 } else { 743 pressures = NULL; 744 } 745 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 746 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 747 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 748 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 749 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 750 if (!sorted) { 751 ierr = ISSort(zerodiag);CHKERRQ(ierr); 752 } 753 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 754 if (!nz) { 755 if (n) have_null = PETSC_FALSE; 756 has_null_pressures = PETSC_FALSE; 757 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 758 } 759 recompute_zerodiag = PETSC_FALSE; 760 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 761 zerodiag_subs = NULL; 762 pcbddc->benign_n = 0; 763 n_interior_dofs = 0; 764 interior_dofs = NULL; 765 if (pcbddc->current_level) { /* need to compute interior nodes */ 766 PetscInt n,i,j; 767 PetscInt n_neigh,*neigh,*n_shared,**shared; 768 PetscInt *iwork; 769 770 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 771 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 772 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 773 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 774 for (i=0;i<n_neigh;i++) 775 for (j=0;j<n_shared[i];j++) 776 iwork[shared[i][j]] += 1; 777 for (i=0;i<n;i++) 778 if (!iwork[i]) 779 interior_dofs[n_interior_dofs++] = i; 780 ierr = PetscFree(iwork);CHKERRQ(ierr); 781 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 782 } 783 if (has_null_pressures) { 784 IS *subs; 785 PetscInt nsubs,i,j,nl; 786 const PetscInt *idxs; 787 PetscScalar *array; 788 Vec *work; 789 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 790 791 subs = pcbddc->local_subs; 792 nsubs = pcbddc->n_local_subs; 793 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 794 if (pcbddc->current_level) { 795 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 796 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 797 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 798 /* work[0] = 1_p */ 799 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 800 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 801 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 802 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 803 /* work[0] = 1_v */ 804 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 805 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 806 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 807 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 808 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 809 } 810 if (nsubs > 1) { 811 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 812 for (i=0;i<nsubs;i++) { 813 ISLocalToGlobalMapping l2g; 814 IS t_zerodiag_subs; 815 PetscInt nl; 816 817 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 818 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 819 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 820 if (nl) { 821 PetscBool valid = PETSC_TRUE; 822 823 if (pcbddc->current_level) { 824 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 825 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 826 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 827 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 828 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 829 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 830 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 831 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 832 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 833 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 834 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 835 for (j=0;j<n_interior_dofs;j++) { 836 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 837 valid = PETSC_FALSE; 838 break; 839 } 840 } 841 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 842 } 843 if (valid && pcbddc->NeumannBoundariesLocal) { 844 IS t_bc; 845 PetscInt nzb; 846 847 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 848 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 849 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 850 if (nzb) valid = PETSC_FALSE; 851 } 852 if (valid && pressures) { 853 IS t_pressure_subs; 854 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 855 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 856 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 857 } 858 if (valid) { 859 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 860 pcbddc->benign_n++; 861 } else { 862 recompute_zerodiag = PETSC_TRUE; 863 } 864 } 865 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 866 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 867 } 868 } else { /* there's just one subdomain (or zero if they have not been detected */ 869 PetscBool valid = PETSC_TRUE; 870 871 if (pcbddc->NeumannBoundariesLocal) { 872 PetscInt nzb; 873 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 874 if (nzb) valid = PETSC_FALSE; 875 } 876 if (valid && pressures) { 877 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 878 } 879 if (valid && pcbddc->current_level) { 880 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 881 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 883 for (j=0;j<n_interior_dofs;j++) { 884 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 885 valid = PETSC_FALSE; 886 break; 887 } 888 } 889 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 890 } 891 if (valid) { 892 pcbddc->benign_n = 1; 893 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 895 zerodiag_subs[0] = zerodiag; 896 } 897 } 898 if (pcbddc->current_level) { 899 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 900 } 901 } 902 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 903 904 if (!pcbddc->benign_n) { 905 PetscInt n; 906 907 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 908 recompute_zerodiag = PETSC_FALSE; 909 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 910 if (n) { 911 has_null_pressures = PETSC_FALSE; 912 have_null = PETSC_FALSE; 913 } 914 } 915 916 /* final check for null pressures */ 917 if (zerodiag && pressures) { 918 PetscInt nz,np; 919 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 920 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 921 if (nz != np) have_null = PETSC_FALSE; 922 } 923 924 if (recompute_zerodiag) { 925 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 926 if (pcbddc->benign_n == 1) { 927 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 928 zerodiag = zerodiag_subs[0]; 929 } else { 930 PetscInt i,nzn,*new_idxs; 931 932 nzn = 0; 933 for (i=0;i<pcbddc->benign_n;i++) { 934 PetscInt ns; 935 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 936 nzn += ns; 937 } 938 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 939 nzn = 0; 940 for (i=0;i<pcbddc->benign_n;i++) { 941 PetscInt ns,*idxs; 942 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 943 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 944 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 945 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 946 nzn += ns; 947 } 948 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 949 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 950 } 951 have_null = PETSC_FALSE; 952 } 953 954 /* Prepare matrix to compute no-net-flux */ 955 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 956 Mat A,loc_divudotp; 957 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 958 IS row,col,isused = NULL; 959 PetscInt M,N,n,st,n_isused; 960 961 if (pressures) { 962 isused = pressures; 963 } else { 964 isused = zerodiag; 965 } 966 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 967 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 968 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 969 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 970 n_isused = 0; 971 if (isused) { 972 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 973 } 974 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 975 st = st-n_isused; 976 if (n) { 977 const PetscInt *gidxs; 978 979 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 980 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 981 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 982 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 983 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 984 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 985 } else { 986 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 987 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 988 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 989 } 990 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 991 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 992 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 993 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 994 ierr = ISDestroy(&row);CHKERRQ(ierr); 995 ierr = ISDestroy(&col);CHKERRQ(ierr); 996 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 997 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 998 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 999 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1000 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1001 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1002 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1003 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1004 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1006 } 1007 1008 /* change of basis and p0 dofs */ 1009 if (has_null_pressures) { 1010 IS zerodiagc; 1011 const PetscInt *idxs,*idxsc; 1012 PetscInt i,s,*nnz; 1013 1014 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1015 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1016 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1017 /* local change of basis for pressures */ 1018 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1019 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1020 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1021 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1022 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1023 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1024 for (i=0;i<pcbddc->benign_n;i++) { 1025 PetscInt nzs,j; 1026 1027 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1028 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1029 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1030 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1031 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1032 } 1033 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1034 ierr = PetscFree(nnz);CHKERRQ(ierr); 1035 /* set identity on velocities */ 1036 for (i=0;i<n-nz;i++) { 1037 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1038 } 1039 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1040 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1041 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1042 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1043 /* set change on pressures */ 1044 for (s=0;s<pcbddc->benign_n;s++) { 1045 PetscScalar *array; 1046 PetscInt nzs; 1047 1048 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1049 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1050 for (i=0;i<nzs-1;i++) { 1051 PetscScalar vals[2]; 1052 PetscInt cols[2]; 1053 1054 cols[0] = idxs[i]; 1055 cols[1] = idxs[nzs-1]; 1056 vals[0] = 1.; 1057 vals[1] = 1.; 1058 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1059 } 1060 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1061 for (i=0;i<nzs-1;i++) array[i] = -1.; 1062 array[nzs-1] = 1.; 1063 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1064 /* store local idxs for p0 */ 1065 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1066 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1067 ierr = PetscFree(array);CHKERRQ(ierr); 1068 } 1069 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1070 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1071 /* project if needed */ 1072 if (pcbddc->benign_change_explicit) { 1073 Mat M; 1074 1075 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1076 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1077 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1078 ierr = MatDestroy(&M);CHKERRQ(ierr); 1079 } 1080 /* store global idxs for p0 */ 1081 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1082 } 1083 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1084 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1085 1086 /* determines if the coarse solver will be singular or not */ 1087 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1088 /* determines if the problem has subdomains with 0 pressure block */ 1089 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1090 *zerodiaglocal = zerodiag; 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1096 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1097 { 1098 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1099 PetscScalar *array; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 if (!pcbddc->benign_sf) { 1104 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1105 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1106 } 1107 if (get) { 1108 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1109 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1110 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1111 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1112 } else { 1113 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1114 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1115 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1123 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1124 { 1125 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 /* TODO: add error checking 1130 - avoid nested pop (or push) calls. 1131 - cannot push before pop. 1132 - cannot call this if pcbddc->local_mat is NULL 1133 */ 1134 if (!pcbddc->benign_n) { 1135 PetscFunctionReturn(0); 1136 } 1137 if (pop) { 1138 if (pcbddc->benign_change_explicit) { 1139 IS is_p0; 1140 MatReuse reuse; 1141 1142 /* extract B_0 */ 1143 reuse = MAT_INITIAL_MATRIX; 1144 if (pcbddc->benign_B0) { 1145 reuse = MAT_REUSE_MATRIX; 1146 } 1147 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1148 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1149 /* remove rows and cols from local problem */ 1150 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1151 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1152 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1153 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1154 } else { 1155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1156 PetscScalar *vals; 1157 PetscInt i,n,*idxs_ins; 1158 1159 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1160 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1161 if (!pcbddc->benign_B0) { 1162 PetscInt *nnz; 1163 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1164 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1165 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1167 for (i=0;i<pcbddc->benign_n;i++) { 1168 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1169 nnz[i] = n - nnz[i]; 1170 } 1171 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1172 ierr = PetscFree(nnz);CHKERRQ(ierr); 1173 } 1174 1175 for (i=0;i<pcbddc->benign_n;i++) { 1176 PetscScalar *array; 1177 PetscInt *idxs,j,nz,cum; 1178 1179 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1180 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1181 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1182 for (j=0;j<nz;j++) vals[j] = 1.; 1183 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1184 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1185 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1186 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1187 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1188 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1189 cum = 0; 1190 for (j=0;j<n;j++) { 1191 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1192 vals[cum] = array[j]; 1193 idxs_ins[cum] = j; 1194 cum++; 1195 } 1196 } 1197 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1198 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1199 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1200 } 1201 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1202 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1203 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1204 } 1205 } else { /* push */ 1206 if (pcbddc->benign_change_explicit) { 1207 PetscInt i; 1208 1209 for (i=0;i<pcbddc->benign_n;i++) { 1210 PetscScalar *B0_vals; 1211 PetscInt *B0_cols,B0_ncol; 1212 1213 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1214 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1215 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1216 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1217 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1218 } 1219 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1220 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1221 } else { 1222 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1223 } 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1230 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1231 { 1232 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1233 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1234 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1235 PetscBLASInt *B_iwork,*B_ifail; 1236 PetscScalar *work,lwork; 1237 PetscScalar *St,*S,*eigv; 1238 PetscScalar *Sarray,*Starray; 1239 PetscReal *eigs,thresh; 1240 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1241 PetscBool allocated_S_St; 1242 #if defined(PETSC_USE_COMPLEX) 1243 PetscReal *rwork; 1244 #endif 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1249 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1250 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1251 1252 if (pcbddc->dbg_flag) { 1253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1254 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1255 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1256 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 } 1258 1259 if (pcbddc->dbg_flag) { 1260 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1261 } 1262 1263 /* max size of subsets */ 1264 mss = 0; 1265 for (i=0;i<sub_schurs->n_subs;i++) { 1266 PetscInt subset_size; 1267 1268 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1269 mss = PetscMax(mss,subset_size); 1270 } 1271 1272 /* min/max and threshold */ 1273 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1274 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1275 nmax = PetscMax(nmin,nmax); 1276 allocated_S_St = PETSC_FALSE; 1277 if (nmin) { 1278 allocated_S_St = PETSC_TRUE; 1279 } 1280 1281 /* allocate lapack workspace */ 1282 cum = cum2 = 0; 1283 maxneigs = 0; 1284 for (i=0;i<sub_schurs->n_subs;i++) { 1285 PetscInt n,subset_size; 1286 1287 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1288 n = PetscMin(subset_size,nmax); 1289 cum += subset_size; 1290 cum2 += subset_size*n; 1291 maxneigs = PetscMax(maxneigs,n); 1292 } 1293 if (mss) { 1294 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1295 PetscBLASInt B_itype = 1; 1296 PetscBLASInt B_N = mss; 1297 PetscReal zero = 0.0; 1298 PetscReal eps = 0.0; /* dlamch? */ 1299 1300 B_lwork = -1; 1301 S = NULL; 1302 St = NULL; 1303 eigs = NULL; 1304 eigv = NULL; 1305 B_iwork = NULL; 1306 B_ifail = NULL; 1307 #if defined(PETSC_USE_COMPLEX) 1308 rwork = NULL; 1309 #endif 1310 thresh = 1.0; 1311 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1312 #if defined(PETSC_USE_COMPLEX) 1313 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1314 #else 1315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1316 #endif 1317 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1318 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1319 } else { 1320 /* TODO */ 1321 } 1322 } else { 1323 lwork = 0; 1324 } 1325 1326 nv = 0; 1327 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1328 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1329 } 1330 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1331 if (allocated_S_St) { 1332 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1333 } 1334 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1335 #if defined(PETSC_USE_COMPLEX) 1336 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1337 #endif 1338 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1339 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1340 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1341 nv+cum,&pcbddc->adaptive_constraints_idxs, 1342 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1343 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1344 1345 maxneigs = 0; 1346 cum = cumarray = 0; 1347 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1348 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1349 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1350 const PetscInt *idxs; 1351 1352 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1353 for (cum=0;cum<nv;cum++) { 1354 pcbddc->adaptive_constraints_n[cum] = 1; 1355 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1356 pcbddc->adaptive_constraints_data[cum] = 1.0; 1357 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1358 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1359 } 1360 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1361 } 1362 1363 if (mss) { /* multilevel */ 1364 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1365 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1366 } 1367 1368 thresh = pcbddc->adaptive_threshold; 1369 for (i=0;i<sub_schurs->n_subs;i++) { 1370 const PetscInt *idxs; 1371 PetscReal upper,lower; 1372 PetscInt j,subset_size,eigs_start = 0; 1373 PetscBLASInt B_N; 1374 PetscBool same_data = PETSC_FALSE; 1375 1376 if (pcbddc->use_deluxe_scaling) { 1377 upper = PETSC_MAX_REAL; 1378 lower = thresh; 1379 } else { 1380 upper = 1./thresh; 1381 lower = 0.; 1382 } 1383 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1384 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1385 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1386 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1387 if (sub_schurs->is_hermitian) { 1388 PetscInt j,k; 1389 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1390 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1391 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1392 } 1393 for (j=0;j<subset_size;j++) { 1394 for (k=j;k<subset_size;k++) { 1395 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1396 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1397 } 1398 } 1399 } else { 1400 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1401 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1402 } 1403 } else { 1404 S = Sarray + cumarray; 1405 St = Starray + cumarray; 1406 } 1407 /* see if we can save some work */ 1408 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1409 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1410 } 1411 1412 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1413 B_neigs = 0; 1414 } else { 1415 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1416 PetscBLASInt B_itype = 1; 1417 PetscBLASInt B_IL, B_IU; 1418 PetscReal eps = -1.0; /* dlamch? */ 1419 PetscInt nmin_s; 1420 PetscBool compute_range = PETSC_FALSE; 1421 1422 if (pcbddc->dbg_flag) { 1423 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1424 } 1425 1426 compute_range = PETSC_FALSE; 1427 if (thresh > 1.+PETSC_SMALL && !same_data) { 1428 compute_range = PETSC_TRUE; 1429 } 1430 1431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1432 if (compute_range) { 1433 1434 /* ask for eigenvalues larger than thresh */ 1435 #if defined(PETSC_USE_COMPLEX) 1436 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1437 #else 1438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1439 #endif 1440 } else if (!same_data) { 1441 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1442 B_IL = 1; 1443 #if defined(PETSC_USE_COMPLEX) 1444 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1445 #else 1446 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1447 #endif 1448 } else { /* same_data is true, so get the adaptive function requested by the user */ 1449 PetscInt k; 1450 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1451 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1453 nmin = nmax; 1454 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1455 for (k=0;k<nmax;k++) { 1456 eigs[k] = 1./PETSC_SMALL; 1457 eigv[k*(subset_size+1)] = 1.0; 1458 } 1459 } 1460 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1461 if (B_ierr) { 1462 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1463 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1464 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1465 } 1466 1467 if (B_neigs > nmax) { 1468 if (pcbddc->dbg_flag) { 1469 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1470 } 1471 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1472 B_neigs = nmax; 1473 } 1474 1475 nmin_s = PetscMin(nmin,B_N); 1476 if (B_neigs < nmin_s) { 1477 PetscBLASInt B_neigs2; 1478 1479 if (pcbddc->use_deluxe_scaling) { 1480 B_IL = B_N - nmin_s + 1; 1481 B_IU = B_N - B_neigs; 1482 } else { 1483 B_IL = B_neigs + 1; 1484 B_IU = nmin_s; 1485 } 1486 if (pcbddc->dbg_flag) { 1487 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1488 } 1489 if (sub_schurs->is_hermitian) { 1490 PetscInt j,k; 1491 for (j=0;j<subset_size;j++) { 1492 for (k=j;k<subset_size;k++) { 1493 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1494 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1495 } 1496 } 1497 } else { 1498 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1499 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1500 } 1501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1502 #if defined(PETSC_USE_COMPLEX) 1503 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1504 #else 1505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1506 #endif 1507 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1508 B_neigs += B_neigs2; 1509 } 1510 if (B_ierr) { 1511 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1512 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1513 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1514 } 1515 if (pcbddc->dbg_flag) { 1516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1517 for (j=0;j<B_neigs;j++) { 1518 if (eigs[j] == 0.0) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1520 } else { 1521 if (pcbddc->use_deluxe_scaling) { 1522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1523 } else { 1524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 } 1529 } else { 1530 /* TODO */ 1531 } 1532 } 1533 /* change the basis back to the original one */ 1534 if (sub_schurs->change) { 1535 Mat change,phi,phit; 1536 1537 if (pcbddc->dbg_flag > 1) { 1538 PetscInt ii; 1539 for (ii=0;ii<B_neigs;ii++) { 1540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1541 for (j=0;j<B_N;j++) { 1542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1543 } 1544 } 1545 } 1546 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1547 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1548 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1549 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1550 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1551 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1552 } 1553 maxneigs = PetscMax(B_neigs,maxneigs); 1554 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1555 if (B_neigs) { 1556 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1557 1558 if (pcbddc->dbg_flag > 1) { 1559 PetscInt ii; 1560 for (ii=0;ii<B_neigs;ii++) { 1561 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1562 for (j=0;j<B_N;j++) { 1563 #if defined(PETSC_USE_COMPLEX) 1564 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1565 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1567 #else 1568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1569 #endif 1570 } 1571 } 1572 } 1573 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1574 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1575 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1576 cum++; 1577 } 1578 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1579 /* shift for next computation */ 1580 cumarray += subset_size*subset_size; 1581 } 1582 if (pcbddc->dbg_flag) { 1583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1584 } 1585 1586 if (mss) { 1587 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1588 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1589 /* destroy matrices (junk) */ 1590 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1591 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1592 } 1593 if (allocated_S_St) { 1594 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1595 } 1596 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1597 #if defined(PETSC_USE_COMPLEX) 1598 ierr = PetscFree(rwork);CHKERRQ(ierr); 1599 #endif 1600 if (pcbddc->dbg_flag) { 1601 PetscInt maxneigs_r; 1602 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "PCBDDCSetUpSolvers" 1610 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1611 { 1612 PetscScalar *coarse_submat_vals; 1613 PetscErrorCode ierr; 1614 1615 PetscFunctionBegin; 1616 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1617 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1618 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1619 1620 /* Setup local neumann solver ksp_R */ 1621 /* PCBDDCSetUpLocalScatters should be called first! */ 1622 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1623 1624 /* 1625 Setup local correction and local part of coarse basis. 1626 Gives back the dense local part of the coarse matrix in column major ordering 1627 */ 1628 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1629 1630 /* Compute total number of coarse nodes and setup coarse solver */ 1631 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1632 1633 /* free */ 1634 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "PCBDDCResetCustomization" 1640 PetscErrorCode PCBDDCResetCustomization(PC pc) 1641 { 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 PetscErrorCode ierr; 1644 1645 PetscFunctionBegin; 1646 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1647 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1648 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1649 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1650 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1651 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1652 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1653 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1654 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1655 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1656 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "PCBDDCResetTopography" 1662 PetscErrorCode PCBDDCResetTopography(PC pc) 1663 { 1664 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1665 PetscInt i; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1670 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1671 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1672 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1673 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1674 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1675 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1676 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1677 for (i=0;i<pcbddc->n_local_subs;i++) { 1678 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1681 if (pcbddc->sub_schurs) { 1682 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "PCBDDCResetSolvers" 1689 PetscErrorCode PCBDDCResetSolvers(PC pc) 1690 { 1691 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1692 PetscErrorCode ierr; 1693 1694 PetscFunctionBegin; 1695 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1696 if (pcbddc->coarse_phi_B) { 1697 PetscScalar *array; 1698 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1699 ierr = PetscFree(array);CHKERRQ(ierr); 1700 } 1701 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1702 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1703 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1704 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1705 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1706 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1707 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1708 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1709 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1710 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1711 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1712 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1713 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1714 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1715 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1716 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1717 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1718 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1719 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1720 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1721 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1722 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1723 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1724 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1725 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1726 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1727 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1728 if (pcbddc->benign_zerodiag_subs) { 1729 PetscInt i; 1730 for (i=0;i<pcbddc->benign_n;i++) { 1731 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1732 } 1733 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1734 } 1735 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1741 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1742 { 1743 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1744 PC_IS *pcis = (PC_IS*)pc->data; 1745 VecType impVecType; 1746 PetscInt n_constraints,n_R,old_size; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1751 /* get sizes */ 1752 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1753 n_R = pcis->n - pcbddc->n_vertices; 1754 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1755 /* local work vectors (try to avoid unneeded work)*/ 1756 /* R nodes */ 1757 old_size = -1; 1758 if (pcbddc->vec1_R) { 1759 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1760 } 1761 if (n_R != old_size) { 1762 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1763 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1764 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1765 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1766 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1767 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1768 } 1769 /* local primal dofs */ 1770 old_size = -1; 1771 if (pcbddc->vec1_P) { 1772 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1773 } 1774 if (pcbddc->local_primal_size != old_size) { 1775 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1776 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1777 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1778 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1779 } 1780 /* local explicit constraints */ 1781 old_size = -1; 1782 if (pcbddc->vec1_C) { 1783 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1784 } 1785 if (n_constraints && n_constraints != old_size) { 1786 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1787 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1788 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1789 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1790 } 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "PCBDDCSetUpCorrection" 1796 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1797 { 1798 PetscErrorCode ierr; 1799 /* pointers to pcis and pcbddc */ 1800 PC_IS* pcis = (PC_IS*)pc->data; 1801 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1802 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1803 /* submatrices of local problem */ 1804 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1805 /* submatrices of local coarse problem */ 1806 Mat S_VV,S_CV,S_VC,S_CC; 1807 /* working matrices */ 1808 Mat C_CR; 1809 /* additional working stuff */ 1810 PC pc_R; 1811 Mat F; 1812 Vec dummy_vec; 1813 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1814 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1815 PetscScalar *work; 1816 PetscInt *idx_V_B; 1817 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1818 PetscInt i,n_R,n_D,n_B; 1819 1820 /* some shortcuts to scalars */ 1821 PetscScalar one=1.0,m_one=-1.0; 1822 1823 PetscFunctionBegin; 1824 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1825 1826 /* Set Non-overlapping dimensions */ 1827 n_vertices = pcbddc->n_vertices; 1828 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1829 n_B = pcis->n_B; 1830 n_D = pcis->n - n_B; 1831 n_R = pcis->n - n_vertices; 1832 1833 /* vertices in boundary numbering */ 1834 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1835 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1836 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1837 1838 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1839 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1840 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1841 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1843 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1845 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1847 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1848 1849 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1850 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1851 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1852 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1853 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1854 lda_rhs = n_R; 1855 need_benign_correction = PETSC_FALSE; 1856 if (isLU || isILU || isCHOL) { 1857 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1858 } else if (sub_schurs && sub_schurs->reuse_solver) { 1859 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1860 MatFactorType type; 1861 1862 F = reuse_solver->F; 1863 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1864 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1865 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1866 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1867 } else { 1868 F = NULL; 1869 } 1870 1871 /* allocate workspace */ 1872 n = 0; 1873 if (n_constraints) { 1874 n += lda_rhs*n_constraints; 1875 } 1876 if (n_vertices) { 1877 n = PetscMax(2*lda_rhs*n_vertices,n); 1878 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1879 } 1880 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1881 1882 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1883 dummy_vec = NULL; 1884 if (need_benign_correction && lda_rhs != n_R && F) { 1885 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1886 } 1887 1888 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1889 if (n_constraints) { 1890 Mat M1,M2,M3,C_B; 1891 IS is_aux; 1892 PetscScalar *array,*array2; 1893 1894 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1895 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1896 1897 /* Extract constraints on R nodes: C_{CR} */ 1898 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1899 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1900 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1901 1902 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1903 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1904 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1905 for (i=0;i<n_constraints;i++) { 1906 const PetscScalar *row_cmat_values; 1907 const PetscInt *row_cmat_indices; 1908 PetscInt size_of_constraint,j; 1909 1910 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1911 for (j=0;j<size_of_constraint;j++) { 1912 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1913 } 1914 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1915 } 1916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1917 if (F) { 1918 Mat B; 1919 1920 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1921 if (need_benign_correction) { 1922 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1923 1924 /* rhs is already zero on interior dofs, no need to change the rhs */ 1925 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1926 } 1927 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1928 if (need_benign_correction) { 1929 PetscScalar *marr; 1930 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1931 1932 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1933 if (lda_rhs != n_R) { 1934 for (i=0;i<n_constraints;i++) { 1935 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1936 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1937 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1938 } 1939 } else { 1940 for (i=0;i<n_constraints;i++) { 1941 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1942 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1943 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1944 } 1945 } 1946 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1947 } 1948 ierr = MatDestroy(&B);CHKERRQ(ierr); 1949 } else { 1950 PetscScalar *marr; 1951 1952 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1953 for (i=0;i<n_constraints;i++) { 1954 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1955 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1956 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1957 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1958 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1959 } 1960 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1961 } 1962 if (!pcbddc->switch_static) { 1963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1964 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1965 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1966 for (i=0;i<n_constraints;i++) { 1967 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1968 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1972 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1973 } 1974 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1975 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1976 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1977 } else { 1978 if (lda_rhs != n_R) { 1979 IS dummy; 1980 1981 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1982 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1983 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1984 } else { 1985 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1986 pcbddc->local_auxmat2 = local_auxmat2_R; 1987 } 1988 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1989 } 1990 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1991 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1992 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1993 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1994 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1995 if (isCHOL) { 1996 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1997 } else { 1998 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1999 } 2000 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2001 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2002 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2003 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2004 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2005 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2006 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2007 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2008 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2009 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2010 } 2011 2012 /* Get submatrices from subdomain matrix */ 2013 if (n_vertices) { 2014 IS is_aux; 2015 2016 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2017 IS tis; 2018 2019 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2020 ierr = ISSort(tis);CHKERRQ(ierr); 2021 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2022 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2023 } else { 2024 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2025 } 2026 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2027 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2028 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2029 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2030 } 2031 2032 /* Matrix of coarse basis functions (local) */ 2033 if (pcbddc->coarse_phi_B) { 2034 PetscInt on_B,on_primal,on_D=n_D; 2035 if (pcbddc->coarse_phi_D) { 2036 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2037 } 2038 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2039 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2040 PetscScalar *marray; 2041 2042 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2043 ierr = PetscFree(marray);CHKERRQ(ierr); 2044 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2045 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2046 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2047 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2048 } 2049 } 2050 2051 if (!pcbddc->coarse_phi_B) { 2052 PetscScalar *marray; 2053 2054 n = n_B*pcbddc->local_primal_size; 2055 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2056 n += n_D*pcbddc->local_primal_size; 2057 } 2058 if (!pcbddc->symmetric_primal) { 2059 n *= 2; 2060 } 2061 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2062 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2063 n = n_B*pcbddc->local_primal_size; 2064 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2065 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2066 n += n_D*pcbddc->local_primal_size; 2067 } 2068 if (!pcbddc->symmetric_primal) { 2069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2070 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2071 n = n_B*pcbddc->local_primal_size; 2072 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2073 } 2074 } else { 2075 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2076 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2077 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2078 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2079 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2080 } 2081 } 2082 } 2083 2084 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2085 p0_lidx_I = NULL; 2086 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2087 const PetscInt *idxs; 2088 2089 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2090 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2091 for (i=0;i<pcbddc->benign_n;i++) { 2092 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2093 } 2094 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2095 } 2096 2097 /* vertices */ 2098 if (n_vertices) { 2099 2100 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2101 2102 if (n_R) { 2103 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2104 PetscBLASInt B_N,B_one = 1; 2105 PetscScalar *x,*y; 2106 PetscBool isseqaij; 2107 2108 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2109 if (need_benign_correction) { 2110 ISLocalToGlobalMapping RtoN; 2111 IS is_p0; 2112 PetscInt *idxs_p0,n; 2113 2114 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2115 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2116 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2117 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2118 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2119 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2120 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2121 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2122 } 2123 2124 if (lda_rhs == n_R) { 2125 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2126 } else { 2127 PetscScalar *av,*array; 2128 const PetscInt *xadj,*adjncy; 2129 PetscInt n; 2130 PetscBool flg_row; 2131 2132 array = work+lda_rhs*n_vertices; 2133 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2134 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2135 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2136 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2137 for (i=0;i<n;i++) { 2138 PetscInt j; 2139 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2140 } 2141 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2142 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2143 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2144 } 2145 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2146 if (need_benign_correction) { 2147 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2148 PetscScalar *marr; 2149 2150 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2151 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2152 2153 | 0 0 0 | (V) 2154 L = | 0 0 -1 | (P-p0) 2155 | 0 0 -1 | (p0) 2156 2157 */ 2158 for (i=0;i<reuse_solver->benign_n;i++) { 2159 const PetscScalar *vals; 2160 const PetscInt *idxs,*idxs_zero; 2161 PetscInt n,j,nz; 2162 2163 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2164 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2165 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2166 for (j=0;j<n;j++) { 2167 PetscScalar val = vals[j]; 2168 PetscInt k,col = idxs[j]; 2169 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2170 } 2171 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2172 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2173 } 2174 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2175 } 2176 if (F) { 2177 /* need to correct the rhs */ 2178 if (need_benign_correction) { 2179 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2180 PetscScalar *marr; 2181 2182 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2183 if (lda_rhs != n_R) { 2184 for (i=0;i<n_vertices;i++) { 2185 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2186 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2187 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2188 } 2189 } else { 2190 for (i=0;i<n_vertices;i++) { 2191 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2192 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2193 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2194 } 2195 } 2196 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2197 } 2198 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2199 /* need to correct the solution */ 2200 if (need_benign_correction) { 2201 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2202 PetscScalar *marr; 2203 2204 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2205 if (lda_rhs != n_R) { 2206 for (i=0;i<n_vertices;i++) { 2207 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2208 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2209 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2210 } 2211 } else { 2212 for (i=0;i<n_vertices;i++) { 2213 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2214 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2215 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2216 } 2217 } 2218 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2219 } 2220 } else { 2221 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2222 for (i=0;i<n_vertices;i++) { 2223 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2224 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2226 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2227 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2228 } 2229 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2230 } 2231 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2232 /* S_VV and S_CV */ 2233 if (n_constraints) { 2234 Mat B; 2235 2236 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2237 for (i=0;i<n_vertices;i++) { 2238 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2239 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2240 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2241 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2242 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2243 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2244 } 2245 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2246 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2247 ierr = MatDestroy(&B);CHKERRQ(ierr); 2248 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2249 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2250 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2251 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2252 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2253 ierr = MatDestroy(&B);CHKERRQ(ierr); 2254 } 2255 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2256 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2257 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2258 } 2259 if (lda_rhs != n_R) { 2260 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2261 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2262 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2263 } 2264 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2265 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2266 if (need_benign_correction) { 2267 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2268 PetscScalar *marr,*sums; 2269 2270 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2271 ierr = MatDenseGetArray(S_VVt,&marr); 2272 for (i=0;i<reuse_solver->benign_n;i++) { 2273 const PetscScalar *vals; 2274 const PetscInt *idxs,*idxs_zero; 2275 PetscInt n,j,nz; 2276 2277 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2278 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2279 for (j=0;j<n_vertices;j++) { 2280 PetscInt k; 2281 sums[j] = 0.; 2282 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2283 } 2284 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2285 for (j=0;j<n;j++) { 2286 PetscScalar val = vals[j]; 2287 PetscInt k; 2288 for (k=0;k<n_vertices;k++) { 2289 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2290 } 2291 } 2292 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2293 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2294 } 2295 ierr = PetscFree(sums);CHKERRQ(ierr); 2296 ierr = MatDenseRestoreArray(S_VVt,&marr); 2297 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2298 } 2299 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2300 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2301 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2303 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2304 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2305 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2306 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2307 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2308 } else { 2309 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2310 } 2311 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2312 2313 /* coarse basis functions */ 2314 for (i=0;i<n_vertices;i++) { 2315 PetscScalar *y; 2316 2317 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2318 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2319 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2320 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2321 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2322 y[n_B*i+idx_V_B[i]] = 1.0; 2323 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2324 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2325 2326 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2327 PetscInt j; 2328 2329 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2330 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2331 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2332 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2333 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2334 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2335 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2336 } 2337 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2338 } 2339 /* if n_R == 0 the object is not destroyed */ 2340 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2341 } 2342 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2343 2344 if (n_constraints) { 2345 Mat B; 2346 2347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2348 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2349 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2350 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2351 if (n_vertices) { 2352 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2353 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2354 } else { 2355 Mat S_VCt; 2356 2357 if (lda_rhs != n_R) { 2358 ierr = MatDestroy(&B);CHKERRQ(ierr); 2359 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2360 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2361 } 2362 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2363 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2364 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2365 } 2366 } 2367 ierr = MatDestroy(&B);CHKERRQ(ierr); 2368 /* coarse basis functions */ 2369 for (i=0;i<n_constraints;i++) { 2370 PetscScalar *y; 2371 2372 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2373 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2374 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2375 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2376 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2377 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2378 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2379 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2380 PetscInt j; 2381 2382 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2383 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2384 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2385 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2386 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2387 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2388 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2389 } 2390 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2391 } 2392 } 2393 if (n_constraints) { 2394 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2395 } 2396 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2397 2398 /* coarse matrix entries relative to B_0 */ 2399 if (pcbddc->benign_n) { 2400 Mat B0_B,B0_BPHI; 2401 IS is_dummy; 2402 PetscScalar *data; 2403 PetscInt j; 2404 2405 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2406 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2407 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2408 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2409 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2410 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2411 for (j=0;j<pcbddc->benign_n;j++) { 2412 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2413 for (i=0;i<pcbddc->local_primal_size;i++) { 2414 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2415 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2416 } 2417 } 2418 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2419 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2420 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2421 } 2422 2423 /* compute other basis functions for non-symmetric problems */ 2424 if (!pcbddc->symmetric_primal) { 2425 Mat B_V=NULL,B_C=NULL; 2426 PetscScalar *marray; 2427 2428 if (n_constraints) { 2429 Mat S_CCT,C_CRT; 2430 2431 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2432 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2433 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2434 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2435 if (n_vertices) { 2436 Mat S_VCT; 2437 2438 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2439 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2440 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2441 } 2442 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2443 } else { 2444 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2445 } 2446 if (n_vertices && n_R) { 2447 PetscScalar *av,*marray; 2448 const PetscInt *xadj,*adjncy; 2449 PetscInt n; 2450 PetscBool flg_row; 2451 2452 /* B_V = B_V - A_VR^T */ 2453 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2454 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2455 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2456 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2457 for (i=0;i<n;i++) { 2458 PetscInt j; 2459 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2460 } 2461 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2462 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2463 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2464 } 2465 2466 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2467 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2468 for (i=0;i<n_vertices;i++) { 2469 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2470 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2471 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2472 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2473 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2474 } 2475 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2476 if (B_C) { 2477 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2478 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2479 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2480 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2481 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2482 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2483 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2484 } 2485 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2486 } 2487 /* coarse basis functions */ 2488 for (i=0;i<pcbddc->local_primal_size;i++) { 2489 PetscScalar *y; 2490 2491 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2492 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2493 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2494 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2495 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 if (i<n_vertices) { 2497 y[n_B*i+idx_V_B[i]] = 1.0; 2498 } 2499 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2500 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2501 2502 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2503 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2504 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2505 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2506 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2508 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2509 } 2510 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2511 } 2512 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2513 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2514 } 2515 /* free memory */ 2516 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2517 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2518 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2519 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2520 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2521 ierr = PetscFree(work);CHKERRQ(ierr); 2522 if (n_vertices) { 2523 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2524 } 2525 if (n_constraints) { 2526 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2527 } 2528 /* Checking coarse_sub_mat and coarse basis functios */ 2529 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2530 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2531 if (pcbddc->dbg_flag) { 2532 Mat coarse_sub_mat; 2533 Mat AUXMAT,TM1,TM2,TM3,TM4; 2534 Mat coarse_phi_D,coarse_phi_B; 2535 Mat coarse_psi_D,coarse_psi_B; 2536 Mat A_II,A_BB,A_IB,A_BI; 2537 Mat C_B,CPHI; 2538 IS is_dummy; 2539 Vec mones; 2540 MatType checkmattype=MATSEQAIJ; 2541 PetscReal real_value; 2542 2543 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2544 Mat A; 2545 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2546 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2547 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2548 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2549 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2550 ierr = MatDestroy(&A);CHKERRQ(ierr); 2551 } else { 2552 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2553 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2554 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2555 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2556 } 2557 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2558 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2559 if (!pcbddc->symmetric_primal) { 2560 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2561 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2562 } 2563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2564 2565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2568 if (!pcbddc->symmetric_primal) { 2569 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2570 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2571 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2572 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2573 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2574 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2575 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2576 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2577 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2578 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2579 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2580 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2581 } else { 2582 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2583 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2584 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2585 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2586 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2587 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2588 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2589 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2590 } 2591 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2592 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2593 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2594 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2595 if (pcbddc->benign_n) { 2596 Mat B0_B,B0_BPHI; 2597 PetscScalar *data,*data2; 2598 PetscInt j; 2599 2600 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2601 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2602 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2603 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2604 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2605 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2606 for (j=0;j<pcbddc->benign_n;j++) { 2607 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2608 for (i=0;i<pcbddc->local_primal_size;i++) { 2609 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2610 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2611 } 2612 } 2613 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2614 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2615 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2616 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2617 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2618 } 2619 #if 0 2620 { 2621 PetscViewer viewer; 2622 char filename[256]; 2623 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2624 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2625 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2626 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2627 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2628 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2629 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2630 if (save_change) { 2631 Mat phi_B; 2632 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2633 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2634 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2635 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2636 } else { 2637 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2638 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2639 } 2640 if (pcbddc->coarse_phi_D) { 2641 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2642 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2643 } 2644 if (pcbddc->coarse_psi_B) { 2645 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2646 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2647 } 2648 if (pcbddc->coarse_psi_D) { 2649 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2650 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2651 } 2652 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2653 } 2654 #endif 2655 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2656 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2657 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2658 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2659 2660 /* check constraints */ 2661 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2662 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2663 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2664 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2665 } else { 2666 PetscScalar *data; 2667 Mat tmat; 2668 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2670 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2671 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2672 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2673 } 2674 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2675 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2676 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2677 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2679 if (!pcbddc->symmetric_primal) { 2680 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2681 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2682 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2683 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2685 } 2686 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2687 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2688 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2689 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2690 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2691 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2692 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2693 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2694 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2695 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2696 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2697 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2698 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2699 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2700 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2701 if (!pcbddc->symmetric_primal) { 2702 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2703 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2704 } 2705 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2706 } 2707 /* get back data */ 2708 *coarse_submat_vals_n = coarse_submat_vals; 2709 PetscFunctionReturn(0); 2710 } 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2714 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2715 { 2716 Mat *work_mat; 2717 IS isrow_s,iscol_s; 2718 PetscBool rsorted,csorted; 2719 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2720 PetscErrorCode ierr; 2721 2722 PetscFunctionBegin; 2723 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2724 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2725 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2726 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2727 2728 if (!rsorted) { 2729 const PetscInt *idxs; 2730 PetscInt *idxs_sorted,i; 2731 2732 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2733 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2734 for (i=0;i<rsize;i++) { 2735 idxs_perm_r[i] = i; 2736 } 2737 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2738 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2739 for (i=0;i<rsize;i++) { 2740 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2741 } 2742 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2743 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2744 } else { 2745 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2746 isrow_s = isrow; 2747 } 2748 2749 if (!csorted) { 2750 if (isrow == iscol) { 2751 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2752 iscol_s = isrow_s; 2753 } else { 2754 const PetscInt *idxs; 2755 PetscInt *idxs_sorted,i; 2756 2757 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2758 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2759 for (i=0;i<csize;i++) { 2760 idxs_perm_c[i] = i; 2761 } 2762 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2763 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2764 for (i=0;i<csize;i++) { 2765 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2766 } 2767 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2768 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2769 } 2770 } else { 2771 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2772 iscol_s = iscol; 2773 } 2774 2775 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2776 2777 if (!rsorted || !csorted) { 2778 Mat new_mat; 2779 IS is_perm_r,is_perm_c; 2780 2781 if (!rsorted) { 2782 PetscInt *idxs_r,i; 2783 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2784 for (i=0;i<rsize;i++) { 2785 idxs_r[idxs_perm_r[i]] = i; 2786 } 2787 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2788 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2789 } else { 2790 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2791 } 2792 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2793 2794 if (!csorted) { 2795 if (isrow_s == iscol_s) { 2796 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2797 is_perm_c = is_perm_r; 2798 } else { 2799 PetscInt *idxs_c,i; 2800 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2801 for (i=0;i<csize;i++) { 2802 idxs_c[idxs_perm_c[i]] = i; 2803 } 2804 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2805 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2806 } 2807 } else { 2808 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2809 } 2810 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2811 2812 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2813 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2814 work_mat[0] = new_mat; 2815 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2816 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2817 } 2818 2819 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2820 *B = work_mat[0]; 2821 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2822 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2823 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 #undef __FUNCT__ 2828 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2829 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2830 { 2831 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2832 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2833 Mat new_mat; 2834 IS is_local,is_global; 2835 PetscInt local_size; 2836 PetscBool isseqaij; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2841 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2842 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2843 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2844 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2845 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2846 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2847 2848 /* check */ 2849 if (pcbddc->dbg_flag) { 2850 Vec x,x_change; 2851 PetscReal error; 2852 2853 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2854 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2855 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2856 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2857 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2858 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2859 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2860 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2862 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2863 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2865 ierr = VecDestroy(&x);CHKERRQ(ierr); 2866 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2867 } 2868 2869 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2870 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2871 if (isseqaij) { 2872 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2873 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2874 } else { 2875 Mat work_mat; 2876 2877 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2878 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2879 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2880 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2881 } 2882 if (matis->A->symmetric_set) { 2883 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2884 #if !defined(PETSC_USE_COMPLEX) 2885 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2886 #endif 2887 } 2888 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2894 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2895 { 2896 PC_IS* pcis = (PC_IS*)(pc->data); 2897 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2898 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2899 PetscInt *idx_R_local=NULL; 2900 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2901 PetscInt vbs,bs; 2902 PetscBT bitmask=NULL; 2903 PetscErrorCode ierr; 2904 2905 PetscFunctionBegin; 2906 /* 2907 No need to setup local scatters if 2908 - primal space is unchanged 2909 AND 2910 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2911 AND 2912 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2913 */ 2914 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2915 PetscFunctionReturn(0); 2916 } 2917 /* destroy old objects */ 2918 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2919 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2920 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2921 /* Set Non-overlapping dimensions */ 2922 n_B = pcis->n_B; 2923 n_D = pcis->n - n_B; 2924 n_vertices = pcbddc->n_vertices; 2925 2926 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2927 2928 /* create auxiliary bitmask and allocate workspace */ 2929 if (!sub_schurs || !sub_schurs->reuse_solver) { 2930 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2931 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2932 for (i=0;i<n_vertices;i++) { 2933 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2934 } 2935 2936 for (i=0, n_R=0; i<pcis->n; i++) { 2937 if (!PetscBTLookup(bitmask,i)) { 2938 idx_R_local[n_R++] = i; 2939 } 2940 } 2941 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2942 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2943 2944 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2945 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2946 } 2947 2948 /* Block code */ 2949 vbs = 1; 2950 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2951 if (bs>1 && !(n_vertices%bs)) { 2952 PetscBool is_blocked = PETSC_TRUE; 2953 PetscInt *vary; 2954 if (!sub_schurs || !sub_schurs->reuse_solver) { 2955 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2956 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2957 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2958 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2959 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2960 for (i=0; i<pcis->n/bs; i++) { 2961 if (vary[i]!=0 && vary[i]!=bs) { 2962 is_blocked = PETSC_FALSE; 2963 break; 2964 } 2965 } 2966 ierr = PetscFree(vary);CHKERRQ(ierr); 2967 } else { 2968 /* Verify directly the R set */ 2969 for (i=0; i<n_R/bs; i++) { 2970 PetscInt j,node=idx_R_local[bs*i]; 2971 for (j=1; j<bs; j++) { 2972 if (node != idx_R_local[bs*i+j]-j) { 2973 is_blocked = PETSC_FALSE; 2974 break; 2975 } 2976 } 2977 } 2978 } 2979 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2980 vbs = bs; 2981 for (i=0;i<n_R/vbs;i++) { 2982 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2983 } 2984 } 2985 } 2986 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2987 if (sub_schurs && sub_schurs->reuse_solver) { 2988 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2989 2990 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2991 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2992 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2993 reuse_solver->is_R = pcbddc->is_R_local; 2994 } else { 2995 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2996 } 2997 2998 /* print some info if requested */ 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3002 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3006 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3007 } 3008 3009 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3010 if (!sub_schurs || !sub_schurs->reuse_solver) { 3011 IS is_aux1,is_aux2; 3012 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3013 3014 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3015 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3016 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3017 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3018 for (i=0; i<n_D; i++) { 3019 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3020 } 3021 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3022 for (i=0, j=0; i<n_R; i++) { 3023 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3024 aux_array1[j++] = i; 3025 } 3026 } 3027 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3028 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3029 for (i=0, j=0; i<n_B; i++) { 3030 if (!PetscBTLookup(bitmask,is_indices[i])) { 3031 aux_array2[j++] = i; 3032 } 3033 } 3034 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3038 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3039 3040 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3041 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3042 for (i=0, j=0; i<n_R; i++) { 3043 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3044 aux_array1[j++] = i; 3045 } 3046 } 3047 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3048 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3049 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3050 } 3051 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3052 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3053 } else { 3054 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3055 IS tis; 3056 PetscInt schur_size; 3057 3058 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3059 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3060 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3061 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3062 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3063 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3064 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3065 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3066 } 3067 } 3068 PetscFunctionReturn(0); 3069 } 3070 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3074 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3075 { 3076 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3077 PC_IS *pcis = (PC_IS*)pc->data; 3078 PC pc_temp; 3079 Mat A_RR; 3080 MatReuse reuse; 3081 PetscScalar m_one = -1.0; 3082 PetscReal value; 3083 PetscInt n_D,n_R; 3084 PetscBool check_corr[2],issbaij; 3085 PetscErrorCode ierr; 3086 /* prefixes stuff */ 3087 char dir_prefix[256],neu_prefix[256],str_level[16]; 3088 size_t len; 3089 3090 PetscFunctionBegin; 3091 3092 /* compute prefixes */ 3093 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3094 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3095 if (!pcbddc->current_level) { 3096 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3097 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3098 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3099 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3100 } else { 3101 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3102 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3103 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3104 len -= 15; /* remove "pc_bddc_coarse_" */ 3105 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3106 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3107 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3108 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3109 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3110 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3111 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3112 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3113 } 3114 3115 /* DIRICHLET PROBLEM */ 3116 if (dirichlet) { 3117 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3118 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3119 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3120 if (pcbddc->dbg_flag) { 3121 Mat A_IIn; 3122 3123 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3124 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3125 pcis->A_II = A_IIn; 3126 } 3127 } 3128 if (pcbddc->local_mat->symmetric_set) { 3129 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3130 } 3131 /* Matrix for Dirichlet problem is pcis->A_II */ 3132 n_D = pcis->n - pcis->n_B; 3133 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3134 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3135 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3136 /* default */ 3137 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3138 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3139 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3140 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3141 if (issbaij) { 3142 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3143 } else { 3144 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3145 } 3146 /* Allow user's customization */ 3147 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3148 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3149 } 3150 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3151 if (sub_schurs && sub_schurs->reuse_solver) { 3152 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3153 3154 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3155 } 3156 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3157 if (!n_D) { 3158 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3159 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3160 } 3161 /* Set Up KSP for Dirichlet problem of BDDC */ 3162 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3163 /* set ksp_D into pcis data */ 3164 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3165 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3166 pcis->ksp_D = pcbddc->ksp_D; 3167 } 3168 3169 /* NEUMANN PROBLEM */ 3170 A_RR = 0; 3171 if (neumann) { 3172 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3173 PetscInt ibs,mbs; 3174 PetscBool issbaij; 3175 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3176 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3177 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3178 if (pcbddc->ksp_R) { /* already created ksp */ 3179 PetscInt nn_R; 3180 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3181 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3182 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3183 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3184 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3185 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3186 reuse = MAT_INITIAL_MATRIX; 3187 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3188 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3189 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3190 reuse = MAT_INITIAL_MATRIX; 3191 } else { /* safe to reuse the matrix */ 3192 reuse = MAT_REUSE_MATRIX; 3193 } 3194 } 3195 /* last check */ 3196 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3197 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3198 reuse = MAT_INITIAL_MATRIX; 3199 } 3200 } else { /* first time, so we need to create the matrix */ 3201 reuse = MAT_INITIAL_MATRIX; 3202 } 3203 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3204 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3205 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3206 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3207 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3208 if (matis->A == pcbddc->local_mat) { 3209 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3210 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3211 } else { 3212 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3213 } 3214 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3215 if (matis->A == pcbddc->local_mat) { 3216 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3217 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3218 } else { 3219 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3220 } 3221 } 3222 /* extract A_RR */ 3223 if (sub_schurs && sub_schurs->reuse_solver) { 3224 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3225 3226 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3227 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3228 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3229 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3230 } else { 3231 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3232 } 3233 } else { 3234 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3235 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3236 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3237 } 3238 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3239 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3240 } 3241 if (pcbddc->local_mat->symmetric_set) { 3242 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3243 } 3244 if (!pcbddc->ksp_R) { /* create object if not present */ 3245 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3246 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3247 /* default */ 3248 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3249 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3250 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3251 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3252 if (issbaij) { 3253 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3254 } else { 3255 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3256 } 3257 /* Allow user's customization */ 3258 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3259 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3260 } 3261 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3262 if (!n_R) { 3263 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3264 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3265 } 3266 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3267 /* Reuse solver if it is present */ 3268 if (sub_schurs && sub_schurs->reuse_solver) { 3269 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3270 3271 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3272 } 3273 /* Set Up KSP for Neumann problem of BDDC */ 3274 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3275 } 3276 3277 if (pcbddc->dbg_flag) { 3278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3279 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3280 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3281 } 3282 3283 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3284 check_corr[0] = check_corr[1] = PETSC_FALSE; 3285 if (pcbddc->NullSpace_corr[0]) { 3286 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3287 } 3288 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3289 check_corr[0] = PETSC_TRUE; 3290 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3291 } 3292 if (neumann && pcbddc->NullSpace_corr[2]) { 3293 check_corr[1] = PETSC_TRUE; 3294 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3295 } 3296 3297 /* check Dirichlet and Neumann solvers */ 3298 if (pcbddc->dbg_flag) { 3299 if (dirichlet) { /* Dirichlet */ 3300 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3301 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3302 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3303 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3304 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3305 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3306 if (check_corr[0]) { 3307 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3308 } 3309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3310 } 3311 if (neumann) { /* Neumann */ 3312 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3313 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3314 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3315 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3316 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3318 if (check_corr[1]) { 3319 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3320 } 3321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3322 } 3323 } 3324 /* free Neumann problem's matrix */ 3325 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3331 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3332 { 3333 PetscErrorCode ierr; 3334 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3335 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3336 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3337 3338 PetscFunctionBegin; 3339 if (!reuse_solver) { 3340 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3341 } 3342 if (!pcbddc->switch_static) { 3343 if (applytranspose && pcbddc->local_auxmat1) { 3344 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3345 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3346 } 3347 if (!reuse_solver) { 3348 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3349 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3350 } else { 3351 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3352 3353 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3354 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3355 } 3356 } else { 3357 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3358 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3359 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3360 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3361 if (applytranspose && pcbddc->local_auxmat1) { 3362 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3363 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3364 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3365 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3366 } 3367 } 3368 if (!reuse_solver || pcbddc->switch_static) { 3369 if (applytranspose) { 3370 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3371 } else { 3372 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3373 } 3374 } else { 3375 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3376 3377 if (applytranspose) { 3378 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3379 } else { 3380 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3381 } 3382 } 3383 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3384 if (!pcbddc->switch_static) { 3385 if (!reuse_solver) { 3386 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3387 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3388 } else { 3389 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3390 3391 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3392 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3393 } 3394 if (!applytranspose && pcbddc->local_auxmat1) { 3395 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3396 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3397 } 3398 } else { 3399 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3400 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3403 if (!applytranspose && pcbddc->local_auxmat1) { 3404 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3405 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3406 } 3407 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3408 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3411 } 3412 PetscFunctionReturn(0); 3413 } 3414 3415 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3416 #undef __FUNCT__ 3417 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3418 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3419 { 3420 PetscErrorCode ierr; 3421 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3422 PC_IS* pcis = (PC_IS*) (pc->data); 3423 const PetscScalar zero = 0.0; 3424 3425 PetscFunctionBegin; 3426 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3427 if (!pcbddc->benign_apply_coarse_only) { 3428 if (applytranspose) { 3429 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3430 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3431 } else { 3432 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3433 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3434 } 3435 } else { 3436 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3437 } 3438 3439 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3440 if (pcbddc->benign_n) { 3441 PetscScalar *array; 3442 PetscInt j; 3443 3444 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3445 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3446 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3447 } 3448 3449 /* start communications from local primal nodes to rhs of coarse solver */ 3450 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3451 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3452 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3453 3454 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3455 if (pcbddc->coarse_ksp) { 3456 Mat coarse_mat; 3457 Vec rhs,sol; 3458 MatNullSpace nullsp; 3459 PetscBool isbddc = PETSC_FALSE; 3460 3461 if (pcbddc->benign_have_null) { 3462 PC coarse_pc; 3463 3464 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3465 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3466 /* we need to propagate to coarser levels the need for a possible benign correction */ 3467 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3468 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3469 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3470 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3471 } 3472 } 3473 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3474 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3475 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3476 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3477 if (nullsp) { 3478 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3479 } 3480 if (applytranspose) { 3481 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3482 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3483 } else { 3484 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3485 PC coarse_pc; 3486 3487 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3488 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3489 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3490 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3491 } else { 3492 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3493 } 3494 } 3495 /* we don't need the benign correction at coarser levels anymore */ 3496 if (pcbddc->benign_have_null && isbddc) { 3497 PC coarse_pc; 3498 PC_BDDC* coarsepcbddc; 3499 3500 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3501 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3502 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3503 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3504 } 3505 if (nullsp) { 3506 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3507 } 3508 } 3509 3510 /* Local solution on R nodes */ 3511 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3512 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3513 } 3514 /* communications from coarse sol to local primal nodes */ 3515 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3516 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3517 3518 /* Sum contributions from the two levels */ 3519 if (!pcbddc->benign_apply_coarse_only) { 3520 if (applytranspose) { 3521 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3522 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3523 } else { 3524 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3525 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3526 } 3527 /* store p0 */ 3528 if (pcbddc->benign_n) { 3529 PetscScalar *array; 3530 PetscInt j; 3531 3532 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3533 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3534 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3535 } 3536 } else { /* expand the coarse solution */ 3537 if (applytranspose) { 3538 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3539 } else { 3540 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3541 } 3542 } 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3548 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3549 { 3550 PetscErrorCode ierr; 3551 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3552 PetscScalar *array; 3553 Vec from,to; 3554 3555 PetscFunctionBegin; 3556 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3557 from = pcbddc->coarse_vec; 3558 to = pcbddc->vec1_P; 3559 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3560 Vec tvec; 3561 3562 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3563 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3564 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3565 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3566 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3567 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3568 } 3569 } else { /* from local to global -> put data in coarse right hand side */ 3570 from = pcbddc->vec1_P; 3571 to = pcbddc->coarse_vec; 3572 } 3573 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3574 PetscFunctionReturn(0); 3575 } 3576 3577 #undef __FUNCT__ 3578 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3579 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3580 { 3581 PetscErrorCode ierr; 3582 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3583 PetscScalar *array; 3584 Vec from,to; 3585 3586 PetscFunctionBegin; 3587 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3588 from = pcbddc->coarse_vec; 3589 to = pcbddc->vec1_P; 3590 } else { /* from local to global -> put data in coarse right hand side */ 3591 from = pcbddc->vec1_P; 3592 to = pcbddc->coarse_vec; 3593 } 3594 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3595 if (smode == SCATTER_FORWARD) { 3596 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3597 Vec tvec; 3598 3599 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3600 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3601 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3602 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3603 } 3604 } else { 3605 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3606 ierr = VecResetArray(from);CHKERRQ(ierr); 3607 } 3608 } 3609 PetscFunctionReturn(0); 3610 } 3611 3612 /* uncomment for testing purposes */ 3613 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3614 #undef __FUNCT__ 3615 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3616 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3617 { 3618 PetscErrorCode ierr; 3619 PC_IS* pcis = (PC_IS*)(pc->data); 3620 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3621 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3622 /* one and zero */ 3623 PetscScalar one=1.0,zero=0.0; 3624 /* space to store constraints and their local indices */ 3625 PetscScalar *constraints_data; 3626 PetscInt *constraints_idxs,*constraints_idxs_B; 3627 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3628 PetscInt *constraints_n; 3629 /* iterators */ 3630 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3631 /* BLAS integers */ 3632 PetscBLASInt lwork,lierr; 3633 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3634 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3635 /* reuse */ 3636 PetscInt olocal_primal_size,olocal_primal_size_cc; 3637 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3638 /* change of basis */ 3639 PetscBool qr_needed; 3640 PetscBT change_basis,qr_needed_idx; 3641 /* auxiliary stuff */ 3642 PetscInt *nnz,*is_indices; 3643 PetscInt ncc; 3644 /* some quantities */ 3645 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3646 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3647 3648 PetscFunctionBegin; 3649 /* Destroy Mat objects computed previously */ 3650 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3651 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3652 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3653 /* save info on constraints from previous setup (if any) */ 3654 olocal_primal_size = pcbddc->local_primal_size; 3655 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3656 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3657 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3658 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3659 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3660 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3661 3662 if (!pcbddc->adaptive_selection) { 3663 IS ISForVertices,*ISForFaces,*ISForEdges; 3664 MatNullSpace nearnullsp; 3665 const Vec *nearnullvecs; 3666 Vec *localnearnullsp; 3667 PetscScalar *array; 3668 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3669 PetscBool nnsp_has_cnst; 3670 /* LAPACK working arrays for SVD or POD */ 3671 PetscBool skip_lapack,boolforchange; 3672 PetscScalar *work; 3673 PetscReal *singular_vals; 3674 #if defined(PETSC_USE_COMPLEX) 3675 PetscReal *rwork; 3676 #endif 3677 #if defined(PETSC_MISSING_LAPACK_GESVD) 3678 PetscScalar *temp_basis,*correlation_mat; 3679 #else 3680 PetscBLASInt dummy_int=1; 3681 PetscScalar dummy_scalar=1.; 3682 #endif 3683 3684 /* Get index sets for faces, edges and vertices from graph */ 3685 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3686 /* print some info */ 3687 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3688 PetscInt nv; 3689 3690 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 3691 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3692 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3697 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3698 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3699 } 3700 3701 /* free unneeded index sets */ 3702 if (!pcbddc->use_vertices) { 3703 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3704 } 3705 if (!pcbddc->use_edges) { 3706 for (i=0;i<n_ISForEdges;i++) { 3707 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3708 } 3709 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3710 n_ISForEdges = 0; 3711 } 3712 if (!pcbddc->use_faces) { 3713 for (i=0;i<n_ISForFaces;i++) { 3714 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3715 } 3716 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3717 n_ISForFaces = 0; 3718 } 3719 3720 /* check if near null space is attached to global mat */ 3721 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3722 if (nearnullsp) { 3723 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3724 /* remove any stored info */ 3725 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3726 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3727 /* store information for BDDC solver reuse */ 3728 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3729 pcbddc->onearnullspace = nearnullsp; 3730 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3731 for (i=0;i<nnsp_size;i++) { 3732 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3733 } 3734 } else { /* if near null space is not provided BDDC uses constants by default */ 3735 nnsp_size = 0; 3736 nnsp_has_cnst = PETSC_TRUE; 3737 } 3738 /* get max number of constraints on a single cc */ 3739 max_constraints = nnsp_size; 3740 if (nnsp_has_cnst) max_constraints++; 3741 3742 /* 3743 Evaluate maximum storage size needed by the procedure 3744 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3745 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3746 There can be multiple constraints per connected component 3747 */ 3748 n_vertices = 0; 3749 if (ISForVertices) { 3750 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3751 } 3752 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3753 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3754 3755 total_counts = n_ISForFaces+n_ISForEdges; 3756 total_counts *= max_constraints; 3757 total_counts += n_vertices; 3758 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3759 3760 total_counts = 0; 3761 max_size_of_constraint = 0; 3762 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3763 IS used_is; 3764 if (i<n_ISForEdges) { 3765 used_is = ISForEdges[i]; 3766 } else { 3767 used_is = ISForFaces[i-n_ISForEdges]; 3768 } 3769 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3770 total_counts += j; 3771 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3772 } 3773 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3774 3775 /* get local part of global near null space vectors */ 3776 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3777 for (k=0;k<nnsp_size;k++) { 3778 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3779 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3780 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3781 } 3782 3783 /* whether or not to skip lapack calls */ 3784 skip_lapack = PETSC_TRUE; 3785 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3786 3787 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3788 if (!skip_lapack) { 3789 PetscScalar temp_work; 3790 3791 #if defined(PETSC_MISSING_LAPACK_GESVD) 3792 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3793 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3794 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3795 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3796 #if defined(PETSC_USE_COMPLEX) 3797 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3798 #endif 3799 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3800 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3801 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3802 lwork = -1; 3803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3804 #if !defined(PETSC_USE_COMPLEX) 3805 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3806 #else 3807 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3808 #endif 3809 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3811 #else /* on missing GESVD */ 3812 /* SVD */ 3813 PetscInt max_n,min_n; 3814 max_n = max_size_of_constraint; 3815 min_n = max_constraints; 3816 if (max_size_of_constraint < max_constraints) { 3817 min_n = max_size_of_constraint; 3818 max_n = max_constraints; 3819 } 3820 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3821 #if defined(PETSC_USE_COMPLEX) 3822 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3823 #endif 3824 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3825 lwork = -1; 3826 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3827 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3828 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3829 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3830 #if !defined(PETSC_USE_COMPLEX) 3831 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3832 #else 3833 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3834 #endif 3835 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3836 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3837 #endif /* on missing GESVD */ 3838 /* Allocate optimal workspace */ 3839 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3840 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3841 } 3842 /* Now we can loop on constraining sets */ 3843 total_counts = 0; 3844 constraints_idxs_ptr[0] = 0; 3845 constraints_data_ptr[0] = 0; 3846 /* vertices */ 3847 if (n_vertices) { 3848 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3849 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3850 for (i=0;i<n_vertices;i++) { 3851 constraints_n[total_counts] = 1; 3852 constraints_data[total_counts] = 1.0; 3853 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3854 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3855 total_counts++; 3856 } 3857 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3858 n_vertices = total_counts; 3859 } 3860 3861 /* edges and faces */ 3862 total_counts_cc = total_counts; 3863 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3864 IS used_is; 3865 PetscBool idxs_copied = PETSC_FALSE; 3866 3867 if (ncc<n_ISForEdges) { 3868 used_is = ISForEdges[ncc]; 3869 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3870 } else { 3871 used_is = ISForFaces[ncc-n_ISForEdges]; 3872 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3873 } 3874 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3875 3876 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3877 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3878 /* change of basis should not be performed on local periodic nodes */ 3879 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3880 if (nnsp_has_cnst) { 3881 PetscScalar quad_value; 3882 3883 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3884 idxs_copied = PETSC_TRUE; 3885 3886 if (!pcbddc->use_nnsp_true) { 3887 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3888 } else { 3889 quad_value = 1.0; 3890 } 3891 for (j=0;j<size_of_constraint;j++) { 3892 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3893 } 3894 temp_constraints++; 3895 total_counts++; 3896 } 3897 for (k=0;k<nnsp_size;k++) { 3898 PetscReal real_value; 3899 PetscScalar *ptr_to_data; 3900 3901 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3902 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3903 for (j=0;j<size_of_constraint;j++) { 3904 ptr_to_data[j] = array[is_indices[j]]; 3905 } 3906 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3907 /* check if array is null on the connected component */ 3908 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3909 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3910 if (real_value > 0.0) { /* keep indices and values */ 3911 temp_constraints++; 3912 total_counts++; 3913 if (!idxs_copied) { 3914 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3915 idxs_copied = PETSC_TRUE; 3916 } 3917 } 3918 } 3919 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3920 valid_constraints = temp_constraints; 3921 if (!pcbddc->use_nnsp_true && temp_constraints) { 3922 if (temp_constraints == 1) { /* just normalize the constraint */ 3923 PetscScalar norm,*ptr_to_data; 3924 3925 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3927 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3928 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3929 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3930 } else { /* perform SVD */ 3931 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3932 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3933 3934 #if defined(PETSC_MISSING_LAPACK_GESVD) 3935 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3936 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3937 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3938 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3939 from that computed using LAPACKgesvd 3940 -> This is due to a different computation of eigenvectors in LAPACKheev 3941 -> The quality of the POD-computed basis will be the same */ 3942 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3943 /* Store upper triangular part of correlation matrix */ 3944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3945 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3946 for (j=0;j<temp_constraints;j++) { 3947 for (k=0;k<j+1;k++) { 3948 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3949 } 3950 } 3951 /* compute eigenvalues and eigenvectors of correlation matrix */ 3952 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3953 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3954 #if !defined(PETSC_USE_COMPLEX) 3955 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3956 #else 3957 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3958 #endif 3959 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3960 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3961 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3962 j = 0; 3963 while (j < temp_constraints && singular_vals[j] < tol) j++; 3964 total_counts = total_counts-j; 3965 valid_constraints = temp_constraints-j; 3966 /* scale and copy POD basis into used quadrature memory */ 3967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3968 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3969 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3971 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3973 if (j<temp_constraints) { 3974 PetscInt ii; 3975 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3976 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3977 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3978 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3979 for (k=0;k<temp_constraints-j;k++) { 3980 for (ii=0;ii<size_of_constraint;ii++) { 3981 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3982 } 3983 } 3984 } 3985 #else /* on missing GESVD */ 3986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3987 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3988 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3989 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3990 #if !defined(PETSC_USE_COMPLEX) 3991 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3992 #else 3993 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3994 #endif 3995 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3996 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3997 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3998 k = temp_constraints; 3999 if (k > size_of_constraint) k = size_of_constraint; 4000 j = 0; 4001 while (j < k && singular_vals[k-j-1] < tol) j++; 4002 valid_constraints = k-j; 4003 total_counts = total_counts-temp_constraints+valid_constraints; 4004 #endif /* on missing GESVD */ 4005 } 4006 } 4007 /* update pointers information */ 4008 if (valid_constraints) { 4009 constraints_n[total_counts_cc] = valid_constraints; 4010 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4011 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4012 /* set change_of_basis flag */ 4013 if (boolforchange) { 4014 PetscBTSet(change_basis,total_counts_cc); 4015 } 4016 total_counts_cc++; 4017 } 4018 } 4019 /* free workspace */ 4020 if (!skip_lapack) { 4021 ierr = PetscFree(work);CHKERRQ(ierr); 4022 #if defined(PETSC_USE_COMPLEX) 4023 ierr = PetscFree(rwork);CHKERRQ(ierr); 4024 #endif 4025 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4026 #if defined(PETSC_MISSING_LAPACK_GESVD) 4027 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4028 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4029 #endif 4030 } 4031 for (k=0;k<nnsp_size;k++) { 4032 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4033 } 4034 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4035 /* free index sets of faces, edges and vertices */ 4036 for (i=0;i<n_ISForFaces;i++) { 4037 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4038 } 4039 if (n_ISForFaces) { 4040 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4041 } 4042 for (i=0;i<n_ISForEdges;i++) { 4043 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4044 } 4045 if (n_ISForEdges) { 4046 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4047 } 4048 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4049 } else { 4050 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4051 4052 total_counts = 0; 4053 n_vertices = 0; 4054 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4055 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4056 } 4057 max_constraints = 0; 4058 total_counts_cc = 0; 4059 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4060 total_counts += pcbddc->adaptive_constraints_n[i]; 4061 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4062 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4063 } 4064 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4065 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4066 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4067 constraints_data = pcbddc->adaptive_constraints_data; 4068 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4069 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4070 total_counts_cc = 0; 4071 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4072 if (pcbddc->adaptive_constraints_n[i]) { 4073 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4074 } 4075 } 4076 #if 0 4077 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4078 for (i=0;i<total_counts_cc;i++) { 4079 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4080 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4081 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4082 printf(" %d",constraints_idxs[j]); 4083 } 4084 printf("\n"); 4085 printf("number of cc: %d\n",constraints_n[i]); 4086 } 4087 for (i=0;i<n_vertices;i++) { 4088 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4089 } 4090 for (i=0;i<sub_schurs->n_subs;i++) { 4091 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4092 } 4093 #endif 4094 4095 max_size_of_constraint = 0; 4096 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4097 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4098 /* Change of basis */ 4099 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4100 if (pcbddc->use_change_of_basis) { 4101 for (i=0;i<sub_schurs->n_subs;i++) { 4102 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4103 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4104 } 4105 } 4106 } 4107 } 4108 pcbddc->local_primal_size = total_counts; 4109 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4110 4111 /* map constraints_idxs in boundary numbering */ 4112 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4113 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4114 4115 /* Create constraint matrix */ 4116 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4117 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4118 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4119 4120 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4121 /* determine if a QR strategy is needed for change of basis */ 4122 qr_needed = PETSC_FALSE; 4123 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4124 total_primal_vertices=0; 4125 pcbddc->local_primal_size_cc = 0; 4126 for (i=0;i<total_counts_cc;i++) { 4127 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4128 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4129 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4130 pcbddc->local_primal_size_cc += 1; 4131 } else if (PetscBTLookup(change_basis,i)) { 4132 for (k=0;k<constraints_n[i];k++) { 4133 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4134 } 4135 pcbddc->local_primal_size_cc += constraints_n[i]; 4136 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4137 PetscBTSet(qr_needed_idx,i); 4138 qr_needed = PETSC_TRUE; 4139 } 4140 } else { 4141 pcbddc->local_primal_size_cc += 1; 4142 } 4143 } 4144 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4145 pcbddc->n_vertices = total_primal_vertices; 4146 /* permute indices in order to have a sorted set of vertices */ 4147 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4148 4149 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4150 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4151 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4152 4153 /* nonzero structure of constraint matrix */ 4154 /* and get reference dof for local constraints */ 4155 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4156 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4157 4158 j = total_primal_vertices; 4159 total_counts = total_primal_vertices; 4160 cum = total_primal_vertices; 4161 for (i=n_vertices;i<total_counts_cc;i++) { 4162 if (!PetscBTLookup(change_basis,i)) { 4163 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4164 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4165 cum++; 4166 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4167 for (k=0;k<constraints_n[i];k++) { 4168 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4169 nnz[j+k] = size_of_constraint; 4170 } 4171 j += constraints_n[i]; 4172 } 4173 } 4174 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4175 ierr = PetscFree(nnz);CHKERRQ(ierr); 4176 4177 /* set values in constraint matrix */ 4178 for (i=0;i<total_primal_vertices;i++) { 4179 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4180 } 4181 total_counts = total_primal_vertices; 4182 for (i=n_vertices;i<total_counts_cc;i++) { 4183 if (!PetscBTLookup(change_basis,i)) { 4184 PetscInt *cols; 4185 4186 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4187 cols = constraints_idxs+constraints_idxs_ptr[i]; 4188 for (k=0;k<constraints_n[i];k++) { 4189 PetscInt row = total_counts+k; 4190 PetscScalar *vals; 4191 4192 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4193 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4194 } 4195 total_counts += constraints_n[i]; 4196 } 4197 } 4198 /* assembling */ 4199 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4200 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4201 4202 /* 4203 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4204 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4205 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4206 */ 4207 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4208 if (pcbddc->use_change_of_basis) { 4209 /* dual and primal dofs on a single cc */ 4210 PetscInt dual_dofs,primal_dofs; 4211 /* working stuff for GEQRF */ 4212 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4213 PetscBLASInt lqr_work; 4214 /* working stuff for UNGQR */ 4215 PetscScalar *gqr_work,lgqr_work_t; 4216 PetscBLASInt lgqr_work; 4217 /* working stuff for TRTRS */ 4218 PetscScalar *trs_rhs; 4219 PetscBLASInt Blas_NRHS; 4220 /* pointers for values insertion into change of basis matrix */ 4221 PetscInt *start_rows,*start_cols; 4222 PetscScalar *start_vals; 4223 /* working stuff for values insertion */ 4224 PetscBT is_primal; 4225 PetscInt *aux_primal_numbering_B; 4226 /* matrix sizes */ 4227 PetscInt global_size,local_size; 4228 /* temporary change of basis */ 4229 Mat localChangeOfBasisMatrix; 4230 /* extra space for debugging */ 4231 PetscScalar *dbg_work; 4232 4233 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4234 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4235 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4236 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4237 /* nonzeros for local mat */ 4238 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4239 if (!pcbddc->benign_change || pcbddc->fake_change) { 4240 for (i=0;i<pcis->n;i++) nnz[i]=1; 4241 } else { 4242 const PetscInt *ii; 4243 PetscInt n; 4244 PetscBool flg_row; 4245 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4246 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4247 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4248 } 4249 for (i=n_vertices;i<total_counts_cc;i++) { 4250 if (PetscBTLookup(change_basis,i)) { 4251 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4252 if (PetscBTLookup(qr_needed_idx,i)) { 4253 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4254 } else { 4255 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4256 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4257 } 4258 } 4259 } 4260 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4261 ierr = PetscFree(nnz);CHKERRQ(ierr); 4262 /* Set interior change in the matrix */ 4263 if (!pcbddc->benign_change || pcbddc->fake_change) { 4264 for (i=0;i<pcis->n;i++) { 4265 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4266 } 4267 } else { 4268 const PetscInt *ii,*jj; 4269 PetscScalar *aa; 4270 PetscInt n; 4271 PetscBool flg_row; 4272 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4273 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4274 for (i=0;i<n;i++) { 4275 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4276 } 4277 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4278 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4279 } 4280 4281 if (pcbddc->dbg_flag) { 4282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4284 } 4285 4286 4287 /* Now we loop on the constraints which need a change of basis */ 4288 /* 4289 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4290 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4291 4292 Basic blocks of change of basis matrix T computed by 4293 4294 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4295 4296 | 1 0 ... 0 s_1/S | 4297 | 0 1 ... 0 s_2/S | 4298 | ... | 4299 | 0 ... 1 s_{n-1}/S | 4300 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4301 4302 with S = \sum_{i=1}^n s_i^2 4303 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4304 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4305 4306 - QR decomposition of constraints otherwise 4307 */ 4308 if (qr_needed) { 4309 /* space to store Q */ 4310 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4311 /* first we issue queries for optimal work */ 4312 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4313 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4314 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4315 lqr_work = -1; 4316 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4317 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4318 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4319 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4320 lgqr_work = -1; 4321 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4322 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4323 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4324 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4325 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4326 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4327 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4328 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4329 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4330 /* array to store scaling factors for reflectors */ 4331 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4332 /* array to store rhs and solution of triangular solver */ 4333 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4334 /* allocating workspace for check */ 4335 if (pcbddc->dbg_flag) { 4336 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4337 } 4338 } 4339 /* array to store whether a node is primal or not */ 4340 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4341 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4342 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4343 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4344 for (i=0;i<total_primal_vertices;i++) { 4345 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4346 } 4347 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4348 4349 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4350 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4351 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4352 if (PetscBTLookup(change_basis,total_counts)) { 4353 /* get constraint info */ 4354 primal_dofs = constraints_n[total_counts]; 4355 dual_dofs = size_of_constraint-primal_dofs; 4356 4357 if (pcbddc->dbg_flag) { 4358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4359 } 4360 4361 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4362 4363 /* copy quadrature constraints for change of basis check */ 4364 if (pcbddc->dbg_flag) { 4365 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4366 } 4367 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4368 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4369 4370 /* compute QR decomposition of constraints */ 4371 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4372 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4373 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4374 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4375 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4376 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4377 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4378 4379 /* explictly compute R^-T */ 4380 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4381 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4382 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4383 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4384 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4385 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4387 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4388 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4389 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4390 4391 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4392 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4393 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4394 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4395 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4397 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4398 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4400 4401 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4402 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4403 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4405 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4406 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4407 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4408 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4410 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4411 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4412 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4413 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4414 4415 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4416 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4417 /* insert cols for primal dofs */ 4418 for (j=0;j<primal_dofs;j++) { 4419 start_vals = &qr_basis[j*size_of_constraint]; 4420 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4421 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4422 } 4423 /* insert cols for dual dofs */ 4424 for (j=0,k=0;j<dual_dofs;k++) { 4425 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4426 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4427 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4428 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4429 j++; 4430 } 4431 } 4432 4433 /* check change of basis */ 4434 if (pcbddc->dbg_flag) { 4435 PetscInt ii,jj; 4436 PetscBool valid_qr=PETSC_TRUE; 4437 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4438 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4439 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4440 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4441 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4442 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4443 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4444 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4445 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4446 for (jj=0;jj<size_of_constraint;jj++) { 4447 for (ii=0;ii<primal_dofs;ii++) { 4448 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4449 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4450 } 4451 } 4452 if (!valid_qr) { 4453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4454 for (jj=0;jj<size_of_constraint;jj++) { 4455 for (ii=0;ii<primal_dofs;ii++) { 4456 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4457 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4458 } 4459 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4460 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4461 } 4462 } 4463 } 4464 } else { 4465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4466 } 4467 } 4468 } else { /* simple transformation block */ 4469 PetscInt row,col; 4470 PetscScalar val,norm; 4471 4472 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4473 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4474 for (j=0;j<size_of_constraint;j++) { 4475 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4476 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4477 if (!PetscBTLookup(is_primal,row_B)) { 4478 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4479 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4480 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4481 } else { 4482 for (k=0;k<size_of_constraint;k++) { 4483 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4484 if (row != col) { 4485 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4486 } else { 4487 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4488 } 4489 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4490 } 4491 } 4492 } 4493 if (pcbddc->dbg_flag) { 4494 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4495 } 4496 } 4497 } else { 4498 if (pcbddc->dbg_flag) { 4499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4500 } 4501 } 4502 } 4503 4504 /* free workspace */ 4505 if (qr_needed) { 4506 if (pcbddc->dbg_flag) { 4507 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4508 } 4509 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4510 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4511 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4512 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4513 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4514 } 4515 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4516 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 4519 /* assembling of global change of variable */ 4520 if (!pcbddc->fake_change) { 4521 Mat tmat; 4522 PetscInt bs; 4523 4524 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4525 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4526 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4527 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4528 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4529 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4530 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4531 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4532 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4533 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4534 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4535 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4536 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4537 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4538 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4539 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4540 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4541 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4542 4543 /* check */ 4544 if (pcbddc->dbg_flag) { 4545 PetscReal error; 4546 Vec x,x_change; 4547 4548 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4549 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4550 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4551 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4552 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4553 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4554 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4555 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4556 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4557 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4558 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4559 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4560 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4561 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4562 ierr = VecDestroy(&x);CHKERRQ(ierr); 4563 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4564 } 4565 /* adapt sub_schurs computed (if any) */ 4566 if (pcbddc->use_deluxe_scaling) { 4567 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4568 4569 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4570 if (sub_schurs && sub_schurs->S_Ej_all) { 4571 Mat S_new,tmat; 4572 IS is_all_N,is_V_Sall = NULL; 4573 4574 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4575 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4576 if (pcbddc->deluxe_zerorows) { 4577 ISLocalToGlobalMapping NtoSall; 4578 IS is_V; 4579 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4580 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4581 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4582 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4583 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4584 } 4585 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4586 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4587 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4588 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4589 if (pcbddc->deluxe_zerorows) { 4590 const PetscScalar *array; 4591 const PetscInt *idxs_V,*idxs_all; 4592 PetscInt i,n_V; 4593 4594 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4595 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4596 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4597 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4598 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4599 for (i=0;i<n_V;i++) { 4600 PetscScalar val; 4601 PetscInt idx; 4602 4603 idx = idxs_V[i]; 4604 val = array[idxs_all[idxs_V[i]]]; 4605 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4606 } 4607 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4608 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4609 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4610 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4611 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4612 } 4613 sub_schurs->S_Ej_all = S_new; 4614 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4615 if (sub_schurs->sum_S_Ej_all) { 4616 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4617 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4618 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4619 if (pcbddc->deluxe_zerorows) { 4620 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4621 } 4622 sub_schurs->sum_S_Ej_all = S_new; 4623 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4624 } 4625 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4626 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4627 } 4628 /* destroy any change of basis context in sub_schurs */ 4629 if (sub_schurs && sub_schurs->change) { 4630 PetscInt i; 4631 4632 for (i=0;i<sub_schurs->n_subs;i++) { 4633 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4634 } 4635 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4636 } 4637 } 4638 if (pcbddc->switch_static) { /* need to save the local change */ 4639 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4640 } else { 4641 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4642 } 4643 /* determine if any process has changed the pressures locally */ 4644 pcbddc->change_interior = pcbddc->benign_have_null; 4645 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4646 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4647 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4648 pcbddc->use_qr_single = qr_needed; 4649 } 4650 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4651 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4652 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4653 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4654 } else { 4655 Mat benign_global = NULL; 4656 if (pcbddc->benign_have_null) { 4657 Mat tmat; 4658 4659 pcbddc->change_interior = PETSC_TRUE; 4660 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4661 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4662 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4663 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4664 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4665 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4666 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4667 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4668 if (pcbddc->benign_change) { 4669 Mat M; 4670 4671 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4672 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4673 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4674 ierr = MatDestroy(&M);CHKERRQ(ierr); 4675 } else { 4676 Mat eye; 4677 PetscScalar *array; 4678 4679 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4680 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4681 for (i=0;i<pcis->n;i++) { 4682 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4683 } 4684 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4685 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4686 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4687 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4688 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4689 } 4690 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4691 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4692 } 4693 if (pcbddc->user_ChangeOfBasisMatrix) { 4694 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4695 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4696 } else if (pcbddc->benign_have_null) { 4697 pcbddc->ChangeOfBasisMatrix = benign_global; 4698 } 4699 } 4700 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4701 IS is_global; 4702 const PetscInt *gidxs; 4703 4704 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4705 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4706 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4707 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4708 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4709 } 4710 } 4711 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4712 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4713 } 4714 4715 if (!pcbddc->fake_change) { 4716 /* add pressure dofs to set of primal nodes for numbering purposes */ 4717 for (i=0;i<pcbddc->benign_n;i++) { 4718 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4719 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4720 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4721 pcbddc->local_primal_size_cc++; 4722 pcbddc->local_primal_size++; 4723 } 4724 4725 /* check if a new primal space has been introduced (also take into account benign trick) */ 4726 pcbddc->new_primal_space_local = PETSC_TRUE; 4727 if (olocal_primal_size == pcbddc->local_primal_size) { 4728 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4729 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4730 if (!pcbddc->new_primal_space_local) { 4731 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4732 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4733 } 4734 } 4735 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4736 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4737 } 4738 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4739 4740 /* flush dbg viewer */ 4741 if (pcbddc->dbg_flag) { 4742 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4743 } 4744 4745 /* free workspace */ 4746 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4747 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4748 if (!pcbddc->adaptive_selection) { 4749 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4750 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4751 } else { 4752 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4753 pcbddc->adaptive_constraints_idxs_ptr, 4754 pcbddc->adaptive_constraints_data_ptr, 4755 pcbddc->adaptive_constraints_idxs, 4756 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4757 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4758 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4759 } 4760 PetscFunctionReturn(0); 4761 } 4762 4763 #undef __FUNCT__ 4764 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4765 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4766 { 4767 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4768 PC_IS *pcis = (PC_IS*)pc->data; 4769 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4770 PetscInt ierr,i,N; 4771 4772 PetscFunctionBegin; 4773 /* Reset previously computed graph */ 4774 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4775 /* Init local Graph struct */ 4776 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4777 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4778 4779 /* Check validity of the csr graph passed in by the user */ 4780 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4781 4782 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4783 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4784 PetscInt *xadj,*adjncy; 4785 PetscInt nvtxs; 4786 PetscBool flg_row=PETSC_FALSE; 4787 4788 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4789 if (flg_row) { 4790 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4791 pcbddc->computed_rowadj = PETSC_TRUE; 4792 } 4793 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4794 } 4795 if (pcbddc->dbg_flag) { 4796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4797 } 4798 4799 /* Setup of Graph */ 4800 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4801 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4802 4803 /* attach info on disconnected subdomains if present */ 4804 if (pcbddc->n_local_subs) { 4805 PetscInt *local_subs; 4806 4807 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4808 for (i=0;i<pcbddc->n_local_subs;i++) { 4809 const PetscInt *idxs; 4810 PetscInt nl,j; 4811 4812 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4813 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4814 for (j=0;j<nl;j++) { 4815 local_subs[idxs[j]] = i; 4816 } 4817 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4818 } 4819 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4820 pcbddc->mat_graph->local_subs = local_subs; 4821 } 4822 4823 /* Graph's connected components analysis */ 4824 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4825 PetscFunctionReturn(0); 4826 } 4827 4828 #undef __FUNCT__ 4829 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4830 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4831 { 4832 PetscInt i,j; 4833 PetscScalar *alphas; 4834 PetscErrorCode ierr; 4835 4836 PetscFunctionBegin; 4837 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4838 for (i=0;i<n;i++) { 4839 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4840 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4841 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4842 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4843 } 4844 ierr = PetscFree(alphas);CHKERRQ(ierr); 4845 PetscFunctionReturn(0); 4846 } 4847 4848 #undef __FUNCT__ 4849 #define __FUNCT__ "MatISGetSubassemblingPattern" 4850 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4851 { 4852 Mat A; 4853 PetscInt n_neighs,*neighs,*n_shared,**shared; 4854 PetscMPIInt size,rank,color; 4855 PetscInt *xadj,*adjncy; 4856 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4857 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4858 PetscInt void_procs,*procs_candidates = NULL; 4859 PetscInt xadj_count, *count; 4860 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4861 PetscSubcomm psubcomm; 4862 MPI_Comm subcomm; 4863 PetscErrorCode ierr; 4864 4865 PetscFunctionBegin; 4866 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4867 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4868 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4869 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4870 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4871 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4872 4873 if (have_void) *have_void = PETSC_FALSE; 4874 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4875 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4876 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4877 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4878 im_active = !!(n); 4879 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4880 void_procs = size - active_procs; 4881 /* get ranks of of non-active processes in mat communicator */ 4882 if (void_procs) { 4883 PetscInt ncand; 4884 4885 if (have_void) *have_void = PETSC_TRUE; 4886 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4887 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4888 for (i=0,ncand=0;i<size;i++) { 4889 if (!procs_candidates[i]) { 4890 procs_candidates[ncand++] = i; 4891 } 4892 } 4893 /* force n_subdomains to be not greater that the number of non-active processes */ 4894 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4895 } 4896 4897 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4898 if (active_procs < *n_subdomains) { 4899 PetscInt issize,isidx; 4900 if (im_active) { 4901 issize = 1; 4902 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4903 isidx = procs_candidates[rank]; 4904 } else { 4905 isidx = rank; 4906 } 4907 } else { 4908 issize = 0; 4909 isidx = -1; 4910 } 4911 *n_subdomains = active_procs; 4912 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4913 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4914 PetscFunctionReturn(0); 4915 } 4916 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4917 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4918 threshold = PetscMax(threshold,2); 4919 4920 /* Get info on mapping */ 4921 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4922 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4923 4924 /* build local CSR graph of subdomains' connectivity */ 4925 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4926 xadj[0] = 0; 4927 xadj[1] = PetscMax(n_neighs-1,0); 4928 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4929 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4930 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4931 for (i=1;i<n_neighs;i++) 4932 for (j=0;j<n_shared[i];j++) 4933 count[shared[i][j]] += 1; 4934 4935 xadj_count = 0; 4936 for (i=1;i<n_neighs;i++) { 4937 for (j=0;j<n_shared[i];j++) { 4938 if (count[shared[i][j]] < threshold) { 4939 adjncy[xadj_count] = neighs[i]; 4940 adjncy_wgt[xadj_count] = n_shared[i]; 4941 xadj_count++; 4942 break; 4943 } 4944 } 4945 } 4946 xadj[1] = xadj_count; 4947 ierr = PetscFree(count);CHKERRQ(ierr); 4948 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4949 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4950 4951 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4952 4953 /* Restrict work on active processes only */ 4954 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4955 if (void_procs) { 4956 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4957 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4958 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4959 subcomm = PetscSubcommChild(psubcomm); 4960 } else { 4961 psubcomm = NULL; 4962 subcomm = PetscObjectComm((PetscObject)mat); 4963 } 4964 4965 v_wgt = NULL; 4966 if (!color) { 4967 ierr = PetscFree(xadj);CHKERRQ(ierr); 4968 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4969 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4970 } else { 4971 Mat subdomain_adj; 4972 IS new_ranks,new_ranks_contig; 4973 MatPartitioning partitioner; 4974 PetscInt rstart=0,rend=0; 4975 PetscInt *is_indices,*oldranks; 4976 PetscMPIInt size; 4977 PetscBool aggregate; 4978 4979 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4980 if (void_procs) { 4981 PetscInt prank = rank; 4982 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4983 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4984 for (i=0;i<xadj[1];i++) { 4985 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4986 } 4987 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4988 } else { 4989 oldranks = NULL; 4990 } 4991 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4992 if (aggregate) { /* TODO: all this part could be made more efficient */ 4993 PetscInt lrows,row,ncols,*cols; 4994 PetscMPIInt nrank; 4995 PetscScalar *vals; 4996 4997 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 4998 lrows = 0; 4999 if (nrank<redprocs) { 5000 lrows = size/redprocs; 5001 if (nrank<size%redprocs) lrows++; 5002 } 5003 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5004 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5005 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5006 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5007 row = nrank; 5008 ncols = xadj[1]-xadj[0]; 5009 cols = adjncy; 5010 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5011 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5012 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5013 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5014 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5015 ierr = PetscFree(xadj);CHKERRQ(ierr); 5016 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5017 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5018 ierr = PetscFree(vals);CHKERRQ(ierr); 5019 if (use_vwgt) { 5020 Vec v; 5021 const PetscScalar *array; 5022 PetscInt nl; 5023 5024 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5025 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5026 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5027 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5028 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5029 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5030 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5031 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5032 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5033 ierr = VecDestroy(&v);CHKERRQ(ierr); 5034 } 5035 } else { 5036 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5037 if (use_vwgt) { 5038 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5039 v_wgt[0] = local_size; 5040 } 5041 } 5042 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5043 5044 /* Partition */ 5045 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5046 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5047 if (v_wgt) { 5048 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5049 } 5050 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5051 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5052 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5053 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5054 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5055 5056 /* renumber new_ranks to avoid "holes" in new set of processors */ 5057 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5058 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5059 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5060 if (!aggregate) { 5061 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5062 #if defined(PETSC_USE_DEBUG) 5063 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5064 #endif 5065 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5066 } else if (oldranks) { 5067 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5068 } else { 5069 ranks_send_to_idx[0] = is_indices[0]; 5070 } 5071 } else { 5072 PetscInt idxs[1]; 5073 PetscMPIInt tag; 5074 MPI_Request *reqs; 5075 5076 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5077 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5078 for (i=rstart;i<rend;i++) { 5079 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5080 } 5081 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5082 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5083 ierr = PetscFree(reqs);CHKERRQ(ierr); 5084 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5085 #if defined(PETSC_USE_DEBUG) 5086 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5087 #endif 5088 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5089 } else if (oldranks) { 5090 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5091 } else { 5092 ranks_send_to_idx[0] = idxs[0]; 5093 } 5094 } 5095 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5096 /* clean up */ 5097 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5098 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5099 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5100 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5101 } 5102 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5103 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5104 5105 /* assemble parallel IS for sends */ 5106 i = 1; 5107 if (!color) i=0; 5108 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5109 PetscFunctionReturn(0); 5110 } 5111 5112 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5113 5114 #undef __FUNCT__ 5115 #define __FUNCT__ "MatISSubassemble" 5116 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5117 { 5118 Mat local_mat; 5119 IS is_sends_internal; 5120 PetscInt rows,cols,new_local_rows; 5121 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5122 PetscBool ismatis,isdense,newisdense,destroy_mat; 5123 ISLocalToGlobalMapping l2gmap; 5124 PetscInt* l2gmap_indices; 5125 const PetscInt* is_indices; 5126 MatType new_local_type; 5127 /* buffers */ 5128 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5129 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5130 PetscInt *recv_buffer_idxs_local; 5131 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5132 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5133 /* MPI */ 5134 MPI_Comm comm,comm_n; 5135 PetscSubcomm subcomm; 5136 PetscMPIInt n_sends,n_recvs,commsize; 5137 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5138 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5139 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5140 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5141 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5142 PetscErrorCode ierr; 5143 5144 PetscFunctionBegin; 5145 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5146 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5147 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5148 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5149 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5150 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5151 PetscValidLogicalCollectiveBool(mat,reuse,6); 5152 PetscValidLogicalCollectiveInt(mat,nis,8); 5153 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5154 if (nvecs) { 5155 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5156 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5157 } 5158 /* further checks */ 5159 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5160 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5161 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5162 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5163 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5164 if (reuse && *mat_n) { 5165 PetscInt mrows,mcols,mnrows,mncols; 5166 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5167 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5168 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5169 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5170 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5171 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5172 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5173 } 5174 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5175 PetscValidLogicalCollectiveInt(mat,bs,0); 5176 5177 /* prepare IS for sending if not provided */ 5178 if (!is_sends) { 5179 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5180 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5181 } else { 5182 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5183 is_sends_internal = is_sends; 5184 } 5185 5186 /* get comm */ 5187 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5188 5189 /* compute number of sends */ 5190 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5191 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5192 5193 /* compute number of receives */ 5194 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5195 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5196 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5197 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5198 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5199 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5200 ierr = PetscFree(iflags);CHKERRQ(ierr); 5201 5202 /* restrict comm if requested */ 5203 subcomm = 0; 5204 destroy_mat = PETSC_FALSE; 5205 if (restrict_comm) { 5206 PetscMPIInt color,subcommsize; 5207 5208 color = 0; 5209 if (restrict_full) { 5210 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5211 } else { 5212 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5213 } 5214 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5215 subcommsize = commsize - subcommsize; 5216 /* check if reuse has been requested */ 5217 if (reuse) { 5218 if (*mat_n) { 5219 PetscMPIInt subcommsize2; 5220 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5221 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5222 comm_n = PetscObjectComm((PetscObject)*mat_n); 5223 } else { 5224 comm_n = PETSC_COMM_SELF; 5225 } 5226 } else { /* MAT_INITIAL_MATRIX */ 5227 PetscMPIInt rank; 5228 5229 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5230 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5231 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5232 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5233 comm_n = PetscSubcommChild(subcomm); 5234 } 5235 /* flag to destroy *mat_n if not significative */ 5236 if (color) destroy_mat = PETSC_TRUE; 5237 } else { 5238 comm_n = comm; 5239 } 5240 5241 /* prepare send/receive buffers */ 5242 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5243 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5244 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5245 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5246 if (nis) { 5247 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5248 } 5249 5250 /* Get data from local matrices */ 5251 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5252 /* TODO: See below some guidelines on how to prepare the local buffers */ 5253 /* 5254 send_buffer_vals should contain the raw values of the local matrix 5255 send_buffer_idxs should contain: 5256 - MatType_PRIVATE type 5257 - PetscInt size_of_l2gmap 5258 - PetscInt global_row_indices[size_of_l2gmap] 5259 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5260 */ 5261 else { 5262 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5263 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5264 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5265 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5266 send_buffer_idxs[1] = i; 5267 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5268 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5269 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5270 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5271 for (i=0;i<n_sends;i++) { 5272 ilengths_vals[is_indices[i]] = len*len; 5273 ilengths_idxs[is_indices[i]] = len+2; 5274 } 5275 } 5276 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5277 /* additional is (if any) */ 5278 if (nis) { 5279 PetscMPIInt psum; 5280 PetscInt j; 5281 for (j=0,psum=0;j<nis;j++) { 5282 PetscInt plen; 5283 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5284 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5285 psum += len+1; /* indices + lenght */ 5286 } 5287 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5288 for (j=0,psum=0;j<nis;j++) { 5289 PetscInt plen; 5290 const PetscInt *is_array_idxs; 5291 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5292 send_buffer_idxs_is[psum] = plen; 5293 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5294 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5295 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5296 psum += plen+1; /* indices + lenght */ 5297 } 5298 for (i=0;i<n_sends;i++) { 5299 ilengths_idxs_is[is_indices[i]] = psum; 5300 } 5301 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5302 } 5303 5304 buf_size_idxs = 0; 5305 buf_size_vals = 0; 5306 buf_size_idxs_is = 0; 5307 buf_size_vecs = 0; 5308 for (i=0;i<n_recvs;i++) { 5309 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5310 buf_size_vals += (PetscInt)olengths_vals[i]; 5311 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5312 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5313 } 5314 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5315 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5316 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5317 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5318 5319 /* get new tags for clean communications */ 5320 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5321 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5322 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5323 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5324 5325 /* allocate for requests */ 5326 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5327 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5328 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5329 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5330 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5331 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5332 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5333 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5334 5335 /* communications */ 5336 ptr_idxs = recv_buffer_idxs; 5337 ptr_vals = recv_buffer_vals; 5338 ptr_idxs_is = recv_buffer_idxs_is; 5339 ptr_vecs = recv_buffer_vecs; 5340 for (i=0;i<n_recvs;i++) { 5341 source_dest = onodes[i]; 5342 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5343 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5344 ptr_idxs += olengths_idxs[i]; 5345 ptr_vals += olengths_vals[i]; 5346 if (nis) { 5347 source_dest = onodes_is[i]; 5348 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5349 ptr_idxs_is += olengths_idxs_is[i]; 5350 } 5351 if (nvecs) { 5352 source_dest = onodes[i]; 5353 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5354 ptr_vecs += olengths_idxs[i]-2; 5355 } 5356 } 5357 for (i=0;i<n_sends;i++) { 5358 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5359 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5360 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5361 if (nis) { 5362 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5363 } 5364 if (nvecs) { 5365 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5366 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5367 } 5368 } 5369 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5370 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5371 5372 /* assemble new l2g map */ 5373 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5374 ptr_idxs = recv_buffer_idxs; 5375 new_local_rows = 0; 5376 for (i=0;i<n_recvs;i++) { 5377 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5378 ptr_idxs += olengths_idxs[i]; 5379 } 5380 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5381 ptr_idxs = recv_buffer_idxs; 5382 new_local_rows = 0; 5383 for (i=0;i<n_recvs;i++) { 5384 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5385 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5386 ptr_idxs += olengths_idxs[i]; 5387 } 5388 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5389 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5390 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5391 5392 /* infer new local matrix type from received local matrices type */ 5393 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5394 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5395 if (n_recvs) { 5396 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5397 ptr_idxs = recv_buffer_idxs; 5398 for (i=0;i<n_recvs;i++) { 5399 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5400 new_local_type_private = MATAIJ_PRIVATE; 5401 break; 5402 } 5403 ptr_idxs += olengths_idxs[i]; 5404 } 5405 switch (new_local_type_private) { 5406 case MATDENSE_PRIVATE: 5407 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5408 new_local_type = MATSEQAIJ; 5409 bs = 1; 5410 } else { /* if I receive only 1 dense matrix */ 5411 new_local_type = MATSEQDENSE; 5412 bs = 1; 5413 } 5414 break; 5415 case MATAIJ_PRIVATE: 5416 new_local_type = MATSEQAIJ; 5417 bs = 1; 5418 break; 5419 case MATBAIJ_PRIVATE: 5420 new_local_type = MATSEQBAIJ; 5421 break; 5422 case MATSBAIJ_PRIVATE: 5423 new_local_type = MATSEQSBAIJ; 5424 break; 5425 default: 5426 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5427 break; 5428 } 5429 } else { /* by default, new_local_type is seqdense */ 5430 new_local_type = MATSEQDENSE; 5431 bs = 1; 5432 } 5433 5434 /* create MATIS object if needed */ 5435 if (!reuse) { 5436 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5437 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5438 } else { 5439 /* it also destroys the local matrices */ 5440 if (*mat_n) { 5441 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5442 } else { /* this is a fake object */ 5443 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5444 } 5445 } 5446 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5447 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5448 5449 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5450 5451 /* Global to local map of received indices */ 5452 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5453 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5454 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5455 5456 /* restore attributes -> type of incoming data and its size */ 5457 buf_size_idxs = 0; 5458 for (i=0;i<n_recvs;i++) { 5459 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5460 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5461 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5462 } 5463 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5464 5465 /* set preallocation */ 5466 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5467 if (!newisdense) { 5468 PetscInt *new_local_nnz=0; 5469 5470 ptr_idxs = recv_buffer_idxs_local; 5471 if (n_recvs) { 5472 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5473 } 5474 for (i=0;i<n_recvs;i++) { 5475 PetscInt j; 5476 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5477 for (j=0;j<*(ptr_idxs+1);j++) { 5478 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5479 } 5480 } else { 5481 /* TODO */ 5482 } 5483 ptr_idxs += olengths_idxs[i]; 5484 } 5485 if (new_local_nnz) { 5486 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5487 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5488 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5489 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5490 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5491 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5492 } else { 5493 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5494 } 5495 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5496 } else { 5497 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5498 } 5499 5500 /* set values */ 5501 ptr_vals = recv_buffer_vals; 5502 ptr_idxs = recv_buffer_idxs_local; 5503 for (i=0;i<n_recvs;i++) { 5504 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5505 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5506 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5507 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5508 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5509 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5510 } else { 5511 /* TODO */ 5512 } 5513 ptr_idxs += olengths_idxs[i]; 5514 ptr_vals += olengths_vals[i]; 5515 } 5516 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5517 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5518 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5519 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5520 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5521 5522 #if 0 5523 if (!restrict_comm) { /* check */ 5524 Vec lvec,rvec; 5525 PetscReal infty_error; 5526 5527 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5528 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5529 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5530 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5531 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5532 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5533 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5534 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5535 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5536 } 5537 #endif 5538 5539 /* assemble new additional is (if any) */ 5540 if (nis) { 5541 PetscInt **temp_idxs,*count_is,j,psum; 5542 5543 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5544 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5545 ptr_idxs = recv_buffer_idxs_is; 5546 psum = 0; 5547 for (i=0;i<n_recvs;i++) { 5548 for (j=0;j<nis;j++) { 5549 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5550 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5551 psum += plen; 5552 ptr_idxs += plen+1; /* shift pointer to received data */ 5553 } 5554 } 5555 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5556 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5557 for (i=1;i<nis;i++) { 5558 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5559 } 5560 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5561 ptr_idxs = recv_buffer_idxs_is; 5562 for (i=0;i<n_recvs;i++) { 5563 for (j=0;j<nis;j++) { 5564 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5565 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5566 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5567 ptr_idxs += plen+1; /* shift pointer to received data */ 5568 } 5569 } 5570 for (i=0;i<nis;i++) { 5571 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5572 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5573 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5574 } 5575 ierr = PetscFree(count_is);CHKERRQ(ierr); 5576 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5577 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5578 } 5579 /* free workspace */ 5580 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5581 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5582 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5583 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5584 if (isdense) { 5585 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5586 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5587 } else { 5588 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5589 } 5590 if (nis) { 5591 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5592 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5593 } 5594 5595 if (nvecs) { 5596 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5597 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5598 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5599 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5600 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5601 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5602 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5603 /* set values */ 5604 ptr_vals = recv_buffer_vecs; 5605 ptr_idxs = recv_buffer_idxs_local; 5606 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5607 for (i=0;i<n_recvs;i++) { 5608 PetscInt j; 5609 for (j=0;j<*(ptr_idxs+1);j++) { 5610 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5611 } 5612 ptr_idxs += olengths_idxs[i]; 5613 ptr_vals += olengths_idxs[i]-2; 5614 } 5615 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5616 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5617 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5618 } 5619 5620 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5621 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5622 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5623 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5624 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5625 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5626 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5627 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5628 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5629 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5630 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5631 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5632 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5633 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5634 ierr = PetscFree(onodes);CHKERRQ(ierr); 5635 if (nis) { 5636 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5637 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5638 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5639 } 5640 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5641 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5642 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5643 for (i=0;i<nis;i++) { 5644 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5645 } 5646 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5647 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5648 } 5649 *mat_n = NULL; 5650 } 5651 PetscFunctionReturn(0); 5652 } 5653 5654 /* temporary hack into ksp private data structure */ 5655 #include <petsc/private/kspimpl.h> 5656 5657 #undef __FUNCT__ 5658 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5659 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5660 { 5661 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5662 PC_IS *pcis = (PC_IS*)pc->data; 5663 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5664 Mat coarsedivudotp = NULL; 5665 MatNullSpace CoarseNullSpace = NULL; 5666 ISLocalToGlobalMapping coarse_islg; 5667 IS coarse_is,*isarray; 5668 PetscInt i,im_active=-1,active_procs=-1; 5669 PetscInt nis,nisdofs,nisneu,nisvert; 5670 PC pc_temp; 5671 PCType coarse_pc_type; 5672 KSPType coarse_ksp_type; 5673 PetscBool multilevel_requested,multilevel_allowed; 5674 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5675 Mat t_coarse_mat_is; 5676 PetscInt ncoarse; 5677 PetscBool compute_vecs = PETSC_FALSE; 5678 PetscScalar *array; 5679 MatReuse coarse_mat_reuse; 5680 PetscBool restr, full_restr, have_void; 5681 PetscErrorCode ierr; 5682 5683 PetscFunctionBegin; 5684 /* Assign global numbering to coarse dofs */ 5685 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5686 PetscInt ocoarse_size; 5687 compute_vecs = PETSC_TRUE; 5688 ocoarse_size = pcbddc->coarse_size; 5689 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5690 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5691 /* see if we can avoid some work */ 5692 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5693 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5694 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5695 PC pc; 5696 PetscBool isbddc; 5697 5698 /* temporary workaround since PCBDDC does not have a reset method so far */ 5699 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5700 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5701 if (isbddc) { 5702 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5703 } else { 5704 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5705 } 5706 coarse_reuse = PETSC_FALSE; 5707 } else { /* we can safely reuse already computed coarse matrix */ 5708 coarse_reuse = PETSC_TRUE; 5709 } 5710 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5711 coarse_reuse = PETSC_FALSE; 5712 } 5713 /* reset any subassembling information */ 5714 if (!coarse_reuse || pcbddc->recompute_topography) { 5715 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5716 } 5717 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5718 coarse_reuse = PETSC_TRUE; 5719 } 5720 /* assemble coarse matrix */ 5721 if (coarse_reuse && pcbddc->coarse_ksp) { 5722 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5723 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5724 coarse_mat_reuse = MAT_REUSE_MATRIX; 5725 } else { 5726 coarse_mat = NULL; 5727 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5728 } 5729 5730 /* creates temporary l2gmap and IS for coarse indexes */ 5731 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5732 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5733 5734 /* creates temporary MATIS object for coarse matrix */ 5735 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5736 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5737 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5738 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5739 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5740 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5741 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5742 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5743 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5744 5745 /* count "active" (i.e. with positive local size) and "void" processes */ 5746 im_active = !!(pcis->n); 5747 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5748 5749 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5750 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5751 /* full_restr : just use the receivers from the subassembling pattern */ 5752 coarse_mat_is = NULL; 5753 multilevel_allowed = PETSC_FALSE; 5754 multilevel_requested = PETSC_FALSE; 5755 full_restr = PETSC_TRUE; 5756 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5757 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5758 if (multilevel_requested) { 5759 ncoarse = active_procs/pcbddc->coarsening_ratio; 5760 restr = PETSC_FALSE; 5761 full_restr = PETSC_FALSE; 5762 } else { 5763 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5764 restr = PETSC_TRUE; 5765 full_restr = PETSC_TRUE; 5766 } 5767 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5768 ncoarse = PetscMax(1,ncoarse); 5769 if (!pcbddc->coarse_subassembling) { 5770 if (pcbddc->coarsening_ratio > 1) { 5771 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5772 } else { 5773 PetscMPIInt size,rank; 5774 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5775 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5776 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5777 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5778 } 5779 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5780 PetscInt psum; 5781 PetscMPIInt size; 5782 if (pcbddc->coarse_ksp) psum = 1; 5783 else psum = 0; 5784 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5785 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5786 if (ncoarse < size) have_void = PETSC_TRUE; 5787 } 5788 /* determine if we can go multilevel */ 5789 if (multilevel_requested) { 5790 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5791 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5792 } 5793 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5794 5795 /* dump subassembling pattern */ 5796 if (pcbddc->dbg_flag && multilevel_allowed) { 5797 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5798 } 5799 5800 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5801 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5802 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5803 const PetscInt *idxs; 5804 ISLocalToGlobalMapping tmap; 5805 5806 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5807 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5808 /* allocate space for temporary storage */ 5809 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5810 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5811 /* allocate for IS array */ 5812 nisdofs = pcbddc->n_ISForDofsLocal; 5813 nisneu = !!pcbddc->NeumannBoundariesLocal; 5814 nisvert = 0; /* nisvert is not used */ 5815 nis = nisdofs + nisneu + nisvert; 5816 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5817 /* dofs splitting */ 5818 for (i=0;i<nisdofs;i++) { 5819 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5820 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5821 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5822 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5823 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5824 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5825 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5826 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5827 } 5828 /* neumann boundaries */ 5829 if (pcbddc->NeumannBoundariesLocal) { 5830 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5831 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5832 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5833 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5834 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5835 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5836 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5837 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5838 } 5839 /* free memory */ 5840 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5841 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5842 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5843 } else { 5844 nis = 0; 5845 nisdofs = 0; 5846 nisneu = 0; 5847 nisvert = 0; 5848 isarray = NULL; 5849 } 5850 /* destroy no longer needed map */ 5851 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5852 5853 /* subassemble */ 5854 if (multilevel_allowed) { 5855 Vec vp[1]; 5856 PetscInt nvecs = 0; 5857 PetscBool reuse,reuser; 5858 5859 if (coarse_mat) reuse = PETSC_TRUE; 5860 else reuse = PETSC_FALSE; 5861 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5862 vp[0] = NULL; 5863 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5864 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5865 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5866 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5867 nvecs = 1; 5868 5869 if (pcbddc->divudotp) { 5870 Mat B,loc_divudotp; 5871 Vec v,p; 5872 IS dummy; 5873 PetscInt np; 5874 5875 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5876 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5877 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5878 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5879 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5880 ierr = VecSet(p,1.);CHKERRQ(ierr); 5881 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5882 ierr = VecDestroy(&p);CHKERRQ(ierr); 5883 ierr = MatDestroy(&B);CHKERRQ(ierr); 5884 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5885 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5886 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5887 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5888 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5889 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5890 ierr = VecDestroy(&v);CHKERRQ(ierr); 5891 } 5892 } 5893 if (reuser) { 5894 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5895 } else { 5896 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5897 } 5898 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5899 PetscScalar *arraym,*arrayv; 5900 PetscInt nl; 5901 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5902 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5903 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5904 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5905 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5906 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5907 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5908 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5909 } else { 5910 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5911 } 5912 } else { 5913 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5914 } 5915 if (coarse_mat_is || coarse_mat) { 5916 PetscMPIInt size; 5917 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5918 if (!multilevel_allowed) { 5919 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5920 } else { 5921 Mat A; 5922 5923 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5924 if (coarse_mat_is) { 5925 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5926 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5927 coarse_mat = coarse_mat_is; 5928 } 5929 /* be sure we don't have MatSeqDENSE as local mat */ 5930 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5931 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5932 } 5933 } 5934 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5935 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5936 5937 /* create local to global scatters for coarse problem */ 5938 if (compute_vecs) { 5939 PetscInt lrows; 5940 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5941 if (coarse_mat) { 5942 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5943 } else { 5944 lrows = 0; 5945 } 5946 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5947 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5948 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5949 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5950 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5951 } 5952 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5953 5954 /* set defaults for coarse KSP and PC */ 5955 if (multilevel_allowed) { 5956 coarse_ksp_type = KSPRICHARDSON; 5957 coarse_pc_type = PCBDDC; 5958 } else { 5959 coarse_ksp_type = KSPPREONLY; 5960 coarse_pc_type = PCREDUNDANT; 5961 } 5962 5963 /* print some info if requested */ 5964 if (pcbddc->dbg_flag) { 5965 if (!multilevel_allowed) { 5966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5967 if (multilevel_requested) { 5968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5969 } else if (pcbddc->max_levels) { 5970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5971 } 5972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5973 } 5974 } 5975 5976 /* create the coarse KSP object only once with defaults */ 5977 if (coarse_mat) { 5978 PetscViewer dbg_viewer = NULL; 5979 if (pcbddc->dbg_flag) { 5980 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5981 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5982 } 5983 if (!pcbddc->coarse_ksp) { 5984 char prefix[256],str_level[16]; 5985 size_t len; 5986 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5987 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5988 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5989 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5990 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5991 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5992 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5993 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5994 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5995 /* prefix */ 5996 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5997 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5998 if (!pcbddc->current_level) { 5999 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6000 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6001 } else { 6002 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6003 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6004 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6005 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6006 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6007 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6008 } 6009 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6010 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6011 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6012 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6013 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6014 /* allow user customization */ 6015 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6016 } 6017 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6018 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6019 if (nisdofs) { 6020 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6021 for (i=0;i<nisdofs;i++) { 6022 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6023 } 6024 } 6025 if (nisneu) { 6026 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6027 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6028 } 6029 if (nisvert) { 6030 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6031 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6032 } 6033 6034 /* get some info after set from options */ 6035 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6036 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6037 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6038 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6039 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6040 isbddc = PETSC_FALSE; 6041 } 6042 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6043 if (isredundant) { 6044 KSP inner_ksp; 6045 PC inner_pc; 6046 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6047 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6048 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6049 } 6050 6051 /* parameters which miss an API */ 6052 if (isbddc) { 6053 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6054 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6055 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6056 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6057 if (pcbddc_coarse->benign_saddle_point) { 6058 Mat coarsedivudotp_is; 6059 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6060 IS row,col; 6061 const PetscInt *gidxs; 6062 PetscInt n,st,M,N; 6063 6064 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6065 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6066 st = st-n; 6067 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6068 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6069 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6070 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6071 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6072 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6073 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6074 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6075 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6076 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6077 ierr = ISDestroy(&row);CHKERRQ(ierr); 6078 ierr = ISDestroy(&col);CHKERRQ(ierr); 6079 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6080 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6081 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6082 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6083 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6084 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6085 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6086 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6087 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6088 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6089 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6090 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6091 } 6092 } 6093 6094 /* propagate symmetry info of coarse matrix */ 6095 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6096 if (pc->pmat->symmetric_set) { 6097 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6098 } 6099 if (pc->pmat->hermitian_set) { 6100 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6101 } 6102 if (pc->pmat->spd_set) { 6103 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6104 } 6105 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6106 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6107 } 6108 /* set operators */ 6109 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6110 if (pcbddc->dbg_flag) { 6111 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6112 } 6113 } 6114 ierr = PetscFree(isarray);CHKERRQ(ierr); 6115 #if 0 6116 { 6117 PetscViewer viewer; 6118 char filename[256]; 6119 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6120 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6121 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6122 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6123 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6124 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6125 } 6126 #endif 6127 6128 if (pcbddc->coarse_ksp) { 6129 Vec crhs,csol; 6130 6131 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6132 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6133 if (!csol) { 6134 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6135 } 6136 if (!crhs) { 6137 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6138 } 6139 } 6140 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6141 6142 /* compute null space for coarse solver if the benign trick has been requested */ 6143 if (pcbddc->benign_null) { 6144 6145 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6146 for (i=0;i<pcbddc->benign_n;i++) { 6147 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6148 } 6149 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6150 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6151 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6152 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6153 if (coarse_mat) { 6154 Vec nullv; 6155 PetscScalar *array,*array2; 6156 PetscInt nl; 6157 6158 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6159 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6160 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6161 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6162 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6163 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6164 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6165 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6166 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6167 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6168 } 6169 } 6170 6171 if (pcbddc->coarse_ksp) { 6172 PetscBool ispreonly; 6173 6174 if (CoarseNullSpace) { 6175 PetscBool isnull; 6176 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6177 if (isnull) { 6178 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6179 } 6180 /* TODO: add local nullspaces (if any) */ 6181 } 6182 /* setup coarse ksp */ 6183 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6184 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6185 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6186 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6187 KSP check_ksp; 6188 KSPType check_ksp_type; 6189 PC check_pc; 6190 Vec check_vec,coarse_vec; 6191 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6192 PetscInt its; 6193 PetscBool compute_eigs; 6194 PetscReal *eigs_r,*eigs_c; 6195 PetscInt neigs; 6196 const char *prefix; 6197 6198 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6199 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6200 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6201 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6202 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6203 /* prevent from setup unneeded object */ 6204 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6205 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6206 if (ispreonly) { 6207 check_ksp_type = KSPPREONLY; 6208 compute_eigs = PETSC_FALSE; 6209 } else { 6210 check_ksp_type = KSPGMRES; 6211 compute_eigs = PETSC_TRUE; 6212 } 6213 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6214 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6215 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6216 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6217 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6218 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6219 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6220 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6221 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6222 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6223 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6224 /* create random vec */ 6225 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6226 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6227 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6228 /* solve coarse problem */ 6229 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6230 /* set eigenvalue estimation if preonly has not been requested */ 6231 if (compute_eigs) { 6232 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6233 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6234 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6235 if (neigs) { 6236 lambda_max = eigs_r[neigs-1]; 6237 lambda_min = eigs_r[0]; 6238 if (pcbddc->use_coarse_estimates) { 6239 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6240 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6241 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6242 } 6243 } 6244 } 6245 } 6246 6247 /* check coarse problem residual error */ 6248 if (pcbddc->dbg_flag) { 6249 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6250 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6251 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6252 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6253 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6254 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6255 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6256 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6257 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6258 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6259 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6260 if (CoarseNullSpace) { 6261 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6262 } 6263 if (compute_eigs) { 6264 PetscReal lambda_max_s,lambda_min_s; 6265 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6266 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6267 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6268 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6269 for (i=0;i<neigs;i++) { 6270 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6271 } 6272 } 6273 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6274 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6275 } 6276 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6277 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6278 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6279 if (compute_eigs) { 6280 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6281 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6282 } 6283 } 6284 } 6285 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6286 /* print additional info */ 6287 if (pcbddc->dbg_flag) { 6288 /* waits until all processes reaches this point */ 6289 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6290 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6291 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6292 } 6293 6294 /* free memory */ 6295 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6296 PetscFunctionReturn(0); 6297 } 6298 6299 #undef __FUNCT__ 6300 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6301 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6302 { 6303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6304 PC_IS* pcis = (PC_IS*)pc->data; 6305 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6306 IS subset,subset_mult,subset_n; 6307 PetscInt local_size,coarse_size=0; 6308 PetscInt *local_primal_indices=NULL; 6309 const PetscInt *t_local_primal_indices; 6310 PetscErrorCode ierr; 6311 6312 PetscFunctionBegin; 6313 /* Compute global number of coarse dofs */ 6314 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6315 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6316 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6317 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6318 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6319 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6320 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6321 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6322 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6323 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6324 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6325 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6326 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6327 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6328 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6329 6330 /* check numbering */ 6331 if (pcbddc->dbg_flag) { 6332 PetscScalar coarsesum,*array,*array2; 6333 PetscInt i; 6334 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6335 6336 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6337 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6339 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6340 /* counter */ 6341 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6342 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6343 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6344 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6345 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6346 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6347 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6348 for (i=0;i<pcbddc->local_primal_size;i++) { 6349 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6350 } 6351 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6352 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6353 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6354 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6355 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6356 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6357 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6358 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6359 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6360 for (i=0;i<pcis->n;i++) { 6361 if (array[i] != 0.0 && array[i] != array2[i]) { 6362 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6363 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6364 set_error = PETSC_TRUE; 6365 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6367 } 6368 } 6369 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6370 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6372 for (i=0;i<pcis->n;i++) { 6373 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6374 } 6375 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6376 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6377 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6378 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6379 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6380 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6381 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6382 PetscInt *gidxs; 6383 6384 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6385 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6389 for (i=0;i<pcbddc->local_primal_size;i++) { 6390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6391 } 6392 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6393 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6394 } 6395 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6396 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6397 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6398 } 6399 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6400 /* get back data */ 6401 *coarse_size_n = coarse_size; 6402 *local_primal_indices_n = local_primal_indices; 6403 PetscFunctionReturn(0); 6404 } 6405 6406 #undef __FUNCT__ 6407 #define __FUNCT__ "PCBDDCGlobalToLocal" 6408 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6409 { 6410 IS localis_t; 6411 PetscInt i,lsize,*idxs,n; 6412 PetscScalar *vals; 6413 PetscErrorCode ierr; 6414 6415 PetscFunctionBegin; 6416 /* get indices in local ordering exploiting local to global map */ 6417 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6418 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6419 for (i=0;i<lsize;i++) vals[i] = 1.0; 6420 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6421 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6422 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6423 if (idxs) { /* multilevel guard */ 6424 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6425 } 6426 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6427 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6428 ierr = PetscFree(vals);CHKERRQ(ierr); 6429 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6430 /* now compute set in local ordering */ 6431 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6432 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6433 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6434 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6435 for (i=0,lsize=0;i<n;i++) { 6436 if (PetscRealPart(vals[i]) > 0.5) { 6437 lsize++; 6438 } 6439 } 6440 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6441 for (i=0,lsize=0;i<n;i++) { 6442 if (PetscRealPart(vals[i]) > 0.5) { 6443 idxs[lsize++] = i; 6444 } 6445 } 6446 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6447 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6448 *localis = localis_t; 6449 PetscFunctionReturn(0); 6450 } 6451 6452 #undef __FUNCT__ 6453 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6454 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6455 { 6456 PC_IS *pcis=(PC_IS*)pc->data; 6457 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6458 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6459 Mat S_j; 6460 PetscInt *used_xadj,*used_adjncy; 6461 PetscBool free_used_adj; 6462 PetscErrorCode ierr; 6463 6464 PetscFunctionBegin; 6465 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6466 free_used_adj = PETSC_FALSE; 6467 if (pcbddc->sub_schurs_layers == -1) { 6468 used_xadj = NULL; 6469 used_adjncy = NULL; 6470 } else { 6471 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6472 used_xadj = pcbddc->mat_graph->xadj; 6473 used_adjncy = pcbddc->mat_graph->adjncy; 6474 } else if (pcbddc->computed_rowadj) { 6475 used_xadj = pcbddc->mat_graph->xadj; 6476 used_adjncy = pcbddc->mat_graph->adjncy; 6477 } else { 6478 PetscBool flg_row=PETSC_FALSE; 6479 const PetscInt *xadj,*adjncy; 6480 PetscInt nvtxs; 6481 6482 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6483 if (flg_row) { 6484 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6485 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6486 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6487 free_used_adj = PETSC_TRUE; 6488 } else { 6489 pcbddc->sub_schurs_layers = -1; 6490 used_xadj = NULL; 6491 used_adjncy = NULL; 6492 } 6493 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6494 } 6495 } 6496 6497 /* setup sub_schurs data */ 6498 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6499 if (!sub_schurs->schur_explicit) { 6500 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6501 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6502 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6503 } else { 6504 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6505 PetscBool isseqaij,need_change = PETSC_FALSE; 6506 PetscInt benign_n; 6507 Mat change = NULL; 6508 Vec scaling = NULL; 6509 IS change_primal = NULL; 6510 6511 if (!pcbddc->use_vertices && reuse_solvers) { 6512 PetscInt n_vertices; 6513 6514 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6515 reuse_solvers = (PetscBool)!n_vertices; 6516 } 6517 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6518 if (!isseqaij) { 6519 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6520 if (matis->A == pcbddc->local_mat) { 6521 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6522 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6523 } else { 6524 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6525 } 6526 } 6527 if (!pcbddc->benign_change_explicit) { 6528 benign_n = pcbddc->benign_n; 6529 } else { 6530 benign_n = 0; 6531 } 6532 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6533 We need a global reduction to avoid possible deadlocks. 6534 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6535 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6536 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6537 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6538 need_change = (PetscBool)(!need_change); 6539 } 6540 /* If the user defines additional constraints, we import them here. 6541 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6542 if (need_change) { 6543 PC_IS *pcisf; 6544 PC_BDDC *pcbddcf; 6545 PC pcf; 6546 6547 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6548 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6549 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6550 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6551 /* hacks */ 6552 pcisf = (PC_IS*)pcf->data; 6553 pcisf->is_B_local = pcis->is_B_local; 6554 pcisf->vec1_N = pcis->vec1_N; 6555 pcisf->BtoNmap = pcis->BtoNmap; 6556 pcisf->n = pcis->n; 6557 pcisf->n_B = pcis->n_B; 6558 pcbddcf = (PC_BDDC*)pcf->data; 6559 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6560 pcbddcf->mat_graph = pcbddc->mat_graph; 6561 pcbddcf->use_faces = PETSC_TRUE; 6562 pcbddcf->use_change_of_basis = PETSC_TRUE; 6563 pcbddcf->use_change_on_faces = PETSC_TRUE; 6564 pcbddcf->use_qr_single = PETSC_TRUE; 6565 pcbddcf->fake_change = PETSC_TRUE; 6566 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6567 /* store information on primal vertices and change of basis (in local numbering) */ 6568 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6569 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6570 change = pcbddcf->ConstraintMatrix; 6571 pcbddcf->ConstraintMatrix = NULL; 6572 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6573 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6574 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6575 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6576 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6577 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6578 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6579 pcf->ops->destroy = NULL; 6580 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6581 } 6582 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6583 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6584 ierr = MatDestroy(&change);CHKERRQ(ierr); 6585 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6586 } 6587 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6588 6589 /* free adjacency */ 6590 if (free_used_adj) { 6591 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6592 } 6593 PetscFunctionReturn(0); 6594 } 6595 6596 #undef __FUNCT__ 6597 #define __FUNCT__ "PCBDDCInitSubSchurs" 6598 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6599 { 6600 PC_IS *pcis=(PC_IS*)pc->data; 6601 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6602 PCBDDCGraph graph; 6603 PetscErrorCode ierr; 6604 6605 PetscFunctionBegin; 6606 /* attach interface graph for determining subsets */ 6607 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6608 IS verticesIS,verticescomm; 6609 PetscInt vsize,*idxs; 6610 6611 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6612 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6613 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6614 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6615 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6616 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6617 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6618 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6619 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6620 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6621 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6622 } else { 6623 graph = pcbddc->mat_graph; 6624 } 6625 /* print some info */ 6626 if (pcbddc->dbg_flag) { 6627 IS vertices; 6628 PetscInt nv,nedges,nfaces; 6629 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 6630 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6631 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6632 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6633 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6634 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6636 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6637 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6639 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6640 } 6641 6642 /* sub_schurs init */ 6643 if (!pcbddc->sub_schurs) { 6644 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6645 } 6646 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6647 6648 /* free graph struct */ 6649 if (pcbddc->sub_schurs_rebuild) { 6650 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6651 } 6652 PetscFunctionReturn(0); 6653 } 6654 6655 #undef __FUNCT__ 6656 #define __FUNCT__ "PCBDDCCheckOperator" 6657 PetscErrorCode PCBDDCCheckOperator(PC pc) 6658 { 6659 PC_IS *pcis=(PC_IS*)pc->data; 6660 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6661 PetscErrorCode ierr; 6662 6663 PetscFunctionBegin; 6664 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6665 IS zerodiag = NULL; 6666 Mat S_j,B0_B=NULL; 6667 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6668 PetscScalar *p0_check,*array,*array2; 6669 PetscReal norm; 6670 PetscInt i; 6671 6672 /* B0 and B0_B */ 6673 if (zerodiag) { 6674 IS dummy; 6675 6676 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6677 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6678 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6679 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6680 } 6681 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6682 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6683 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6684 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6685 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6686 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6687 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6688 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6689 /* S_j */ 6690 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6691 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6692 6693 /* mimic vector in \widetilde{W}_\Gamma */ 6694 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6695 /* continuous in primal space */ 6696 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6697 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6698 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6699 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6700 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6701 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6702 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6703 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6704 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6705 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6706 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6707 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6708 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6709 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6710 6711 /* assemble rhs for coarse problem */ 6712 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6713 /* local with Schur */ 6714 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6715 if (zerodiag) { 6716 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6717 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6718 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6719 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6720 } 6721 /* sum on primal nodes the local contributions */ 6722 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6723 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6724 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6725 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6726 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6727 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6728 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6729 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6730 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6731 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6732 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6733 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6734 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6735 /* scale primal nodes (BDDC sums contibutions) */ 6736 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6737 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6738 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6739 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6740 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6741 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6742 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6743 /* global: \widetilde{B0}_B w_\Gamma */ 6744 if (zerodiag) { 6745 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6746 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6747 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6748 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6749 } 6750 /* BDDC */ 6751 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6752 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6753 6754 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6755 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6756 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6757 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6758 for (i=0;i<pcbddc->benign_n;i++) { 6759 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6760 } 6761 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6762 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6763 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6764 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6765 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6766 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6767 } 6768 PetscFunctionReturn(0); 6769 } 6770