1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 9 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 10 { 11 Mat loc_divudotp; 12 Vec p,v,vins,quad_vec,*quad_vecs; 13 ISLocalToGlobalMapping map; 14 IS *faces,*edges; 15 PetscScalar *vals; 16 const PetscScalar *array; 17 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 18 PetscMPIInt rank; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 if (!transpose) { 51 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 52 } else { 53 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 54 } 55 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 56 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 57 for (i=0;i<maxneighs;i++) { 58 PetscInt first,last; 59 60 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 61 /* the near-null space fo BDDC carries information on quadrature weights, 62 and these can be collinear -> so cheat with MatNullSpaceCreate 63 and create a suitable set of basis vectors first */ 64 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 65 if (i>=first && i < last) { 66 PetscScalar *data; 67 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 68 data[i-first] = 1.; 69 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 70 } 71 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 72 } 73 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 74 for (i=0;i<maxneighs;i++) { /* reset vectors */ 75 PetscInt first,last; 76 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 77 if (i>=first && i < last) { 78 PetscScalar *data; 79 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 80 data[i-first] = 0.; 81 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 82 } 83 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 84 } 85 /* compute local quad vec */ 86 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 87 if (!transpose) { 88 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 89 } else { 90 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 91 } 92 ierr = VecSet(p,1.);CHKERRQ(ierr); 93 if (!transpose) { 94 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 95 } else { 96 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 97 } 98 if (vl2l) { 99 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 100 } else { 101 vins = v; 102 } 103 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 104 ierr = VecDestroy(&p);CHKERRQ(ierr); 105 106 /* insert in global quadrature vecs */ 107 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 108 for (i=0;i<nf;i++) { 109 const PetscInt *idxs; 110 PetscInt idx,nn,j; 111 112 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 113 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 114 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 115 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 116 idx = -(idx+1); 117 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 118 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 119 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 120 } 121 for (i=0;i<ne;i++) { 122 const PetscInt *idxs; 123 PetscInt idx,nn,j; 124 125 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 126 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 127 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 128 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 129 idx = -(idx+1); 130 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 132 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 133 } 134 ierr = PetscFree(edges);CHKERRQ(ierr); 135 ierr = PetscFree(faces);CHKERRQ(ierr); 136 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 137 if (vl2l) { 138 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 139 } 140 ierr = VecDestroy(&v);CHKERRQ(ierr); 141 ierr = PetscFree(vals);CHKERRQ(ierr); 142 143 /* assemble near null space */ 144 for (i=0;i<maxneighs;i++) { 145 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 146 } 147 for (i=0;i<maxneighs;i++) { 148 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 149 } 150 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 157 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 158 { 159 PetscErrorCode ierr; 160 Vec local,global; 161 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 162 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 163 164 PetscFunctionBegin; 165 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 166 /* need to convert from global to local topology information and remove references to information in global ordering */ 167 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 168 if (pcbddc->user_provided_isfordofs) { 169 if (pcbddc->n_ISForDofs) { 170 PetscInt i; 171 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 172 for (i=0;i<pcbddc->n_ISForDofs;i++) { 173 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 174 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 175 } 176 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 177 pcbddc->n_ISForDofs = 0; 178 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 179 } 180 } else { 181 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 182 PetscInt i, n = matis->A->rmap->n; 183 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 184 if (i > 1) { 185 pcbddc->n_ISForDofsLocal = i; 186 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 187 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 188 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 189 } 190 } 191 } 192 } 193 194 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 195 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 196 } 197 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 198 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 199 } 200 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 201 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 202 } 203 ierr = VecDestroy(&global);CHKERRQ(ierr); 204 ierr = VecDestroy(&local);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 210 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 211 { 212 PC_IS *pcis = (PC_IS*)(pc->data); 213 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (!pcbddc->benign_have_null) { 218 PetscFunctionReturn(0); 219 } 220 if (pcbddc->ChangeOfBasisMatrix) { 221 Vec swap; 222 223 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 224 swap = pcbddc->work_change; 225 pcbddc->work_change = r; 226 r = swap; 227 } 228 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 229 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 230 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 231 ierr = VecSet(z,0.);CHKERRQ(ierr); 232 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 233 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 234 if (pcbddc->ChangeOfBasisMatrix) { 235 Vec swap; 236 237 swap = r; 238 r = pcbddc->work_change; 239 pcbddc->work_change = swap; 240 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 241 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 248 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 249 { 250 PCBDDCBenignMatMult_ctx ctx; 251 PetscErrorCode ierr; 252 PetscBool apply_right,apply_left,reset_x; 253 254 PetscFunctionBegin; 255 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 256 if (transpose) { 257 apply_right = ctx->apply_left; 258 apply_left = ctx->apply_right; 259 } else { 260 apply_right = ctx->apply_right; 261 apply_left = ctx->apply_left; 262 } 263 reset_x = PETSC_FALSE; 264 if (apply_right) { 265 const PetscScalar *ax; 266 PetscInt nl,i; 267 268 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 269 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 270 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 272 for (i=0;i<ctx->benign_n;i++) { 273 PetscScalar sum,val; 274 const PetscInt *idxs; 275 PetscInt nz,j; 276 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 277 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 278 sum = 0.; 279 if (ctx->apply_p0) { 280 val = ctx->work[idxs[nz-1]]; 281 for (j=0;j<nz-1;j++) { 282 sum += ctx->work[idxs[j]]; 283 ctx->work[idxs[j]] += val; 284 } 285 } else { 286 for (j=0;j<nz-1;j++) { 287 sum += ctx->work[idxs[j]]; 288 } 289 } 290 ctx->work[idxs[nz-1]] -= sum; 291 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 292 } 293 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 294 reset_x = PETSC_TRUE; 295 } 296 if (transpose) { 297 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 298 } else { 299 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 300 } 301 if (reset_x) { 302 ierr = VecResetArray(x);CHKERRQ(ierr); 303 } 304 if (apply_left) { 305 PetscScalar *ay; 306 PetscInt i; 307 308 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 309 for (i=0;i<ctx->benign_n;i++) { 310 PetscScalar sum,val; 311 const PetscInt *idxs; 312 PetscInt nz,j; 313 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 314 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 315 val = -ay[idxs[nz-1]]; 316 if (ctx->apply_p0) { 317 sum = 0.; 318 for (j=0;j<nz-1;j++) { 319 sum += ay[idxs[j]]; 320 ay[idxs[j]] += val; 321 } 322 ay[idxs[nz-1]] += sum; 323 } else { 324 for (j=0;j<nz-1;j++) { 325 ay[idxs[j]] += val; 326 } 327 ay[idxs[nz-1]] = 0.; 328 } 329 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 330 } 331 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 332 } 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 338 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 339 { 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 349 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 350 { 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "PCBDDCBenignShellMat" 360 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 361 { 362 PC_IS *pcis = (PC_IS*)pc->data; 363 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 364 PCBDDCBenignMatMult_ctx ctx; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (!restore) { 369 Mat A_IB,A_BI; 370 PetscScalar *work; 371 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 372 373 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 374 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 375 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 376 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 377 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 378 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 379 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 380 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 381 ierr = PetscNew(&ctx);CHKERRQ(ierr); 382 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 383 ctx->apply_left = PETSC_TRUE; 384 ctx->apply_right = PETSC_FALSE; 385 ctx->apply_p0 = PETSC_FALSE; 386 ctx->benign_n = pcbddc->benign_n; 387 if (reuse) { 388 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 389 ctx->free = PETSC_FALSE; 390 } else { /* TODO: could be optimized for successive solves */ 391 ISLocalToGlobalMapping N_to_D; 392 PetscInt i; 393 394 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 395 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 396 for (i=0;i<pcbddc->benign_n;i++) { 397 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 398 } 399 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 400 ctx->free = PETSC_TRUE; 401 } 402 ctx->A = pcis->A_IB; 403 ctx->work = work; 404 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 405 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 407 pcis->A_IB = A_IB; 408 409 /* A_BI as A_IB^T */ 410 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 411 pcbddc->benign_original_mat = pcis->A_BI; 412 pcis->A_BI = A_BI; 413 } else { 414 if (!pcbddc->benign_original_mat) { 415 PetscFunctionReturn(0); 416 } 417 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 418 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 419 pcis->A_IB = ctx->A; 420 ctx->A = NULL; 421 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 422 pcis->A_BI = pcbddc->benign_original_mat; 423 pcbddc->benign_original_mat = NULL; 424 if (ctx->free) { 425 PetscInt i; 426 for (i=0;i<ctx->benign_n;i++) { 427 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 428 } 429 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 430 } 431 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 432 ierr = PetscFree(ctx);CHKERRQ(ierr); 433 } 434 PetscFunctionReturn(0); 435 } 436 437 /* used just in bddc debug mode */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCBenignProject" 440 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 441 { 442 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 443 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 444 Mat An; 445 PetscErrorCode ierr; 446 447 PetscFunctionBegin; 448 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 449 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 450 if (is1) { 451 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 452 ierr = MatDestroy(&An);CHKERRQ(ierr); 453 } else { 454 *B = An; 455 } 456 PetscFunctionReturn(0); 457 } 458 459 /* TODO: add reuse flag */ 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSeqAIJCompress" 462 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 463 { 464 Mat Bt; 465 PetscScalar *a,*bdata; 466 const PetscInt *ii,*ij; 467 PetscInt m,n,i,nnz,*bii,*bij; 468 PetscBool flg_row; 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 473 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 474 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 475 nnz = n; 476 for (i=0;i<ii[n];i++) { 477 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 478 } 479 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 480 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 481 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 482 nnz = 0; 483 bii[0] = 0; 484 for (i=0;i<n;i++) { 485 PetscInt j; 486 for (j=ii[i];j<ii[i+1];j++) { 487 PetscScalar entry = a[j]; 488 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 489 bij[nnz] = ij[j]; 490 bdata[nnz] = entry; 491 nnz++; 492 } 493 } 494 bii[i+1] = nnz; 495 } 496 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 497 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 498 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 499 { 500 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 501 b->free_a = PETSC_TRUE; 502 b->free_ij = PETSC_TRUE; 503 } 504 *B = Bt; 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatDetectDisconnectedComponents" 510 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 511 { 512 Mat B; 513 IS is_dummy,*cc_n; 514 ISLocalToGlobalMapping l2gmap_dummy; 515 PCBDDCGraph graph; 516 PetscInt i,n; 517 PetscInt *xadj,*adjncy; 518 PetscInt *xadj_filtered,*adjncy_filtered; 519 PetscBool flg_row,isseqaij; 520 PetscErrorCode ierr; 521 522 PetscFunctionBegin; 523 if (!A->rmap->N || !A->cmap->N) { 524 *ncc = 0; 525 *cc = NULL; 526 PetscFunctionReturn(0); 527 } 528 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 529 if (!isseqaij && filter) { 530 PetscBool isseqdense; 531 532 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 533 if (!isseqdense) { 534 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 535 } else { /* TODO: rectangular case and LDA */ 536 PetscScalar *array; 537 PetscReal chop=1.e-6; 538 539 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 540 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 541 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 542 for (i=0;i<n;i++) { 543 PetscInt j; 544 for (j=i+1;j<n;j++) { 545 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 546 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 547 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 548 } 549 } 550 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 551 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 552 } 553 } else { 554 B = A; 555 } 556 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 557 558 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 559 if (filter) { 560 PetscScalar *data; 561 PetscInt j,cum; 562 563 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 564 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 565 cum = 0; 566 for (i=0;i<n;i++) { 567 PetscInt t; 568 569 for (j=xadj[i];j<xadj[i+1];j++) { 570 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 571 continue; 572 } 573 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 574 } 575 t = xadj_filtered[i]; 576 xadj_filtered[i] = cum; 577 cum += t; 578 } 579 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 580 } else { 581 xadj_filtered = NULL; 582 adjncy_filtered = NULL; 583 } 584 585 /* compute local connected components using PCBDDCGraph */ 586 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 587 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 589 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 590 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 591 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 592 if (xadj_filtered) { 593 graph->xadj = xadj_filtered; 594 graph->adjncy = adjncy_filtered; 595 } else { 596 graph->xadj = xadj; 597 graph->adjncy = adjncy; 598 } 599 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 600 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 601 /* partial clean up */ 602 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 603 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 604 if (A != B) { 605 ierr = MatDestroy(&B);CHKERRQ(ierr); 606 } 607 608 /* get back data */ 609 if (ncc) *ncc = graph->ncc; 610 if (cc) { 611 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 612 for (i=0;i<graph->ncc;i++) { 613 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 614 } 615 *cc = cc_n; 616 } 617 /* clean up graph */ 618 graph->xadj = 0; 619 graph->adjncy = 0; 620 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "PCBDDCBenignCheck" 626 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 627 { 628 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 629 PC_IS* pcis = (PC_IS*)(pc->data); 630 IS dirIS = NULL; 631 PetscInt i; 632 PetscErrorCode ierr; 633 634 PetscFunctionBegin; 635 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 636 if (zerodiag) { 637 Mat A; 638 Vec vec3_N; 639 PetscScalar *vals; 640 const PetscInt *idxs; 641 PetscInt nz,*count; 642 643 /* p0 */ 644 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 645 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 646 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 647 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 648 for (i=0;i<nz;i++) vals[i] = 1.; 649 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 650 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 651 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 652 /* v_I */ 653 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 654 for (i=0;i<nz;i++) vals[i] = 0.; 655 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 657 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 658 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 659 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 661 if (dirIS) { 662 PetscInt n; 663 664 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 665 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 666 for (i=0;i<n;i++) vals[i] = 0.; 667 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 668 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 669 } 670 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 671 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 672 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 673 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 674 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 675 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 676 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 677 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 678 ierr = PetscFree(vals);CHKERRQ(ierr); 679 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 680 681 /* there should not be any pressure dofs lying on the interface */ 682 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 683 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 684 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 685 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 686 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 687 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 688 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 689 ierr = PetscFree(count);CHKERRQ(ierr); 690 } 691 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 692 693 /* check PCBDDCBenignGetOrSetP0 */ 694 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 695 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 696 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 697 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 698 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 699 for (i=0;i<pcbddc->benign_n;i++) { 700 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 701 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 702 } 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 708 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 709 { 710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 711 IS pressures,zerodiag,*zerodiag_subs; 712 PetscInt nz,n; 713 PetscInt *interior_dofs,n_interior_dofs; 714 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 720 for (n=0;n<pcbddc->benign_n;n++) { 721 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 722 } 723 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 724 pcbddc->benign_n = 0; 725 /* if a local info on dofs is present, assumes that the last field represents "pressures" 726 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 727 Checks if all the pressure dofs in each subdomain have a zero diagonal 728 If not, a change of basis on pressures is not needed 729 since the local Schur complements are already SPD 730 */ 731 has_null_pressures = PETSC_TRUE; 732 have_null = PETSC_TRUE; 733 if (pcbddc->n_ISForDofsLocal) { 734 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 735 736 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 737 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 738 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 739 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 740 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 741 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 742 if (!sorted) { 743 ierr = ISSort(pressures);CHKERRQ(ierr); 744 } 745 } else { 746 pressures = NULL; 747 } 748 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 749 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 750 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 751 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 752 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 753 if (!sorted) { 754 ierr = ISSort(zerodiag);CHKERRQ(ierr); 755 } 756 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 757 if (!nz) { 758 if (n) have_null = PETSC_FALSE; 759 has_null_pressures = PETSC_FALSE; 760 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 761 } 762 recompute_zerodiag = PETSC_FALSE; 763 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 764 zerodiag_subs = NULL; 765 pcbddc->benign_n = 0; 766 n_interior_dofs = 0; 767 interior_dofs = NULL; 768 if (pcbddc->current_level) { /* need to compute interior nodes */ 769 PetscInt n,i,j; 770 PetscInt n_neigh,*neigh,*n_shared,**shared; 771 PetscInt *iwork; 772 773 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 774 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 775 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 776 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 777 for (i=0;i<n_neigh;i++) 778 for (j=0;j<n_shared[i];j++) 779 iwork[shared[i][j]] += 1; 780 for (i=0;i<n;i++) 781 if (!iwork[i]) 782 interior_dofs[n_interior_dofs++] = i; 783 ierr = PetscFree(iwork);CHKERRQ(ierr); 784 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 785 } 786 if (has_null_pressures) { 787 IS *subs; 788 PetscInt nsubs,i,j,nl; 789 const PetscInt *idxs; 790 PetscScalar *array; 791 Vec *work; 792 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 793 794 subs = pcbddc->local_subs; 795 nsubs = pcbddc->n_local_subs; 796 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 797 if (pcbddc->current_level) { 798 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 799 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 800 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 801 /* work[0] = 1_p */ 802 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 803 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 804 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 805 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 806 /* work[0] = 1_v */ 807 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 808 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 809 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 810 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 811 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 812 } 813 if (nsubs > 1) { 814 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 815 for (i=0;i<nsubs;i++) { 816 ISLocalToGlobalMapping l2g; 817 IS t_zerodiag_subs; 818 PetscInt nl; 819 820 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 821 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 822 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 823 if (nl) { 824 PetscBool valid = PETSC_TRUE; 825 826 if (pcbddc->current_level) { 827 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 828 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 829 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 830 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 831 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 832 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 833 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 834 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 835 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 836 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 837 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 838 for (j=0;j<n_interior_dofs;j++) { 839 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 840 valid = PETSC_FALSE; 841 break; 842 } 843 } 844 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 845 } 846 if (valid && pcbddc->NeumannBoundariesLocal) { 847 IS t_bc; 848 PetscInt nzb; 849 850 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 851 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 852 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 853 if (nzb) valid = PETSC_FALSE; 854 } 855 if (valid && pressures) { 856 IS t_pressure_subs; 857 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 858 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 859 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 860 } 861 if (valid) { 862 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 863 pcbddc->benign_n++; 864 } else { 865 recompute_zerodiag = PETSC_TRUE; 866 } 867 } 868 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 869 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 870 } 871 } else { /* there's just one subdomain (or zero if they have not been detected */ 872 PetscBool valid = PETSC_TRUE; 873 874 if (pcbddc->NeumannBoundariesLocal) { 875 PetscInt nzb; 876 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 877 if (nzb) valid = PETSC_FALSE; 878 } 879 if (valid && pressures) { 880 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 881 } 882 if (valid && pcbddc->current_level) { 883 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 884 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 885 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 886 for (j=0;j<n_interior_dofs;j++) { 887 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 888 valid = PETSC_FALSE; 889 break; 890 } 891 } 892 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 893 } 894 if (valid) { 895 pcbddc->benign_n = 1; 896 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 897 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 898 zerodiag_subs[0] = zerodiag; 899 } 900 } 901 if (pcbddc->current_level) { 902 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 903 } 904 } 905 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 906 907 if (!pcbddc->benign_n) { 908 PetscInt n; 909 910 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 911 recompute_zerodiag = PETSC_FALSE; 912 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 913 if (n) { 914 has_null_pressures = PETSC_FALSE; 915 have_null = PETSC_FALSE; 916 } 917 } 918 919 /* final check for null pressures */ 920 if (zerodiag && pressures) { 921 PetscInt nz,np; 922 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 923 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 924 if (nz != np) have_null = PETSC_FALSE; 925 } 926 927 if (recompute_zerodiag) { 928 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 929 if (pcbddc->benign_n == 1) { 930 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 931 zerodiag = zerodiag_subs[0]; 932 } else { 933 PetscInt i,nzn,*new_idxs; 934 935 nzn = 0; 936 for (i=0;i<pcbddc->benign_n;i++) { 937 PetscInt ns; 938 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 939 nzn += ns; 940 } 941 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 942 nzn = 0; 943 for (i=0;i<pcbddc->benign_n;i++) { 944 PetscInt ns,*idxs; 945 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 946 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 947 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 948 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 949 nzn += ns; 950 } 951 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 952 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 953 } 954 have_null = PETSC_FALSE; 955 } 956 957 /* Prepare matrix to compute no-net-flux */ 958 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 959 Mat A,loc_divudotp; 960 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 961 IS row,col,isused = NULL; 962 PetscInt M,N,n,st,n_isused; 963 964 if (pressures) { 965 isused = pressures; 966 } else { 967 isused = zerodiag; 968 } 969 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 970 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 971 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 972 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 973 n_isused = 0; 974 if (isused) { 975 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 976 } 977 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 978 st = st-n_isused; 979 if (n) { 980 const PetscInt *gidxs; 981 982 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 983 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 984 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 985 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 986 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 987 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 988 } else { 989 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 990 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 991 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 992 } 993 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 994 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 995 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 996 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 997 ierr = ISDestroy(&row);CHKERRQ(ierr); 998 ierr = ISDestroy(&col);CHKERRQ(ierr); 999 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1000 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1001 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1002 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1003 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1004 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1005 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1006 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1007 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1008 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 } 1010 1011 /* change of basis and p0 dofs */ 1012 if (has_null_pressures) { 1013 IS zerodiagc; 1014 const PetscInt *idxs,*idxsc; 1015 PetscInt i,s,*nnz; 1016 1017 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1018 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1019 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1020 /* local change of basis for pressures */ 1021 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1022 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1023 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1024 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1026 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1027 for (i=0;i<pcbddc->benign_n;i++) { 1028 PetscInt nzs,j; 1029 1030 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1031 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1032 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1033 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1034 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1035 } 1036 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1037 ierr = PetscFree(nnz);CHKERRQ(ierr); 1038 /* set identity on velocities */ 1039 for (i=0;i<n-nz;i++) { 1040 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1041 } 1042 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1043 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1044 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1045 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1046 /* set change on pressures */ 1047 for (s=0;s<pcbddc->benign_n;s++) { 1048 PetscScalar *array; 1049 PetscInt nzs; 1050 1051 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1052 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1053 for (i=0;i<nzs-1;i++) { 1054 PetscScalar vals[2]; 1055 PetscInt cols[2]; 1056 1057 cols[0] = idxs[i]; 1058 cols[1] = idxs[nzs-1]; 1059 vals[0] = 1.; 1060 vals[1] = 1.; 1061 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1062 } 1063 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1064 for (i=0;i<nzs-1;i++) array[i] = -1.; 1065 array[nzs-1] = 1.; 1066 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1067 /* store local idxs for p0 */ 1068 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1069 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1070 ierr = PetscFree(array);CHKERRQ(ierr); 1071 } 1072 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1073 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 /* project if needed */ 1075 if (pcbddc->benign_change_explicit) { 1076 Mat M; 1077 1078 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1079 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1080 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1081 ierr = MatDestroy(&M);CHKERRQ(ierr); 1082 } 1083 /* store global idxs for p0 */ 1084 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1085 } 1086 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1087 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1088 1089 /* determines if the coarse solver will be singular or not */ 1090 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1091 /* determines if the problem has subdomains with 0 pressure block */ 1092 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1093 *zerodiaglocal = zerodiag; 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1099 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1100 { 1101 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1102 PetscScalar *array; 1103 PetscErrorCode ierr; 1104 1105 PetscFunctionBegin; 1106 if (!pcbddc->benign_sf) { 1107 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1108 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1109 } 1110 if (get) { 1111 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1112 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1113 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1115 } else { 1116 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1117 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1118 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1119 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1120 } 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1126 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1127 { 1128 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 /* TODO: add error checking 1133 - avoid nested pop (or push) calls. 1134 - cannot push before pop. 1135 - cannot call this if pcbddc->local_mat is NULL 1136 */ 1137 if (!pcbddc->benign_n) { 1138 PetscFunctionReturn(0); 1139 } 1140 if (pop) { 1141 if (pcbddc->benign_change_explicit) { 1142 IS is_p0; 1143 MatReuse reuse; 1144 1145 /* extract B_0 */ 1146 reuse = MAT_INITIAL_MATRIX; 1147 if (pcbddc->benign_B0) { 1148 reuse = MAT_REUSE_MATRIX; 1149 } 1150 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1151 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1152 /* remove rows and cols from local problem */ 1153 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1154 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1155 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1156 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1157 } else { 1158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1159 PetscScalar *vals; 1160 PetscInt i,n,*idxs_ins; 1161 1162 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1163 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1164 if (!pcbddc->benign_B0) { 1165 PetscInt *nnz; 1166 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1167 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1168 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1170 for (i=0;i<pcbddc->benign_n;i++) { 1171 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1172 nnz[i] = n - nnz[i]; 1173 } 1174 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1175 ierr = PetscFree(nnz);CHKERRQ(ierr); 1176 } 1177 1178 for (i=0;i<pcbddc->benign_n;i++) { 1179 PetscScalar *array; 1180 PetscInt *idxs,j,nz,cum; 1181 1182 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1183 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1184 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1185 for (j=0;j<nz;j++) vals[j] = 1.; 1186 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1187 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1188 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1189 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1190 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1191 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1192 cum = 0; 1193 for (j=0;j<n;j++) { 1194 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1195 vals[cum] = array[j]; 1196 idxs_ins[cum] = j; 1197 cum++; 1198 } 1199 } 1200 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1202 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1203 } 1204 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1205 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1206 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1207 } 1208 } else { /* push */ 1209 if (pcbddc->benign_change_explicit) { 1210 PetscInt i; 1211 1212 for (i=0;i<pcbddc->benign_n;i++) { 1213 PetscScalar *B0_vals; 1214 PetscInt *B0_cols,B0_ncol; 1215 1216 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1217 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1218 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1219 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1220 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1221 } 1222 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1223 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1224 } else { 1225 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1226 } 1227 } 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1234 { 1235 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1236 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1237 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1238 PetscBLASInt *B_iwork,*B_ifail; 1239 PetscScalar *work,lwork; 1240 PetscScalar *St,*S,*eigv; 1241 PetscScalar *Sarray,*Starray; 1242 PetscReal *eigs,thresh; 1243 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1244 PetscBool allocated_S_St; 1245 #if defined(PETSC_USE_COMPLEX) 1246 PetscReal *rwork; 1247 #endif 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1252 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1253 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1254 1255 if (pcbddc->dbg_flag) { 1256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1259 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 } 1261 1262 if (pcbddc->dbg_flag) { 1263 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1264 } 1265 1266 /* max size of subsets */ 1267 mss = 0; 1268 for (i=0;i<sub_schurs->n_subs;i++) { 1269 PetscInt subset_size; 1270 1271 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1272 mss = PetscMax(mss,subset_size); 1273 } 1274 1275 /* min/max and threshold */ 1276 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1277 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1278 nmax = PetscMax(nmin,nmax); 1279 allocated_S_St = PETSC_FALSE; 1280 if (nmin) { 1281 allocated_S_St = PETSC_TRUE; 1282 } 1283 1284 /* allocate lapack workspace */ 1285 cum = cum2 = 0; 1286 maxneigs = 0; 1287 for (i=0;i<sub_schurs->n_subs;i++) { 1288 PetscInt n,subset_size; 1289 1290 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1291 n = PetscMin(subset_size,nmax); 1292 cum += subset_size; 1293 cum2 += subset_size*n; 1294 maxneigs = PetscMax(maxneigs,n); 1295 } 1296 if (mss) { 1297 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1298 PetscBLASInt B_itype = 1; 1299 PetscBLASInt B_N = mss; 1300 PetscReal zero = 0.0; 1301 PetscReal eps = 0.0; /* dlamch? */ 1302 1303 B_lwork = -1; 1304 S = NULL; 1305 St = NULL; 1306 eigs = NULL; 1307 eigv = NULL; 1308 B_iwork = NULL; 1309 B_ifail = NULL; 1310 #if defined(PETSC_USE_COMPLEX) 1311 rwork = NULL; 1312 #endif 1313 thresh = 1.0; 1314 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1315 #if defined(PETSC_USE_COMPLEX) 1316 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1317 #else 1318 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1319 #endif 1320 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1321 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1322 } else { 1323 /* TODO */ 1324 } 1325 } else { 1326 lwork = 0; 1327 } 1328 1329 nv = 0; 1330 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1331 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1332 } 1333 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1334 if (allocated_S_St) { 1335 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1336 } 1337 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1338 #if defined(PETSC_USE_COMPLEX) 1339 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1340 #endif 1341 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1342 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1343 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1344 nv+cum,&pcbddc->adaptive_constraints_idxs, 1345 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1346 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1347 1348 maxneigs = 0; 1349 cum = cumarray = 0; 1350 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1351 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1352 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1353 const PetscInt *idxs; 1354 1355 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1356 for (cum=0;cum<nv;cum++) { 1357 pcbddc->adaptive_constraints_n[cum] = 1; 1358 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1359 pcbddc->adaptive_constraints_data[cum] = 1.0; 1360 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1361 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1362 } 1363 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1364 } 1365 1366 if (mss) { /* multilevel */ 1367 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1368 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1369 } 1370 1371 thresh = pcbddc->adaptive_threshold; 1372 for (i=0;i<sub_schurs->n_subs;i++) { 1373 const PetscInt *idxs; 1374 PetscReal upper,lower; 1375 PetscInt j,subset_size,eigs_start = 0; 1376 PetscBLASInt B_N; 1377 PetscBool same_data = PETSC_FALSE; 1378 1379 if (pcbddc->use_deluxe_scaling) { 1380 upper = PETSC_MAX_REAL; 1381 lower = thresh; 1382 } else { 1383 upper = 1./thresh; 1384 lower = 0.; 1385 } 1386 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1387 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1388 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1389 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1390 if (sub_schurs->is_hermitian) { 1391 PetscInt j,k; 1392 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1393 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1395 } 1396 for (j=0;j<subset_size;j++) { 1397 for (k=j;k<subset_size;k++) { 1398 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1399 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1400 } 1401 } 1402 } else { 1403 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1404 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1405 } 1406 } else { 1407 S = Sarray + cumarray; 1408 St = Starray + cumarray; 1409 } 1410 /* see if we can save some work */ 1411 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1412 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1413 } 1414 1415 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1416 B_neigs = 0; 1417 } else { 1418 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1419 PetscBLASInt B_itype = 1; 1420 PetscBLASInt B_IL, B_IU; 1421 PetscReal eps = -1.0; /* dlamch? */ 1422 PetscInt nmin_s; 1423 PetscBool compute_range = PETSC_FALSE; 1424 1425 if (pcbddc->dbg_flag) { 1426 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1427 } 1428 1429 compute_range = PETSC_FALSE; 1430 if (thresh > 1.+PETSC_SMALL && !same_data) { 1431 compute_range = PETSC_TRUE; 1432 } 1433 1434 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1435 if (compute_range) { 1436 1437 /* ask for eigenvalues larger than thresh */ 1438 #if defined(PETSC_USE_COMPLEX) 1439 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1440 #else 1441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1442 #endif 1443 } else if (!same_data) { 1444 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1445 B_IL = 1; 1446 #if defined(PETSC_USE_COMPLEX) 1447 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1448 #else 1449 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1450 #endif 1451 } else { /* same_data is true, so get the adaptive function requested by the user */ 1452 PetscInt k; 1453 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1454 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1455 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1456 nmin = nmax; 1457 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1458 for (k=0;k<nmax;k++) { 1459 eigs[k] = 1./PETSC_SMALL; 1460 eigv[k*(subset_size+1)] = 1.0; 1461 } 1462 } 1463 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1464 if (B_ierr) { 1465 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1466 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1467 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1468 } 1469 1470 if (B_neigs > nmax) { 1471 if (pcbddc->dbg_flag) { 1472 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1473 } 1474 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1475 B_neigs = nmax; 1476 } 1477 1478 nmin_s = PetscMin(nmin,B_N); 1479 if (B_neigs < nmin_s) { 1480 PetscBLASInt B_neigs2; 1481 1482 if (pcbddc->use_deluxe_scaling) { 1483 B_IL = B_N - nmin_s + 1; 1484 B_IU = B_N - B_neigs; 1485 } else { 1486 B_IL = B_neigs + 1; 1487 B_IU = nmin_s; 1488 } 1489 if (pcbddc->dbg_flag) { 1490 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1491 } 1492 if (sub_schurs->is_hermitian) { 1493 PetscInt j,k; 1494 for (j=0;j<subset_size;j++) { 1495 for (k=j;k<subset_size;k++) { 1496 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1497 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1498 } 1499 } 1500 } else { 1501 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1502 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1503 } 1504 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1505 #if defined(PETSC_USE_COMPLEX) 1506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1507 #else 1508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1509 #endif 1510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1511 B_neigs += B_neigs2; 1512 } 1513 if (B_ierr) { 1514 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1515 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1516 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1517 } 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1520 for (j=0;j<B_neigs;j++) { 1521 if (eigs[j] == 0.0) { 1522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1523 } else { 1524 if (pcbddc->use_deluxe_scaling) { 1525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1526 } else { 1527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1528 } 1529 } 1530 } 1531 } 1532 } else { 1533 /* TODO */ 1534 } 1535 } 1536 /* change the basis back to the original one */ 1537 if (sub_schurs->change) { 1538 Mat change,phi,phit; 1539 1540 if (pcbddc->dbg_flag > 1) { 1541 PetscInt ii; 1542 for (ii=0;ii<B_neigs;ii++) { 1543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1544 for (j=0;j<B_N;j++) { 1545 #if defined(PETSC_USE_COMPLEX) 1546 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 1547 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 1548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1549 #else 1550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1551 #endif 1552 } 1553 } 1554 } 1555 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1556 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1557 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1558 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1559 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1560 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1561 } 1562 maxneigs = PetscMax(B_neigs,maxneigs); 1563 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1564 if (B_neigs) { 1565 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1566 1567 if (pcbddc->dbg_flag > 1) { 1568 PetscInt ii; 1569 for (ii=0;ii<B_neigs;ii++) { 1570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1571 for (j=0;j<B_N;j++) { 1572 #if defined(PETSC_USE_COMPLEX) 1573 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1574 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1575 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1576 #else 1577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1578 #endif 1579 } 1580 } 1581 } 1582 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1583 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1584 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1585 cum++; 1586 } 1587 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1588 /* shift for next computation */ 1589 cumarray += subset_size*subset_size; 1590 } 1591 if (pcbddc->dbg_flag) { 1592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1593 } 1594 1595 if (mss) { 1596 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1597 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1598 /* destroy matrices (junk) */ 1599 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1600 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1601 } 1602 if (allocated_S_St) { 1603 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1604 } 1605 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1606 #if defined(PETSC_USE_COMPLEX) 1607 ierr = PetscFree(rwork);CHKERRQ(ierr); 1608 #endif 1609 if (pcbddc->dbg_flag) { 1610 PetscInt maxneigs_r; 1611 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1612 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1613 } 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "PCBDDCSetUpSolvers" 1619 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1620 { 1621 PetscScalar *coarse_submat_vals; 1622 PetscErrorCode ierr; 1623 1624 PetscFunctionBegin; 1625 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1626 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1627 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1628 1629 /* Setup local neumann solver ksp_R */ 1630 /* PCBDDCSetUpLocalScatters should be called first! */ 1631 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1632 1633 /* 1634 Setup local correction and local part of coarse basis. 1635 Gives back the dense local part of the coarse matrix in column major ordering 1636 */ 1637 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1638 1639 /* Compute total number of coarse nodes and setup coarse solver */ 1640 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1641 1642 /* free */ 1643 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "PCBDDCResetCustomization" 1649 PetscErrorCode PCBDDCResetCustomization(PC pc) 1650 { 1651 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1652 PetscErrorCode ierr; 1653 1654 PetscFunctionBegin; 1655 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1656 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1657 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1658 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1659 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1660 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1661 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1662 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1663 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1664 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1665 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "PCBDDCResetTopography" 1671 PetscErrorCode PCBDDCResetTopography(PC pc) 1672 { 1673 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1674 PetscInt i; 1675 PetscErrorCode ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1679 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1680 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1681 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1682 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1683 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1684 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1685 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1686 pcbddc->graphanalyzed = PETSC_FALSE; 1687 for (i=0;i<pcbddc->n_local_subs;i++) { 1688 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1689 } 1690 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1691 if (pcbddc->sub_schurs) { 1692 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "PCBDDCResetSolvers" 1699 PetscErrorCode PCBDDCResetSolvers(PC pc) 1700 { 1701 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1702 PetscErrorCode ierr; 1703 1704 PetscFunctionBegin; 1705 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1706 if (pcbddc->coarse_phi_B) { 1707 PetscScalar *array; 1708 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1709 ierr = PetscFree(array);CHKERRQ(ierr); 1710 } 1711 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1712 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1713 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1714 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1715 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1716 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1717 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1718 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1719 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1720 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1721 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1722 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1723 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1724 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1725 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1726 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1727 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1728 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1729 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1730 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1731 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1732 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1733 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1734 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1735 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1736 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1737 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1738 if (pcbddc->benign_zerodiag_subs) { 1739 PetscInt i; 1740 for (i=0;i<pcbddc->benign_n;i++) { 1741 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1742 } 1743 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1744 } 1745 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1751 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1752 { 1753 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1754 PC_IS *pcis = (PC_IS*)pc->data; 1755 VecType impVecType; 1756 PetscInt n_constraints,n_R,old_size; 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1761 /* get sizes */ 1762 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1763 n_R = pcis->n - pcbddc->n_vertices; 1764 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1765 /* local work vectors (try to avoid unneeded work)*/ 1766 /* R nodes */ 1767 old_size = -1; 1768 if (pcbddc->vec1_R) { 1769 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1770 } 1771 if (n_R != old_size) { 1772 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1773 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1774 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1775 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1776 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1777 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1778 } 1779 /* local primal dofs */ 1780 old_size = -1; 1781 if (pcbddc->vec1_P) { 1782 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1783 } 1784 if (pcbddc->local_primal_size != old_size) { 1785 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1786 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1787 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1788 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1789 } 1790 /* local explicit constraints */ 1791 old_size = -1; 1792 if (pcbddc->vec1_C) { 1793 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1794 } 1795 if (n_constraints && n_constraints != old_size) { 1796 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1797 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1798 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1799 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "PCBDDCSetUpCorrection" 1806 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1807 { 1808 PetscErrorCode ierr; 1809 /* pointers to pcis and pcbddc */ 1810 PC_IS* pcis = (PC_IS*)pc->data; 1811 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1812 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1813 /* submatrices of local problem */ 1814 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1815 /* submatrices of local coarse problem */ 1816 Mat S_VV,S_CV,S_VC,S_CC; 1817 /* working matrices */ 1818 Mat C_CR; 1819 /* additional working stuff */ 1820 PC pc_R; 1821 Mat F; 1822 Vec dummy_vec; 1823 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1824 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1825 PetscScalar *work; 1826 PetscInt *idx_V_B; 1827 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1828 PetscInt i,n_R,n_D,n_B; 1829 1830 /* some shortcuts to scalars */ 1831 PetscScalar one=1.0,m_one=-1.0; 1832 1833 PetscFunctionBegin; 1834 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1835 1836 /* Set Non-overlapping dimensions */ 1837 n_vertices = pcbddc->n_vertices; 1838 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1839 n_B = pcis->n_B; 1840 n_D = pcis->n - n_B; 1841 n_R = pcis->n - n_vertices; 1842 1843 /* vertices in boundary numbering */ 1844 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1845 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1846 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1847 1848 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1849 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1851 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1853 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1854 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1855 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1856 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1857 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1858 1859 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1860 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1861 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1862 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1863 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1864 lda_rhs = n_R; 1865 need_benign_correction = PETSC_FALSE; 1866 if (isLU || isILU || isCHOL) { 1867 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1868 } else if (sub_schurs && sub_schurs->reuse_solver) { 1869 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1870 MatFactorType type; 1871 1872 F = reuse_solver->F; 1873 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1874 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1875 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1876 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1877 } else { 1878 F = NULL; 1879 } 1880 1881 /* allocate workspace */ 1882 n = 0; 1883 if (n_constraints) { 1884 n += lda_rhs*n_constraints; 1885 } 1886 if (n_vertices) { 1887 n = PetscMax(2*lda_rhs*n_vertices,n); 1888 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1889 } 1890 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1891 1892 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1893 dummy_vec = NULL; 1894 if (need_benign_correction && lda_rhs != n_R && F) { 1895 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1896 } 1897 1898 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1899 if (n_constraints) { 1900 Mat M1,M2,M3,C_B; 1901 IS is_aux; 1902 PetscScalar *array,*array2; 1903 1904 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1905 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1906 1907 /* Extract constraints on R nodes: C_{CR} */ 1908 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1909 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1910 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1911 1912 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1913 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1914 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1915 for (i=0;i<n_constraints;i++) { 1916 const PetscScalar *row_cmat_values; 1917 const PetscInt *row_cmat_indices; 1918 PetscInt size_of_constraint,j; 1919 1920 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1921 for (j=0;j<size_of_constraint;j++) { 1922 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1923 } 1924 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1925 } 1926 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1927 if (F) { 1928 Mat B; 1929 1930 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1931 if (need_benign_correction) { 1932 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1933 1934 /* rhs is already zero on interior dofs, no need to change the rhs */ 1935 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1936 } 1937 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1938 if (need_benign_correction) { 1939 PetscScalar *marr; 1940 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1941 1942 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1943 if (lda_rhs != n_R) { 1944 for (i=0;i<n_constraints;i++) { 1945 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1947 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1948 } 1949 } else { 1950 for (i=0;i<n_constraints;i++) { 1951 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1952 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1953 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1954 } 1955 } 1956 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1957 } 1958 ierr = MatDestroy(&B);CHKERRQ(ierr); 1959 } else { 1960 PetscScalar *marr; 1961 1962 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1963 for (i=0;i<n_constraints;i++) { 1964 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1965 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1966 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1967 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1968 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1969 } 1970 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1971 } 1972 if (!pcbddc->switch_static) { 1973 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1974 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1975 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1976 for (i=0;i<n_constraints;i++) { 1977 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1978 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1979 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1980 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1981 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1982 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1983 } 1984 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1985 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1986 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1987 } else { 1988 if (lda_rhs != n_R) { 1989 IS dummy; 1990 1991 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1992 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1993 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1994 } else { 1995 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1996 pcbddc->local_auxmat2 = local_auxmat2_R; 1997 } 1998 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1999 } 2000 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2001 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2002 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2003 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2004 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2005 if (isCHOL) { 2006 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2007 } else { 2008 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2009 } 2010 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2011 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2012 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2013 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2014 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2015 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2016 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2017 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2018 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2019 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2020 } 2021 2022 /* Get submatrices from subdomain matrix */ 2023 if (n_vertices) { 2024 IS is_aux; 2025 2026 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2027 IS tis; 2028 2029 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2030 ierr = ISSort(tis);CHKERRQ(ierr); 2031 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2032 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2033 } else { 2034 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2035 } 2036 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2037 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2038 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2039 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2040 } 2041 2042 /* Matrix of coarse basis functions (local) */ 2043 if (pcbddc->coarse_phi_B) { 2044 PetscInt on_B,on_primal,on_D=n_D; 2045 if (pcbddc->coarse_phi_D) { 2046 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2047 } 2048 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2049 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2050 PetscScalar *marray; 2051 2052 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2053 ierr = PetscFree(marray);CHKERRQ(ierr); 2054 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2055 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2056 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2057 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2058 } 2059 } 2060 2061 if (!pcbddc->coarse_phi_B) { 2062 PetscScalar *marray; 2063 2064 n = n_B*pcbddc->local_primal_size; 2065 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2066 n += n_D*pcbddc->local_primal_size; 2067 } 2068 if (!pcbddc->symmetric_primal) { 2069 n *= 2; 2070 } 2071 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2072 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2073 n = n_B*pcbddc->local_primal_size; 2074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2075 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2076 n += n_D*pcbddc->local_primal_size; 2077 } 2078 if (!pcbddc->symmetric_primal) { 2079 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2080 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2081 n = n_B*pcbddc->local_primal_size; 2082 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2083 } 2084 } else { 2085 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2086 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2087 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2088 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2089 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2090 } 2091 } 2092 } 2093 2094 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2095 p0_lidx_I = NULL; 2096 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2097 const PetscInt *idxs; 2098 2099 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2100 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2101 for (i=0;i<pcbddc->benign_n;i++) { 2102 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2103 } 2104 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2105 } 2106 2107 /* vertices */ 2108 if (n_vertices) { 2109 2110 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2111 2112 if (n_R) { 2113 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2114 PetscBLASInt B_N,B_one = 1; 2115 PetscScalar *x,*y; 2116 PetscBool isseqaij; 2117 2118 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2119 if (need_benign_correction) { 2120 ISLocalToGlobalMapping RtoN; 2121 IS is_p0; 2122 PetscInt *idxs_p0,n; 2123 2124 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2125 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2126 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2127 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2128 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2129 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2130 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2131 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2132 } 2133 2134 if (lda_rhs == n_R) { 2135 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2136 } else { 2137 PetscScalar *av,*array; 2138 const PetscInt *xadj,*adjncy; 2139 PetscInt n; 2140 PetscBool flg_row; 2141 2142 array = work+lda_rhs*n_vertices; 2143 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2144 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2145 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2146 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2147 for (i=0;i<n;i++) { 2148 PetscInt j; 2149 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2150 } 2151 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2152 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2154 } 2155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2156 if (need_benign_correction) { 2157 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2158 PetscScalar *marr; 2159 2160 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2161 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2162 2163 | 0 0 0 | (V) 2164 L = | 0 0 -1 | (P-p0) 2165 | 0 0 -1 | (p0) 2166 2167 */ 2168 for (i=0;i<reuse_solver->benign_n;i++) { 2169 const PetscScalar *vals; 2170 const PetscInt *idxs,*idxs_zero; 2171 PetscInt n,j,nz; 2172 2173 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2174 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2175 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2176 for (j=0;j<n;j++) { 2177 PetscScalar val = vals[j]; 2178 PetscInt k,col = idxs[j]; 2179 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2180 } 2181 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2182 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2183 } 2184 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2185 } 2186 if (F) { 2187 /* need to correct the rhs */ 2188 if (need_benign_correction) { 2189 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2190 PetscScalar *marr; 2191 2192 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2193 if (lda_rhs != n_R) { 2194 for (i=0;i<n_vertices;i++) { 2195 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2196 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2198 } 2199 } else { 2200 for (i=0;i<n_vertices;i++) { 2201 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2202 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2203 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2204 } 2205 } 2206 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2207 } 2208 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2209 /* need to correct the solution */ 2210 if (need_benign_correction) { 2211 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2212 PetscScalar *marr; 2213 2214 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2215 if (lda_rhs != n_R) { 2216 for (i=0;i<n_vertices;i++) { 2217 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2218 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2219 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2220 } 2221 } else { 2222 for (i=0;i<n_vertices;i++) { 2223 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2224 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2225 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2226 } 2227 } 2228 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2229 } 2230 } else { 2231 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2232 for (i=0;i<n_vertices;i++) { 2233 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2234 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2235 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2236 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2237 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2238 } 2239 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2240 } 2241 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2242 /* S_VV and S_CV */ 2243 if (n_constraints) { 2244 Mat B; 2245 2246 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2247 for (i=0;i<n_vertices;i++) { 2248 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2249 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2250 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2251 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2252 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2253 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2254 } 2255 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2256 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2257 ierr = MatDestroy(&B);CHKERRQ(ierr); 2258 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2259 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2260 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2261 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2262 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2263 ierr = MatDestroy(&B);CHKERRQ(ierr); 2264 } 2265 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2266 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2267 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2268 } 2269 if (lda_rhs != n_R) { 2270 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2271 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2272 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2273 } 2274 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2275 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2276 if (need_benign_correction) { 2277 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2278 PetscScalar *marr,*sums; 2279 2280 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2281 ierr = MatDenseGetArray(S_VVt,&marr); 2282 for (i=0;i<reuse_solver->benign_n;i++) { 2283 const PetscScalar *vals; 2284 const PetscInt *idxs,*idxs_zero; 2285 PetscInt n,j,nz; 2286 2287 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2288 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2289 for (j=0;j<n_vertices;j++) { 2290 PetscInt k; 2291 sums[j] = 0.; 2292 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2293 } 2294 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2295 for (j=0;j<n;j++) { 2296 PetscScalar val = vals[j]; 2297 PetscInt k; 2298 for (k=0;k<n_vertices;k++) { 2299 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2300 } 2301 } 2302 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2303 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2304 } 2305 ierr = PetscFree(sums);CHKERRQ(ierr); 2306 ierr = MatDenseRestoreArray(S_VVt,&marr); 2307 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2308 } 2309 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2310 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2311 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2312 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2313 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2314 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2315 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2316 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2317 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2318 } else { 2319 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2320 } 2321 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2322 2323 /* coarse basis functions */ 2324 for (i=0;i<n_vertices;i++) { 2325 PetscScalar *y; 2326 2327 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2328 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2329 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2330 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2331 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2332 y[n_B*i+idx_V_B[i]] = 1.0; 2333 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2334 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2335 2336 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2337 PetscInt j; 2338 2339 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2340 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2341 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2342 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2343 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2344 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2345 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2346 } 2347 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2348 } 2349 /* if n_R == 0 the object is not destroyed */ 2350 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2351 } 2352 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2353 2354 if (n_constraints) { 2355 Mat B; 2356 2357 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2358 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2359 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2360 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2361 if (n_vertices) { 2362 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2363 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2364 } else { 2365 Mat S_VCt; 2366 2367 if (lda_rhs != n_R) { 2368 ierr = MatDestroy(&B);CHKERRQ(ierr); 2369 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2370 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2371 } 2372 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2373 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2374 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2375 } 2376 } 2377 ierr = MatDestroy(&B);CHKERRQ(ierr); 2378 /* coarse basis functions */ 2379 for (i=0;i<n_constraints;i++) { 2380 PetscScalar *y; 2381 2382 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2383 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2384 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2385 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2386 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2387 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2388 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2389 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2390 PetscInt j; 2391 2392 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2393 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2394 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2395 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2396 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2397 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2398 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2399 } 2400 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2401 } 2402 } 2403 if (n_constraints) { 2404 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2405 } 2406 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2407 2408 /* coarse matrix entries relative to B_0 */ 2409 if (pcbddc->benign_n) { 2410 Mat B0_B,B0_BPHI; 2411 IS is_dummy; 2412 PetscScalar *data; 2413 PetscInt j; 2414 2415 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2416 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2417 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2418 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2419 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2420 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2421 for (j=0;j<pcbddc->benign_n;j++) { 2422 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2423 for (i=0;i<pcbddc->local_primal_size;i++) { 2424 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2425 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2426 } 2427 } 2428 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2429 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2430 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2431 } 2432 2433 /* compute other basis functions for non-symmetric problems */ 2434 if (!pcbddc->symmetric_primal) { 2435 Mat B_V=NULL,B_C=NULL; 2436 PetscScalar *marray; 2437 2438 if (n_constraints) { 2439 Mat S_CCT,C_CRT; 2440 2441 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2442 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2443 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2444 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2445 if (n_vertices) { 2446 Mat S_VCT; 2447 2448 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2449 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2450 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2451 } 2452 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2453 } else { 2454 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2455 } 2456 if (n_vertices && n_R) { 2457 PetscScalar *av,*marray; 2458 const PetscInt *xadj,*adjncy; 2459 PetscInt n; 2460 PetscBool flg_row; 2461 2462 /* B_V = B_V - A_VR^T */ 2463 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2464 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2465 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2466 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2467 for (i=0;i<n;i++) { 2468 PetscInt j; 2469 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2470 } 2471 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2472 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2473 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2474 } 2475 2476 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2477 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2478 for (i=0;i<n_vertices;i++) { 2479 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2480 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2481 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2482 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2483 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2484 } 2485 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2486 if (B_C) { 2487 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2488 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2489 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2490 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2491 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2492 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2493 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2494 } 2495 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2496 } 2497 /* coarse basis functions */ 2498 for (i=0;i<pcbddc->local_primal_size;i++) { 2499 PetscScalar *y; 2500 2501 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2502 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2503 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2504 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2505 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2506 if (i<n_vertices) { 2507 y[n_B*i+idx_V_B[i]] = 1.0; 2508 } 2509 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2510 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2511 2512 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2513 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2514 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2515 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2518 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2519 } 2520 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2521 } 2522 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2523 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2524 } 2525 /* free memory */ 2526 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2527 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2528 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2529 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2530 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2531 ierr = PetscFree(work);CHKERRQ(ierr); 2532 if (n_vertices) { 2533 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2534 } 2535 if (n_constraints) { 2536 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2537 } 2538 /* Checking coarse_sub_mat and coarse basis functios */ 2539 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2540 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2541 if (pcbddc->dbg_flag) { 2542 Mat coarse_sub_mat; 2543 Mat AUXMAT,TM1,TM2,TM3,TM4; 2544 Mat coarse_phi_D,coarse_phi_B; 2545 Mat coarse_psi_D,coarse_psi_B; 2546 Mat A_II,A_BB,A_IB,A_BI; 2547 Mat C_B,CPHI; 2548 IS is_dummy; 2549 Vec mones; 2550 MatType checkmattype=MATSEQAIJ; 2551 PetscReal real_value; 2552 2553 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2554 Mat A; 2555 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2556 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2557 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2558 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2559 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2560 ierr = MatDestroy(&A);CHKERRQ(ierr); 2561 } else { 2562 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2563 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2564 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2565 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2566 } 2567 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2568 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2569 if (!pcbddc->symmetric_primal) { 2570 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2571 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2572 } 2573 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2574 2575 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2578 if (!pcbddc->symmetric_primal) { 2579 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2580 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2582 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2583 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2585 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2586 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2588 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2589 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2591 } else { 2592 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2593 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2594 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2595 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2597 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2598 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2599 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2600 } 2601 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2602 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2603 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2604 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2605 if (pcbddc->benign_n) { 2606 Mat B0_B,B0_BPHI; 2607 PetscScalar *data,*data2; 2608 PetscInt j; 2609 2610 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2611 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2612 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2613 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2614 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2615 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2616 for (j=0;j<pcbddc->benign_n;j++) { 2617 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2618 for (i=0;i<pcbddc->local_primal_size;i++) { 2619 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2620 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2621 } 2622 } 2623 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2624 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2625 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2626 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2627 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2628 } 2629 #if 0 2630 { 2631 PetscViewer viewer; 2632 char filename[256]; 2633 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2634 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2635 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2636 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2637 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2638 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2639 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2640 if (save_change) { 2641 Mat phi_B; 2642 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2643 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2644 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2645 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2646 } else { 2647 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2648 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2649 } 2650 if (pcbddc->coarse_phi_D) { 2651 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2652 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2653 } 2654 if (pcbddc->coarse_psi_B) { 2655 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2656 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2657 } 2658 if (pcbddc->coarse_psi_D) { 2659 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2660 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2661 } 2662 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2663 } 2664 #endif 2665 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2666 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2667 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2668 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2669 2670 /* check constraints */ 2671 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2672 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2673 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2674 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2675 } else { 2676 PetscScalar *data; 2677 Mat tmat; 2678 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2679 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2680 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2681 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2682 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2683 } 2684 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2685 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2686 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2687 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2689 if (!pcbddc->symmetric_primal) { 2690 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2691 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2692 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2693 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2695 } 2696 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2697 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2698 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2699 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2700 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2701 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2702 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2703 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2704 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2705 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2706 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2707 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2708 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2709 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2710 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2711 if (!pcbddc->symmetric_primal) { 2712 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2713 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2714 } 2715 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2716 } 2717 /* get back data */ 2718 *coarse_submat_vals_n = coarse_submat_vals; 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2724 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2725 { 2726 Mat *work_mat; 2727 IS isrow_s,iscol_s; 2728 PetscBool rsorted,csorted; 2729 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2734 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2735 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2736 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2737 2738 if (!rsorted) { 2739 const PetscInt *idxs; 2740 PetscInt *idxs_sorted,i; 2741 2742 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2743 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2744 for (i=0;i<rsize;i++) { 2745 idxs_perm_r[i] = i; 2746 } 2747 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2748 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2749 for (i=0;i<rsize;i++) { 2750 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2751 } 2752 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2753 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2754 } else { 2755 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2756 isrow_s = isrow; 2757 } 2758 2759 if (!csorted) { 2760 if (isrow == iscol) { 2761 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2762 iscol_s = isrow_s; 2763 } else { 2764 const PetscInt *idxs; 2765 PetscInt *idxs_sorted,i; 2766 2767 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2768 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2769 for (i=0;i<csize;i++) { 2770 idxs_perm_c[i] = i; 2771 } 2772 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2773 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2774 for (i=0;i<csize;i++) { 2775 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2776 } 2777 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2778 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2779 } 2780 } else { 2781 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2782 iscol_s = iscol; 2783 } 2784 2785 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2786 2787 if (!rsorted || !csorted) { 2788 Mat new_mat; 2789 IS is_perm_r,is_perm_c; 2790 2791 if (!rsorted) { 2792 PetscInt *idxs_r,i; 2793 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2794 for (i=0;i<rsize;i++) { 2795 idxs_r[idxs_perm_r[i]] = i; 2796 } 2797 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2798 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2799 } else { 2800 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2801 } 2802 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2803 2804 if (!csorted) { 2805 if (isrow_s == iscol_s) { 2806 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2807 is_perm_c = is_perm_r; 2808 } else { 2809 PetscInt *idxs_c,i; 2810 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2811 for (i=0;i<csize;i++) { 2812 idxs_c[idxs_perm_c[i]] = i; 2813 } 2814 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2815 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2816 } 2817 } else { 2818 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2819 } 2820 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2821 2822 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2823 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2824 work_mat[0] = new_mat; 2825 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2826 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2827 } 2828 2829 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2830 *B = work_mat[0]; 2831 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2832 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2833 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2839 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2840 { 2841 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2842 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2843 Mat new_mat; 2844 IS is_local,is_global; 2845 PetscInt local_size; 2846 PetscBool isseqaij; 2847 PetscErrorCode ierr; 2848 2849 PetscFunctionBegin; 2850 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2851 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2852 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2853 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2854 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2855 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2856 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2857 2858 /* check */ 2859 if (pcbddc->dbg_flag) { 2860 Vec x,x_change; 2861 PetscReal error; 2862 2863 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2864 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2865 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2866 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2867 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2868 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2869 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2870 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2871 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2872 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2873 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2874 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2875 ierr = VecDestroy(&x);CHKERRQ(ierr); 2876 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2877 } 2878 2879 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2880 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2881 if (isseqaij) { 2882 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2883 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2884 } else { 2885 Mat work_mat; 2886 2887 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2888 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2889 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2890 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2891 } 2892 if (matis->A->symmetric_set) { 2893 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2894 #if !defined(PETSC_USE_COMPLEX) 2895 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2896 #endif 2897 } 2898 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 #undef __FUNCT__ 2903 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2904 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2905 { 2906 PC_IS* pcis = (PC_IS*)(pc->data); 2907 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2908 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2909 PetscInt *idx_R_local=NULL; 2910 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2911 PetscInt vbs,bs; 2912 PetscBT bitmask=NULL; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 /* 2917 No need to setup local scatters if 2918 - primal space is unchanged 2919 AND 2920 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2921 AND 2922 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2923 */ 2924 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2925 PetscFunctionReturn(0); 2926 } 2927 /* destroy old objects */ 2928 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2929 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2930 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2931 /* Set Non-overlapping dimensions */ 2932 n_B = pcis->n_B; 2933 n_D = pcis->n - n_B; 2934 n_vertices = pcbddc->n_vertices; 2935 2936 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2937 2938 /* create auxiliary bitmask and allocate workspace */ 2939 if (!sub_schurs || !sub_schurs->reuse_solver) { 2940 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2941 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2942 for (i=0;i<n_vertices;i++) { 2943 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2944 } 2945 2946 for (i=0, n_R=0; i<pcis->n; i++) { 2947 if (!PetscBTLookup(bitmask,i)) { 2948 idx_R_local[n_R++] = i; 2949 } 2950 } 2951 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2952 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2953 2954 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2955 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2956 } 2957 2958 /* Block code */ 2959 vbs = 1; 2960 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2961 if (bs>1 && !(n_vertices%bs)) { 2962 PetscBool is_blocked = PETSC_TRUE; 2963 PetscInt *vary; 2964 if (!sub_schurs || !sub_schurs->reuse_solver) { 2965 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2966 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2967 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2968 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2969 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2970 for (i=0; i<pcis->n/bs; i++) { 2971 if (vary[i]!=0 && vary[i]!=bs) { 2972 is_blocked = PETSC_FALSE; 2973 break; 2974 } 2975 } 2976 ierr = PetscFree(vary);CHKERRQ(ierr); 2977 } else { 2978 /* Verify directly the R set */ 2979 for (i=0; i<n_R/bs; i++) { 2980 PetscInt j,node=idx_R_local[bs*i]; 2981 for (j=1; j<bs; j++) { 2982 if (node != idx_R_local[bs*i+j]-j) { 2983 is_blocked = PETSC_FALSE; 2984 break; 2985 } 2986 } 2987 } 2988 } 2989 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2990 vbs = bs; 2991 for (i=0;i<n_R/vbs;i++) { 2992 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2993 } 2994 } 2995 } 2996 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2997 if (sub_schurs && sub_schurs->reuse_solver) { 2998 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2999 3000 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3001 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3002 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3003 reuse_solver->is_R = pcbddc->is_R_local; 3004 } else { 3005 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3006 } 3007 3008 /* print some info if requested */ 3009 if (pcbddc->dbg_flag) { 3010 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3011 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3012 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3013 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3014 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3017 } 3018 3019 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3020 if (!sub_schurs || !sub_schurs->reuse_solver) { 3021 IS is_aux1,is_aux2; 3022 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3023 3024 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3025 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3026 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3027 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3028 for (i=0; i<n_D; i++) { 3029 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3030 } 3031 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3032 for (i=0, j=0; i<n_R; i++) { 3033 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3034 aux_array1[j++] = i; 3035 } 3036 } 3037 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3038 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3039 for (i=0, j=0; i<n_B; i++) { 3040 if (!PetscBTLookup(bitmask,is_indices[i])) { 3041 aux_array2[j++] = i; 3042 } 3043 } 3044 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3045 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3046 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3047 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3048 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3049 3050 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3051 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3052 for (i=0, j=0; i<n_R; i++) { 3053 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3054 aux_array1[j++] = i; 3055 } 3056 } 3057 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3058 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3059 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3060 } 3061 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3062 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3063 } else { 3064 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3065 IS tis; 3066 PetscInt schur_size; 3067 3068 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3069 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3070 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3071 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3072 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3073 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3074 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3075 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3076 } 3077 } 3078 PetscFunctionReturn(0); 3079 } 3080 3081 3082 #undef __FUNCT__ 3083 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3084 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3085 { 3086 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3087 PC_IS *pcis = (PC_IS*)pc->data; 3088 PC pc_temp; 3089 Mat A_RR; 3090 MatReuse reuse; 3091 PetscScalar m_one = -1.0; 3092 PetscReal value; 3093 PetscInt n_D,n_R; 3094 PetscBool check_corr[2],issbaij; 3095 PetscErrorCode ierr; 3096 /* prefixes stuff */ 3097 char dir_prefix[256],neu_prefix[256],str_level[16]; 3098 size_t len; 3099 3100 PetscFunctionBegin; 3101 3102 /* compute prefixes */ 3103 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3104 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3105 if (!pcbddc->current_level) { 3106 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3107 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3108 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3109 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3110 } else { 3111 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3112 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3113 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3114 len -= 15; /* remove "pc_bddc_coarse_" */ 3115 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3116 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3117 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3118 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3119 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3120 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3121 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3122 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3123 } 3124 3125 /* DIRICHLET PROBLEM */ 3126 if (dirichlet) { 3127 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3128 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3129 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3130 if (pcbddc->dbg_flag) { 3131 Mat A_IIn; 3132 3133 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3134 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3135 pcis->A_II = A_IIn; 3136 } 3137 } 3138 if (pcbddc->local_mat->symmetric_set) { 3139 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3140 } 3141 /* Matrix for Dirichlet problem is pcis->A_II */ 3142 n_D = pcis->n - pcis->n_B; 3143 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3144 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3145 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3146 /* default */ 3147 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3148 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3149 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3150 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3151 if (issbaij) { 3152 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3153 } else { 3154 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3155 } 3156 /* Allow user's customization */ 3157 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3158 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3159 } 3160 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3161 if (sub_schurs && sub_schurs->reuse_solver) { 3162 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3163 3164 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3165 } 3166 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3167 if (!n_D) { 3168 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3169 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3170 } 3171 /* Set Up KSP for Dirichlet problem of BDDC */ 3172 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3173 /* set ksp_D into pcis data */ 3174 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3175 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3176 pcis->ksp_D = pcbddc->ksp_D; 3177 } 3178 3179 /* NEUMANN PROBLEM */ 3180 A_RR = 0; 3181 if (neumann) { 3182 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3183 PetscInt ibs,mbs; 3184 PetscBool issbaij; 3185 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3186 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3187 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3188 if (pcbddc->ksp_R) { /* already created ksp */ 3189 PetscInt nn_R; 3190 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3191 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3192 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3193 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3194 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3195 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3196 reuse = MAT_INITIAL_MATRIX; 3197 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3198 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3199 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3200 reuse = MAT_INITIAL_MATRIX; 3201 } else { /* safe to reuse the matrix */ 3202 reuse = MAT_REUSE_MATRIX; 3203 } 3204 } 3205 /* last check */ 3206 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3207 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3208 reuse = MAT_INITIAL_MATRIX; 3209 } 3210 } else { /* first time, so we need to create the matrix */ 3211 reuse = MAT_INITIAL_MATRIX; 3212 } 3213 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3214 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3215 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3216 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3217 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3218 if (matis->A == pcbddc->local_mat) { 3219 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3220 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3221 } else { 3222 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3223 } 3224 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3225 if (matis->A == pcbddc->local_mat) { 3226 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3227 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3228 } else { 3229 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3230 } 3231 } 3232 /* extract A_RR */ 3233 if (sub_schurs && sub_schurs->reuse_solver) { 3234 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3235 3236 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3237 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3238 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3239 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3240 } else { 3241 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3242 } 3243 } else { 3244 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3245 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3246 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3247 } 3248 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3249 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3250 } 3251 if (pcbddc->local_mat->symmetric_set) { 3252 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3253 } 3254 if (!pcbddc->ksp_R) { /* create object if not present */ 3255 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3256 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3257 /* default */ 3258 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3259 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3260 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3261 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3262 if (issbaij) { 3263 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3264 } else { 3265 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3266 } 3267 /* Allow user's customization */ 3268 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3269 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3270 } 3271 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3272 if (!n_R) { 3273 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3274 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3275 } 3276 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3277 /* Reuse solver if it is present */ 3278 if (sub_schurs && sub_schurs->reuse_solver) { 3279 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3280 3281 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3282 } 3283 /* Set Up KSP for Neumann problem of BDDC */ 3284 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3285 } 3286 3287 if (pcbddc->dbg_flag) { 3288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3289 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3290 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3291 } 3292 3293 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3294 check_corr[0] = check_corr[1] = PETSC_FALSE; 3295 if (pcbddc->NullSpace_corr[0]) { 3296 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3297 } 3298 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3299 check_corr[0] = PETSC_TRUE; 3300 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3301 } 3302 if (neumann && pcbddc->NullSpace_corr[2]) { 3303 check_corr[1] = PETSC_TRUE; 3304 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3305 } 3306 3307 /* check Dirichlet and Neumann solvers */ 3308 if (pcbddc->dbg_flag) { 3309 if (dirichlet) { /* Dirichlet */ 3310 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3311 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3312 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3313 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3314 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3316 if (check_corr[0]) { 3317 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3318 } 3319 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3320 } 3321 if (neumann) { /* Neumann */ 3322 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3323 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3324 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3325 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3326 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3327 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3328 if (check_corr[1]) { 3329 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3330 } 3331 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3332 } 3333 } 3334 /* free Neumann problem's matrix */ 3335 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3341 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3342 { 3343 PetscErrorCode ierr; 3344 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3345 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3346 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3347 3348 PetscFunctionBegin; 3349 if (!reuse_solver) { 3350 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3351 } 3352 if (!pcbddc->switch_static) { 3353 if (applytranspose && pcbddc->local_auxmat1) { 3354 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3355 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3356 } 3357 if (!reuse_solver) { 3358 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3359 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3360 } else { 3361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3362 3363 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3364 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3365 } 3366 } else { 3367 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3368 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3369 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3370 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3371 if (applytranspose && pcbddc->local_auxmat1) { 3372 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3373 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3374 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3375 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3376 } 3377 } 3378 if (!reuse_solver || pcbddc->switch_static) { 3379 if (applytranspose) { 3380 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3381 } else { 3382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3383 } 3384 } else { 3385 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3386 3387 if (applytranspose) { 3388 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3389 } else { 3390 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3391 } 3392 } 3393 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3394 if (!pcbddc->switch_static) { 3395 if (!reuse_solver) { 3396 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3397 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3398 } else { 3399 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3400 3401 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3402 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3403 } 3404 if (!applytranspose && pcbddc->local_auxmat1) { 3405 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3406 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3407 } 3408 } else { 3409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3411 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3412 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3413 if (!applytranspose && pcbddc->local_auxmat1) { 3414 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3415 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3416 } 3417 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3418 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3419 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3420 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3421 } 3422 PetscFunctionReturn(0); 3423 } 3424 3425 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3428 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3429 { 3430 PetscErrorCode ierr; 3431 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3432 PC_IS* pcis = (PC_IS*) (pc->data); 3433 const PetscScalar zero = 0.0; 3434 3435 PetscFunctionBegin; 3436 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3437 if (!pcbddc->benign_apply_coarse_only) { 3438 if (applytranspose) { 3439 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3440 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3441 } else { 3442 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3443 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3444 } 3445 } else { 3446 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3447 } 3448 3449 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3450 if (pcbddc->benign_n) { 3451 PetscScalar *array; 3452 PetscInt j; 3453 3454 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3455 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3456 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3457 } 3458 3459 /* start communications from local primal nodes to rhs of coarse solver */ 3460 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3461 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3462 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3463 3464 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3465 if (pcbddc->coarse_ksp) { 3466 Mat coarse_mat; 3467 Vec rhs,sol; 3468 MatNullSpace nullsp; 3469 PetscBool isbddc = PETSC_FALSE; 3470 3471 if (pcbddc->benign_have_null) { 3472 PC coarse_pc; 3473 3474 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3475 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3476 /* we need to propagate to coarser levels the need for a possible benign correction */ 3477 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3478 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3479 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3480 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3481 } 3482 } 3483 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3484 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3485 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3486 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3487 if (nullsp) { 3488 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3489 } 3490 if (applytranspose) { 3491 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3492 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3493 } else { 3494 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3495 PC coarse_pc; 3496 3497 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3498 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3499 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3500 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3501 } else { 3502 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3503 } 3504 } 3505 /* we don't need the benign correction at coarser levels anymore */ 3506 if (pcbddc->benign_have_null && isbddc) { 3507 PC coarse_pc; 3508 PC_BDDC* coarsepcbddc; 3509 3510 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3511 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3512 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3513 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3514 } 3515 if (nullsp) { 3516 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3517 } 3518 } 3519 3520 /* Local solution on R nodes */ 3521 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3522 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3523 } 3524 /* communications from coarse sol to local primal nodes */ 3525 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3526 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3527 3528 /* Sum contributions from the two levels */ 3529 if (!pcbddc->benign_apply_coarse_only) { 3530 if (applytranspose) { 3531 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3532 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3533 } else { 3534 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3535 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3536 } 3537 /* store p0 */ 3538 if (pcbddc->benign_n) { 3539 PetscScalar *array; 3540 PetscInt j; 3541 3542 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3543 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3544 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3545 } 3546 } else { /* expand the coarse solution */ 3547 if (applytranspose) { 3548 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3549 } else { 3550 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3551 } 3552 } 3553 PetscFunctionReturn(0); 3554 } 3555 3556 #undef __FUNCT__ 3557 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3558 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3559 { 3560 PetscErrorCode ierr; 3561 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3562 PetscScalar *array; 3563 Vec from,to; 3564 3565 PetscFunctionBegin; 3566 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3567 from = pcbddc->coarse_vec; 3568 to = pcbddc->vec1_P; 3569 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3570 Vec tvec; 3571 3572 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3573 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3574 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3575 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3576 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3577 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3578 } 3579 } else { /* from local to global -> put data in coarse right hand side */ 3580 from = pcbddc->vec1_P; 3581 to = pcbddc->coarse_vec; 3582 } 3583 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3584 PetscFunctionReturn(0); 3585 } 3586 3587 #undef __FUNCT__ 3588 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3589 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3590 { 3591 PetscErrorCode ierr; 3592 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3593 PetscScalar *array; 3594 Vec from,to; 3595 3596 PetscFunctionBegin; 3597 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3598 from = pcbddc->coarse_vec; 3599 to = pcbddc->vec1_P; 3600 } else { /* from local to global -> put data in coarse right hand side */ 3601 from = pcbddc->vec1_P; 3602 to = pcbddc->coarse_vec; 3603 } 3604 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3605 if (smode == SCATTER_FORWARD) { 3606 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3607 Vec tvec; 3608 3609 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3610 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3611 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3612 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3613 } 3614 } else { 3615 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3616 ierr = VecResetArray(from);CHKERRQ(ierr); 3617 } 3618 } 3619 PetscFunctionReturn(0); 3620 } 3621 3622 /* uncomment for testing purposes */ 3623 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3624 #undef __FUNCT__ 3625 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3626 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3627 { 3628 PetscErrorCode ierr; 3629 PC_IS* pcis = (PC_IS*)(pc->data); 3630 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3631 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3632 /* one and zero */ 3633 PetscScalar one=1.0,zero=0.0; 3634 /* space to store constraints and their local indices */ 3635 PetscScalar *constraints_data; 3636 PetscInt *constraints_idxs,*constraints_idxs_B; 3637 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3638 PetscInt *constraints_n; 3639 /* iterators */ 3640 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3641 /* BLAS integers */ 3642 PetscBLASInt lwork,lierr; 3643 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3644 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3645 /* reuse */ 3646 PetscInt olocal_primal_size,olocal_primal_size_cc; 3647 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3648 /* change of basis */ 3649 PetscBool qr_needed; 3650 PetscBT change_basis,qr_needed_idx; 3651 /* auxiliary stuff */ 3652 PetscInt *nnz,*is_indices; 3653 PetscInt ncc; 3654 /* some quantities */ 3655 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3656 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3657 3658 PetscFunctionBegin; 3659 /* Destroy Mat objects computed previously */ 3660 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3661 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3662 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3663 /* save info on constraints from previous setup (if any) */ 3664 olocal_primal_size = pcbddc->local_primal_size; 3665 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3666 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3667 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3668 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3669 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3670 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3671 3672 if (!pcbddc->adaptive_selection) { 3673 IS ISForVertices,*ISForFaces,*ISForEdges; 3674 MatNullSpace nearnullsp; 3675 const Vec *nearnullvecs; 3676 Vec *localnearnullsp; 3677 PetscScalar *array; 3678 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3679 PetscBool nnsp_has_cnst; 3680 /* LAPACK working arrays for SVD or POD */ 3681 PetscBool skip_lapack,boolforchange; 3682 PetscScalar *work; 3683 PetscReal *singular_vals; 3684 #if defined(PETSC_USE_COMPLEX) 3685 PetscReal *rwork; 3686 #endif 3687 #if defined(PETSC_MISSING_LAPACK_GESVD) 3688 PetscScalar *temp_basis,*correlation_mat; 3689 #else 3690 PetscBLASInt dummy_int=1; 3691 PetscScalar dummy_scalar=1.; 3692 #endif 3693 3694 /* Get index sets for faces, edges and vertices from graph */ 3695 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3696 /* print some info */ 3697 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3698 PetscInt nv; 3699 3700 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 3701 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3702 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3708 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3709 } 3710 3711 /* free unneeded index sets */ 3712 if (!pcbddc->use_vertices) { 3713 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3714 } 3715 if (!pcbddc->use_edges) { 3716 for (i=0;i<n_ISForEdges;i++) { 3717 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3718 } 3719 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3720 n_ISForEdges = 0; 3721 } 3722 if (!pcbddc->use_faces) { 3723 for (i=0;i<n_ISForFaces;i++) { 3724 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3725 } 3726 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3727 n_ISForFaces = 0; 3728 } 3729 3730 /* check if near null space is attached to global mat */ 3731 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3732 if (nearnullsp) { 3733 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3734 /* remove any stored info */ 3735 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3736 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3737 /* store information for BDDC solver reuse */ 3738 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3739 pcbddc->onearnullspace = nearnullsp; 3740 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3741 for (i=0;i<nnsp_size;i++) { 3742 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3743 } 3744 } else { /* if near null space is not provided BDDC uses constants by default */ 3745 nnsp_size = 0; 3746 nnsp_has_cnst = PETSC_TRUE; 3747 } 3748 /* get max number of constraints on a single cc */ 3749 max_constraints = nnsp_size; 3750 if (nnsp_has_cnst) max_constraints++; 3751 3752 /* 3753 Evaluate maximum storage size needed by the procedure 3754 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3755 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3756 There can be multiple constraints per connected component 3757 */ 3758 n_vertices = 0; 3759 if (ISForVertices) { 3760 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3761 } 3762 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3763 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3764 3765 total_counts = n_ISForFaces+n_ISForEdges; 3766 total_counts *= max_constraints; 3767 total_counts += n_vertices; 3768 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3769 3770 total_counts = 0; 3771 max_size_of_constraint = 0; 3772 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3773 IS used_is; 3774 if (i<n_ISForEdges) { 3775 used_is = ISForEdges[i]; 3776 } else { 3777 used_is = ISForFaces[i-n_ISForEdges]; 3778 } 3779 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3780 total_counts += j; 3781 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3782 } 3783 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3784 3785 /* get local part of global near null space vectors */ 3786 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3787 for (k=0;k<nnsp_size;k++) { 3788 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3789 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3790 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3791 } 3792 3793 /* whether or not to skip lapack calls */ 3794 skip_lapack = PETSC_TRUE; 3795 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3796 3797 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3798 if (!skip_lapack) { 3799 PetscScalar temp_work; 3800 3801 #if defined(PETSC_MISSING_LAPACK_GESVD) 3802 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3803 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3804 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3805 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3806 #if defined(PETSC_USE_COMPLEX) 3807 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3808 #endif 3809 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3810 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3811 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3812 lwork = -1; 3813 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3814 #if !defined(PETSC_USE_COMPLEX) 3815 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3816 #else 3817 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3818 #endif 3819 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3820 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3821 #else /* on missing GESVD */ 3822 /* SVD */ 3823 PetscInt max_n,min_n; 3824 max_n = max_size_of_constraint; 3825 min_n = max_constraints; 3826 if (max_size_of_constraint < max_constraints) { 3827 min_n = max_size_of_constraint; 3828 max_n = max_constraints; 3829 } 3830 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3831 #if defined(PETSC_USE_COMPLEX) 3832 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3833 #endif 3834 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3835 lwork = -1; 3836 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3837 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3838 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3839 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3840 #if !defined(PETSC_USE_COMPLEX) 3841 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3842 #else 3843 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3844 #endif 3845 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3846 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3847 #endif /* on missing GESVD */ 3848 /* Allocate optimal workspace */ 3849 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3850 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3851 } 3852 /* Now we can loop on constraining sets */ 3853 total_counts = 0; 3854 constraints_idxs_ptr[0] = 0; 3855 constraints_data_ptr[0] = 0; 3856 /* vertices */ 3857 if (n_vertices) { 3858 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3859 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3860 for (i=0;i<n_vertices;i++) { 3861 constraints_n[total_counts] = 1; 3862 constraints_data[total_counts] = 1.0; 3863 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3864 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3865 total_counts++; 3866 } 3867 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3868 n_vertices = total_counts; 3869 } 3870 3871 /* edges and faces */ 3872 total_counts_cc = total_counts; 3873 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3874 IS used_is; 3875 PetscBool idxs_copied = PETSC_FALSE; 3876 3877 if (ncc<n_ISForEdges) { 3878 used_is = ISForEdges[ncc]; 3879 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3880 } else { 3881 used_is = ISForFaces[ncc-n_ISForEdges]; 3882 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3883 } 3884 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3885 3886 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3887 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3888 /* change of basis should not be performed on local periodic nodes */ 3889 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3890 if (nnsp_has_cnst) { 3891 PetscScalar quad_value; 3892 3893 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3894 idxs_copied = PETSC_TRUE; 3895 3896 if (!pcbddc->use_nnsp_true) { 3897 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3898 } else { 3899 quad_value = 1.0; 3900 } 3901 for (j=0;j<size_of_constraint;j++) { 3902 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3903 } 3904 temp_constraints++; 3905 total_counts++; 3906 } 3907 for (k=0;k<nnsp_size;k++) { 3908 PetscReal real_value; 3909 PetscScalar *ptr_to_data; 3910 3911 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3912 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3913 for (j=0;j<size_of_constraint;j++) { 3914 ptr_to_data[j] = array[is_indices[j]]; 3915 } 3916 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3917 /* check if array is null on the connected component */ 3918 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3919 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3920 if (real_value > 0.0) { /* keep indices and values */ 3921 temp_constraints++; 3922 total_counts++; 3923 if (!idxs_copied) { 3924 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3925 idxs_copied = PETSC_TRUE; 3926 } 3927 } 3928 } 3929 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3930 valid_constraints = temp_constraints; 3931 if (!pcbddc->use_nnsp_true && temp_constraints) { 3932 if (temp_constraints == 1) { /* just normalize the constraint */ 3933 PetscScalar norm,*ptr_to_data; 3934 3935 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3937 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3938 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3939 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3940 } else { /* perform SVD */ 3941 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3942 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3943 3944 #if defined(PETSC_MISSING_LAPACK_GESVD) 3945 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3946 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3947 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3948 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3949 from that computed using LAPACKgesvd 3950 -> This is due to a different computation of eigenvectors in LAPACKheev 3951 -> The quality of the POD-computed basis will be the same */ 3952 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3953 /* Store upper triangular part of correlation matrix */ 3954 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3955 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3956 for (j=0;j<temp_constraints;j++) { 3957 for (k=0;k<j+1;k++) { 3958 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3959 } 3960 } 3961 /* compute eigenvalues and eigenvectors of correlation matrix */ 3962 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3963 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3964 #if !defined(PETSC_USE_COMPLEX) 3965 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3966 #else 3967 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3968 #endif 3969 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3970 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3971 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3972 j = 0; 3973 while (j < temp_constraints && singular_vals[j] < tol) j++; 3974 total_counts = total_counts-j; 3975 valid_constraints = temp_constraints-j; 3976 /* scale and copy POD basis into used quadrature memory */ 3977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3980 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3981 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3982 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3983 if (j<temp_constraints) { 3984 PetscInt ii; 3985 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3986 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3987 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3988 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3989 for (k=0;k<temp_constraints-j;k++) { 3990 for (ii=0;ii<size_of_constraint;ii++) { 3991 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3992 } 3993 } 3994 } 3995 #else /* on missing GESVD */ 3996 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3997 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3998 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3999 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4000 #if !defined(PETSC_USE_COMPLEX) 4001 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4002 #else 4003 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4004 #endif 4005 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4006 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4007 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4008 k = temp_constraints; 4009 if (k > size_of_constraint) k = size_of_constraint; 4010 j = 0; 4011 while (j < k && singular_vals[k-j-1] < tol) j++; 4012 valid_constraints = k-j; 4013 total_counts = total_counts-temp_constraints+valid_constraints; 4014 #endif /* on missing GESVD */ 4015 } 4016 } 4017 /* update pointers information */ 4018 if (valid_constraints) { 4019 constraints_n[total_counts_cc] = valid_constraints; 4020 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4021 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4022 /* set change_of_basis flag */ 4023 if (boolforchange) { 4024 PetscBTSet(change_basis,total_counts_cc); 4025 } 4026 total_counts_cc++; 4027 } 4028 } 4029 /* free workspace */ 4030 if (!skip_lapack) { 4031 ierr = PetscFree(work);CHKERRQ(ierr); 4032 #if defined(PETSC_USE_COMPLEX) 4033 ierr = PetscFree(rwork);CHKERRQ(ierr); 4034 #endif 4035 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4036 #if defined(PETSC_MISSING_LAPACK_GESVD) 4037 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4038 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4039 #endif 4040 } 4041 for (k=0;k<nnsp_size;k++) { 4042 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4043 } 4044 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4045 /* free index sets of faces, edges and vertices */ 4046 for (i=0;i<n_ISForFaces;i++) { 4047 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4048 } 4049 if (n_ISForFaces) { 4050 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4051 } 4052 for (i=0;i<n_ISForEdges;i++) { 4053 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4054 } 4055 if (n_ISForEdges) { 4056 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4057 } 4058 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4059 } else { 4060 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4061 4062 total_counts = 0; 4063 n_vertices = 0; 4064 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4065 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4066 } 4067 max_constraints = 0; 4068 total_counts_cc = 0; 4069 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4070 total_counts += pcbddc->adaptive_constraints_n[i]; 4071 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4072 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4073 } 4074 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4075 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4076 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4077 constraints_data = pcbddc->adaptive_constraints_data; 4078 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4079 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4080 total_counts_cc = 0; 4081 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4082 if (pcbddc->adaptive_constraints_n[i]) { 4083 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4084 } 4085 } 4086 #if 0 4087 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4088 for (i=0;i<total_counts_cc;i++) { 4089 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4090 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4091 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4092 printf(" %d",constraints_idxs[j]); 4093 } 4094 printf("\n"); 4095 printf("number of cc: %d\n",constraints_n[i]); 4096 } 4097 for (i=0;i<n_vertices;i++) { 4098 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4099 } 4100 for (i=0;i<sub_schurs->n_subs;i++) { 4101 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4102 } 4103 #endif 4104 4105 max_size_of_constraint = 0; 4106 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4107 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4108 /* Change of basis */ 4109 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4110 if (pcbddc->use_change_of_basis) { 4111 for (i=0;i<sub_schurs->n_subs;i++) { 4112 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4113 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4114 } 4115 } 4116 } 4117 } 4118 pcbddc->local_primal_size = total_counts; 4119 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4120 4121 /* map constraints_idxs in boundary numbering */ 4122 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4123 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4124 4125 /* Create constraint matrix */ 4126 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4127 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4128 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4129 4130 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4131 /* determine if a QR strategy is needed for change of basis */ 4132 qr_needed = PETSC_FALSE; 4133 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4134 total_primal_vertices=0; 4135 pcbddc->local_primal_size_cc = 0; 4136 for (i=0;i<total_counts_cc;i++) { 4137 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4138 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4139 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4140 pcbddc->local_primal_size_cc += 1; 4141 } else if (PetscBTLookup(change_basis,i)) { 4142 for (k=0;k<constraints_n[i];k++) { 4143 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4144 } 4145 pcbddc->local_primal_size_cc += constraints_n[i]; 4146 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4147 PetscBTSet(qr_needed_idx,i); 4148 qr_needed = PETSC_TRUE; 4149 } 4150 } else { 4151 pcbddc->local_primal_size_cc += 1; 4152 } 4153 } 4154 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4155 pcbddc->n_vertices = total_primal_vertices; 4156 /* permute indices in order to have a sorted set of vertices */ 4157 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4158 4159 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4160 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4161 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4162 4163 /* nonzero structure of constraint matrix */ 4164 /* and get reference dof for local constraints */ 4165 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4166 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4167 4168 j = total_primal_vertices; 4169 total_counts = total_primal_vertices; 4170 cum = total_primal_vertices; 4171 for (i=n_vertices;i<total_counts_cc;i++) { 4172 if (!PetscBTLookup(change_basis,i)) { 4173 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4174 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4175 cum++; 4176 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4177 for (k=0;k<constraints_n[i];k++) { 4178 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4179 nnz[j+k] = size_of_constraint; 4180 } 4181 j += constraints_n[i]; 4182 } 4183 } 4184 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4185 ierr = PetscFree(nnz);CHKERRQ(ierr); 4186 4187 /* set values in constraint matrix */ 4188 for (i=0;i<total_primal_vertices;i++) { 4189 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4190 } 4191 total_counts = total_primal_vertices; 4192 for (i=n_vertices;i<total_counts_cc;i++) { 4193 if (!PetscBTLookup(change_basis,i)) { 4194 PetscInt *cols; 4195 4196 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4197 cols = constraints_idxs+constraints_idxs_ptr[i]; 4198 for (k=0;k<constraints_n[i];k++) { 4199 PetscInt row = total_counts+k; 4200 PetscScalar *vals; 4201 4202 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4203 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4204 } 4205 total_counts += constraints_n[i]; 4206 } 4207 } 4208 /* assembling */ 4209 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4210 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4211 4212 /* 4213 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4214 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4215 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4216 */ 4217 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4218 if (pcbddc->use_change_of_basis) { 4219 /* dual and primal dofs on a single cc */ 4220 PetscInt dual_dofs,primal_dofs; 4221 /* working stuff for GEQRF */ 4222 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4223 PetscBLASInt lqr_work; 4224 /* working stuff for UNGQR */ 4225 PetscScalar *gqr_work,lgqr_work_t; 4226 PetscBLASInt lgqr_work; 4227 /* working stuff for TRTRS */ 4228 PetscScalar *trs_rhs; 4229 PetscBLASInt Blas_NRHS; 4230 /* pointers for values insertion into change of basis matrix */ 4231 PetscInt *start_rows,*start_cols; 4232 PetscScalar *start_vals; 4233 /* working stuff for values insertion */ 4234 PetscBT is_primal; 4235 PetscInt *aux_primal_numbering_B; 4236 /* matrix sizes */ 4237 PetscInt global_size,local_size; 4238 /* temporary change of basis */ 4239 Mat localChangeOfBasisMatrix; 4240 /* extra space for debugging */ 4241 PetscScalar *dbg_work; 4242 4243 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4244 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4245 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4246 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4247 /* nonzeros for local mat */ 4248 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4249 if (!pcbddc->benign_change || pcbddc->fake_change) { 4250 for (i=0;i<pcis->n;i++) nnz[i]=1; 4251 } else { 4252 const PetscInt *ii; 4253 PetscInt n; 4254 PetscBool flg_row; 4255 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4256 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4257 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4258 } 4259 for (i=n_vertices;i<total_counts_cc;i++) { 4260 if (PetscBTLookup(change_basis,i)) { 4261 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4262 if (PetscBTLookup(qr_needed_idx,i)) { 4263 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4264 } else { 4265 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4266 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4267 } 4268 } 4269 } 4270 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4271 ierr = PetscFree(nnz);CHKERRQ(ierr); 4272 /* Set interior change in the matrix */ 4273 if (!pcbddc->benign_change || pcbddc->fake_change) { 4274 for (i=0;i<pcis->n;i++) { 4275 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4276 } 4277 } else { 4278 const PetscInt *ii,*jj; 4279 PetscScalar *aa; 4280 PetscInt n; 4281 PetscBool flg_row; 4282 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4283 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4284 for (i=0;i<n;i++) { 4285 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4286 } 4287 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4288 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4289 } 4290 4291 if (pcbddc->dbg_flag) { 4292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4294 } 4295 4296 4297 /* Now we loop on the constraints which need a change of basis */ 4298 /* 4299 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4300 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4301 4302 Basic blocks of change of basis matrix T computed by 4303 4304 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4305 4306 | 1 0 ... 0 s_1/S | 4307 | 0 1 ... 0 s_2/S | 4308 | ... | 4309 | 0 ... 1 s_{n-1}/S | 4310 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4311 4312 with S = \sum_{i=1}^n s_i^2 4313 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4314 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4315 4316 - QR decomposition of constraints otherwise 4317 */ 4318 if (qr_needed) { 4319 /* space to store Q */ 4320 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4321 /* first we issue queries for optimal work */ 4322 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4323 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4324 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4325 lqr_work = -1; 4326 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4327 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4328 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4329 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4330 lgqr_work = -1; 4331 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4332 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4333 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4334 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4335 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4336 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4337 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4338 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4339 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4340 /* array to store scaling factors for reflectors */ 4341 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4342 /* array to store rhs and solution of triangular solver */ 4343 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4344 /* allocating workspace for check */ 4345 if (pcbddc->dbg_flag) { 4346 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4347 } 4348 } 4349 /* array to store whether a node is primal or not */ 4350 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4351 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4352 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4353 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4354 for (i=0;i<total_primal_vertices;i++) { 4355 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4356 } 4357 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4358 4359 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4360 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4361 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4362 if (PetscBTLookup(change_basis,total_counts)) { 4363 /* get constraint info */ 4364 primal_dofs = constraints_n[total_counts]; 4365 dual_dofs = size_of_constraint-primal_dofs; 4366 4367 if (pcbddc->dbg_flag) { 4368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4369 } 4370 4371 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4372 4373 /* copy quadrature constraints for change of basis check */ 4374 if (pcbddc->dbg_flag) { 4375 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4376 } 4377 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4378 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4379 4380 /* compute QR decomposition of constraints */ 4381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4382 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4383 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4384 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4385 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4386 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4387 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4388 4389 /* explictly compute R^-T */ 4390 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4391 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4392 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4393 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4394 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4395 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4397 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4398 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4400 4401 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4402 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4403 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4404 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4405 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4406 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4407 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4408 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4409 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4410 4411 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4412 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4413 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4414 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4417 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4418 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4419 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4420 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4421 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4422 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4423 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4424 4425 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4426 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4427 /* insert cols for primal dofs */ 4428 for (j=0;j<primal_dofs;j++) { 4429 start_vals = &qr_basis[j*size_of_constraint]; 4430 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4431 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4432 } 4433 /* insert cols for dual dofs */ 4434 for (j=0,k=0;j<dual_dofs;k++) { 4435 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4436 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4437 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4438 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4439 j++; 4440 } 4441 } 4442 4443 /* check change of basis */ 4444 if (pcbddc->dbg_flag) { 4445 PetscInt ii,jj; 4446 PetscBool valid_qr=PETSC_TRUE; 4447 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4449 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4450 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4451 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4452 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4453 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4454 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4455 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4456 for (jj=0;jj<size_of_constraint;jj++) { 4457 for (ii=0;ii<primal_dofs;ii++) { 4458 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4459 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4460 } 4461 } 4462 if (!valid_qr) { 4463 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4464 for (jj=0;jj<size_of_constraint;jj++) { 4465 for (ii=0;ii<primal_dofs;ii++) { 4466 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4467 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4468 } 4469 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4470 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4471 } 4472 } 4473 } 4474 } else { 4475 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4476 } 4477 } 4478 } else { /* simple transformation block */ 4479 PetscInt row,col; 4480 PetscScalar val,norm; 4481 4482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4483 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4484 for (j=0;j<size_of_constraint;j++) { 4485 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4486 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4487 if (!PetscBTLookup(is_primal,row_B)) { 4488 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4489 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4490 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4491 } else { 4492 for (k=0;k<size_of_constraint;k++) { 4493 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4494 if (row != col) { 4495 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4496 } else { 4497 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4498 } 4499 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4500 } 4501 } 4502 } 4503 if (pcbddc->dbg_flag) { 4504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4505 } 4506 } 4507 } else { 4508 if (pcbddc->dbg_flag) { 4509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4510 } 4511 } 4512 } 4513 4514 /* free workspace */ 4515 if (qr_needed) { 4516 if (pcbddc->dbg_flag) { 4517 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4518 } 4519 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4520 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4521 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4522 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4523 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4524 } 4525 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4526 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4527 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4528 4529 /* assembling of global change of variable */ 4530 if (!pcbddc->fake_change) { 4531 Mat tmat; 4532 PetscInt bs; 4533 4534 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4535 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4536 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4537 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4538 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4539 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4540 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4541 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4542 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4543 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4544 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4545 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4546 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4547 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4548 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4549 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4550 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4551 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4552 4553 /* check */ 4554 if (pcbddc->dbg_flag) { 4555 PetscReal error; 4556 Vec x,x_change; 4557 4558 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4559 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4560 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4561 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4562 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4563 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4564 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4565 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4566 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4567 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4568 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4569 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4572 ierr = VecDestroy(&x);CHKERRQ(ierr); 4573 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4574 } 4575 /* adapt sub_schurs computed (if any) */ 4576 if (pcbddc->use_deluxe_scaling) { 4577 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4578 4579 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4580 if (sub_schurs && sub_schurs->S_Ej_all) { 4581 Mat S_new,tmat; 4582 IS is_all_N,is_V_Sall = NULL; 4583 4584 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4585 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4586 if (pcbddc->deluxe_zerorows) { 4587 ISLocalToGlobalMapping NtoSall; 4588 IS is_V; 4589 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4590 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4591 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4592 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4593 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4594 } 4595 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4596 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4597 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4598 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4599 if (pcbddc->deluxe_zerorows) { 4600 const PetscScalar *array; 4601 const PetscInt *idxs_V,*idxs_all; 4602 PetscInt i,n_V; 4603 4604 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4605 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4606 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4607 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4608 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4609 for (i=0;i<n_V;i++) { 4610 PetscScalar val; 4611 PetscInt idx; 4612 4613 idx = idxs_V[i]; 4614 val = array[idxs_all[idxs_V[i]]]; 4615 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4616 } 4617 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4620 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4621 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4622 } 4623 sub_schurs->S_Ej_all = S_new; 4624 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4625 if (sub_schurs->sum_S_Ej_all) { 4626 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4627 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4628 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4629 if (pcbddc->deluxe_zerorows) { 4630 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4631 } 4632 sub_schurs->sum_S_Ej_all = S_new; 4633 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4634 } 4635 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4636 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4637 } 4638 /* destroy any change of basis context in sub_schurs */ 4639 if (sub_schurs && sub_schurs->change) { 4640 PetscInt i; 4641 4642 for (i=0;i<sub_schurs->n_subs;i++) { 4643 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4644 } 4645 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4646 } 4647 } 4648 if (pcbddc->switch_static) { /* need to save the local change */ 4649 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4650 } else { 4651 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4652 } 4653 /* determine if any process has changed the pressures locally */ 4654 pcbddc->change_interior = pcbddc->benign_have_null; 4655 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4656 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4657 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4658 pcbddc->use_qr_single = qr_needed; 4659 } 4660 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4661 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4662 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4663 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4664 } else { 4665 Mat benign_global = NULL; 4666 if (pcbddc->benign_have_null) { 4667 Mat tmat; 4668 4669 pcbddc->change_interior = PETSC_TRUE; 4670 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4671 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4672 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4673 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4674 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4675 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4676 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4677 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4678 if (pcbddc->benign_change) { 4679 Mat M; 4680 4681 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4682 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4683 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4684 ierr = MatDestroy(&M);CHKERRQ(ierr); 4685 } else { 4686 Mat eye; 4687 PetscScalar *array; 4688 4689 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4690 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4691 for (i=0;i<pcis->n;i++) { 4692 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4693 } 4694 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4695 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4696 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4697 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4698 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4699 } 4700 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4701 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4702 } 4703 if (pcbddc->user_ChangeOfBasisMatrix) { 4704 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4705 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4706 } else if (pcbddc->benign_have_null) { 4707 pcbddc->ChangeOfBasisMatrix = benign_global; 4708 } 4709 } 4710 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4711 IS is_global; 4712 const PetscInt *gidxs; 4713 4714 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4715 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4716 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4717 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4718 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4719 } 4720 } 4721 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4722 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4723 } 4724 4725 if (!pcbddc->fake_change) { 4726 /* add pressure dofs to set of primal nodes for numbering purposes */ 4727 for (i=0;i<pcbddc->benign_n;i++) { 4728 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4729 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4730 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4731 pcbddc->local_primal_size_cc++; 4732 pcbddc->local_primal_size++; 4733 } 4734 4735 /* check if a new primal space has been introduced (also take into account benign trick) */ 4736 pcbddc->new_primal_space_local = PETSC_TRUE; 4737 if (olocal_primal_size == pcbddc->local_primal_size) { 4738 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4739 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4740 if (!pcbddc->new_primal_space_local) { 4741 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4742 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4743 } 4744 } 4745 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4746 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4747 } 4748 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4749 4750 /* flush dbg viewer */ 4751 if (pcbddc->dbg_flag) { 4752 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4753 } 4754 4755 /* free workspace */ 4756 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4757 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4758 if (!pcbddc->adaptive_selection) { 4759 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4760 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4761 } else { 4762 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4763 pcbddc->adaptive_constraints_idxs_ptr, 4764 pcbddc->adaptive_constraints_data_ptr, 4765 pcbddc->adaptive_constraints_idxs, 4766 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4767 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4768 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4769 } 4770 PetscFunctionReturn(0); 4771 } 4772 4773 #undef __FUNCT__ 4774 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4775 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4776 { 4777 ISLocalToGlobalMapping map; 4778 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4779 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4780 PetscInt ierr,i,N; 4781 4782 PetscFunctionBegin; 4783 if (pcbddc->graphanalyzed) PetscFunctionReturn(0); 4784 /* Reset previously computed graph */ 4785 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4786 /* Init local Graph struct */ 4787 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4788 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 4789 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N);CHKERRQ(ierr); 4790 4791 /* Check validity of the csr graph passed in by the user */ 4792 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4793 4794 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4795 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4796 PetscInt *xadj,*adjncy; 4797 PetscInt nvtxs; 4798 PetscBool flg_row=PETSC_FALSE; 4799 4800 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4801 if (flg_row) { 4802 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4803 pcbddc->computed_rowadj = PETSC_TRUE; 4804 } 4805 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4806 } 4807 if (pcbddc->dbg_flag) { 4808 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4809 } 4810 4811 /* Setup of Graph */ 4812 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4813 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4814 4815 /* attach info on disconnected subdomains if present */ 4816 if (pcbddc->n_local_subs) { 4817 PetscInt *local_subs; 4818 4819 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4820 for (i=0;i<pcbddc->n_local_subs;i++) { 4821 const PetscInt *idxs; 4822 PetscInt nl,j; 4823 4824 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4825 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4826 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 4827 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4828 } 4829 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4830 pcbddc->mat_graph->local_subs = local_subs; 4831 } 4832 4833 /* Graph's connected components analysis */ 4834 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4835 4836 /* set flag indicating analysis has been done */ 4837 pcbddc->graphanalyzed = PETSC_TRUE; 4838 PetscFunctionReturn(0); 4839 } 4840 4841 #undef __FUNCT__ 4842 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4843 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4844 { 4845 PetscInt i,j; 4846 PetscScalar *alphas; 4847 PetscErrorCode ierr; 4848 4849 PetscFunctionBegin; 4850 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4851 for (i=0;i<n;i++) { 4852 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4853 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4854 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4855 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4856 } 4857 ierr = PetscFree(alphas);CHKERRQ(ierr); 4858 PetscFunctionReturn(0); 4859 } 4860 4861 #undef __FUNCT__ 4862 #define __FUNCT__ "MatISGetSubassemblingPattern" 4863 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4864 { 4865 Mat A; 4866 PetscInt n_neighs,*neighs,*n_shared,**shared; 4867 PetscMPIInt size,rank,color; 4868 PetscInt *xadj,*adjncy; 4869 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4870 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4871 PetscInt void_procs,*procs_candidates = NULL; 4872 PetscInt xadj_count, *count; 4873 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4874 PetscSubcomm psubcomm; 4875 MPI_Comm subcomm; 4876 PetscErrorCode ierr; 4877 4878 PetscFunctionBegin; 4879 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4880 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4881 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4882 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4883 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4884 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4885 4886 if (have_void) *have_void = PETSC_FALSE; 4887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4888 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4889 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4890 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4891 im_active = !!(n); 4892 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4893 void_procs = size - active_procs; 4894 /* get ranks of of non-active processes in mat communicator */ 4895 if (void_procs) { 4896 PetscInt ncand; 4897 4898 if (have_void) *have_void = PETSC_TRUE; 4899 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4900 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4901 for (i=0,ncand=0;i<size;i++) { 4902 if (!procs_candidates[i]) { 4903 procs_candidates[ncand++] = i; 4904 } 4905 } 4906 /* force n_subdomains to be not greater that the number of non-active processes */ 4907 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4908 } 4909 4910 /* number of subdomains requested greater than active processes -> just shift the matrix 4911 number of subdomains requested 1 -> send to master or first candidate in voids */ 4912 if (active_procs < *n_subdomains || *n_subdomains == 1) { 4913 PetscInt issize,isidx,dest; 4914 if (*n_subdomains == 1) dest = 0; 4915 else dest = rank; 4916 if (im_active) { 4917 issize = 1; 4918 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4919 isidx = procs_candidates[dest]; 4920 } else { 4921 isidx = dest; 4922 } 4923 } else { 4924 issize = 0; 4925 isidx = -1; 4926 } 4927 *n_subdomains = active_procs; 4928 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4929 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4933 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4934 threshold = PetscMax(threshold,2); 4935 4936 /* Get info on mapping */ 4937 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4938 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4939 4940 /* build local CSR graph of subdomains' connectivity */ 4941 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4942 xadj[0] = 0; 4943 xadj[1] = PetscMax(n_neighs-1,0); 4944 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4945 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4946 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4947 for (i=1;i<n_neighs;i++) 4948 for (j=0;j<n_shared[i];j++) 4949 count[shared[i][j]] += 1; 4950 4951 xadj_count = 0; 4952 for (i=1;i<n_neighs;i++) { 4953 for (j=0;j<n_shared[i];j++) { 4954 if (count[shared[i][j]] < threshold) { 4955 adjncy[xadj_count] = neighs[i]; 4956 adjncy_wgt[xadj_count] = n_shared[i]; 4957 xadj_count++; 4958 break; 4959 } 4960 } 4961 } 4962 xadj[1] = xadj_count; 4963 ierr = PetscFree(count);CHKERRQ(ierr); 4964 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4965 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4966 4967 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4968 4969 /* Restrict work on active processes only */ 4970 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4971 if (void_procs) { 4972 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4973 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4974 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4975 subcomm = PetscSubcommChild(psubcomm); 4976 } else { 4977 psubcomm = NULL; 4978 subcomm = PetscObjectComm((PetscObject)mat); 4979 } 4980 4981 v_wgt = NULL; 4982 if (!color) { 4983 ierr = PetscFree(xadj);CHKERRQ(ierr); 4984 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4985 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4986 } else { 4987 Mat subdomain_adj; 4988 IS new_ranks,new_ranks_contig; 4989 MatPartitioning partitioner; 4990 PetscInt rstart=0,rend=0; 4991 PetscInt *is_indices,*oldranks; 4992 PetscMPIInt size; 4993 PetscBool aggregate; 4994 4995 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4996 if (void_procs) { 4997 PetscInt prank = rank; 4998 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4999 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5000 for (i=0;i<xadj[1];i++) { 5001 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5002 } 5003 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5004 } else { 5005 oldranks = NULL; 5006 } 5007 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5008 if (aggregate) { /* TODO: all this part could be made more efficient */ 5009 PetscInt lrows,row,ncols,*cols; 5010 PetscMPIInt nrank; 5011 PetscScalar *vals; 5012 5013 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5014 lrows = 0; 5015 if (nrank<redprocs) { 5016 lrows = size/redprocs; 5017 if (nrank<size%redprocs) lrows++; 5018 } 5019 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5020 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5021 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5022 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5023 row = nrank; 5024 ncols = xadj[1]-xadj[0]; 5025 cols = adjncy; 5026 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5027 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5028 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5029 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5030 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5031 ierr = PetscFree(xadj);CHKERRQ(ierr); 5032 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5033 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5034 ierr = PetscFree(vals);CHKERRQ(ierr); 5035 if (use_vwgt) { 5036 Vec v; 5037 const PetscScalar *array; 5038 PetscInt nl; 5039 5040 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5041 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5042 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5043 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5044 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5045 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5046 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5047 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5048 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5049 ierr = VecDestroy(&v);CHKERRQ(ierr); 5050 } 5051 } else { 5052 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5053 if (use_vwgt) { 5054 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5055 v_wgt[0] = local_size; 5056 } 5057 } 5058 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5059 5060 /* Partition */ 5061 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5062 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5063 if (v_wgt) { 5064 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5065 } 5066 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5067 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5068 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5069 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5070 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5071 5072 /* renumber new_ranks to avoid "holes" in new set of processors */ 5073 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5074 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5075 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5076 if (!aggregate) { 5077 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5078 #if defined(PETSC_USE_DEBUG) 5079 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5080 #endif 5081 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5082 } else if (oldranks) { 5083 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5084 } else { 5085 ranks_send_to_idx[0] = is_indices[0]; 5086 } 5087 } else { 5088 PetscInt idxs[1]; 5089 PetscMPIInt tag; 5090 MPI_Request *reqs; 5091 5092 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5093 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5094 for (i=rstart;i<rend;i++) { 5095 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5096 } 5097 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5098 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5099 ierr = PetscFree(reqs);CHKERRQ(ierr); 5100 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5101 #if defined(PETSC_USE_DEBUG) 5102 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5103 #endif 5104 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5105 } else if (oldranks) { 5106 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5107 } else { 5108 ranks_send_to_idx[0] = idxs[0]; 5109 } 5110 } 5111 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5112 /* clean up */ 5113 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5114 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5115 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5116 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5117 } 5118 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5119 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5120 5121 /* assemble parallel IS for sends */ 5122 i = 1; 5123 if (!color) i=0; 5124 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5125 PetscFunctionReturn(0); 5126 } 5127 5128 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5129 5130 #undef __FUNCT__ 5131 #define __FUNCT__ "MatISSubassemble" 5132 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5133 { 5134 Mat local_mat; 5135 IS is_sends_internal; 5136 PetscInt rows,cols,new_local_rows; 5137 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5138 PetscBool ismatis,isdense,newisdense,destroy_mat; 5139 ISLocalToGlobalMapping l2gmap; 5140 PetscInt* l2gmap_indices; 5141 const PetscInt* is_indices; 5142 MatType new_local_type; 5143 /* buffers */ 5144 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5145 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5146 PetscInt *recv_buffer_idxs_local; 5147 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5148 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5149 /* MPI */ 5150 MPI_Comm comm,comm_n; 5151 PetscSubcomm subcomm; 5152 PetscMPIInt n_sends,n_recvs,commsize; 5153 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5154 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5155 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5156 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5157 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5158 PetscErrorCode ierr; 5159 5160 PetscFunctionBegin; 5161 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5162 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5163 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5164 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5165 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5166 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5167 PetscValidLogicalCollectiveBool(mat,reuse,6); 5168 PetscValidLogicalCollectiveInt(mat,nis,8); 5169 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5170 if (nvecs) { 5171 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5172 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5173 } 5174 /* further checks */ 5175 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5176 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5177 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5178 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5179 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5180 if (reuse && *mat_n) { 5181 PetscInt mrows,mcols,mnrows,mncols; 5182 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5183 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5184 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5185 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5186 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5187 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5188 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5189 } 5190 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5191 PetscValidLogicalCollectiveInt(mat,bs,0); 5192 5193 /* prepare IS for sending if not provided */ 5194 if (!is_sends) { 5195 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5196 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5197 } else { 5198 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5199 is_sends_internal = is_sends; 5200 } 5201 5202 /* get comm */ 5203 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5204 5205 /* compute number of sends */ 5206 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5207 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5208 5209 /* compute number of receives */ 5210 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5211 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5212 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5213 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5214 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5215 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5216 ierr = PetscFree(iflags);CHKERRQ(ierr); 5217 5218 /* restrict comm if requested */ 5219 subcomm = 0; 5220 destroy_mat = PETSC_FALSE; 5221 if (restrict_comm) { 5222 PetscMPIInt color,subcommsize; 5223 5224 color = 0; 5225 if (restrict_full) { 5226 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5227 } else { 5228 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5229 } 5230 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5231 subcommsize = commsize - subcommsize; 5232 /* check if reuse has been requested */ 5233 if (reuse) { 5234 if (*mat_n) { 5235 PetscMPIInt subcommsize2; 5236 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5237 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5238 comm_n = PetscObjectComm((PetscObject)*mat_n); 5239 } else { 5240 comm_n = PETSC_COMM_SELF; 5241 } 5242 } else { /* MAT_INITIAL_MATRIX */ 5243 PetscMPIInt rank; 5244 5245 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5246 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5247 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5248 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5249 comm_n = PetscSubcommChild(subcomm); 5250 } 5251 /* flag to destroy *mat_n if not significative */ 5252 if (color) destroy_mat = PETSC_TRUE; 5253 } else { 5254 comm_n = comm; 5255 } 5256 5257 /* prepare send/receive buffers */ 5258 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5259 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5260 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5261 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5262 if (nis) { 5263 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5264 } 5265 5266 /* Get data from local matrices */ 5267 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5268 /* TODO: See below some guidelines on how to prepare the local buffers */ 5269 /* 5270 send_buffer_vals should contain the raw values of the local matrix 5271 send_buffer_idxs should contain: 5272 - MatType_PRIVATE type 5273 - PetscInt size_of_l2gmap 5274 - PetscInt global_row_indices[size_of_l2gmap] 5275 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5276 */ 5277 else { 5278 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5279 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5280 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5281 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5282 send_buffer_idxs[1] = i; 5283 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5284 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5285 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5286 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5287 for (i=0;i<n_sends;i++) { 5288 ilengths_vals[is_indices[i]] = len*len; 5289 ilengths_idxs[is_indices[i]] = len+2; 5290 } 5291 } 5292 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5293 /* additional is (if any) */ 5294 if (nis) { 5295 PetscMPIInt psum; 5296 PetscInt j; 5297 for (j=0,psum=0;j<nis;j++) { 5298 PetscInt plen; 5299 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5300 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5301 psum += len+1; /* indices + lenght */ 5302 } 5303 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5304 for (j=0,psum=0;j<nis;j++) { 5305 PetscInt plen; 5306 const PetscInt *is_array_idxs; 5307 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5308 send_buffer_idxs_is[psum] = plen; 5309 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5310 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5311 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5312 psum += plen+1; /* indices + lenght */ 5313 } 5314 for (i=0;i<n_sends;i++) { 5315 ilengths_idxs_is[is_indices[i]] = psum; 5316 } 5317 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5318 } 5319 5320 buf_size_idxs = 0; 5321 buf_size_vals = 0; 5322 buf_size_idxs_is = 0; 5323 buf_size_vecs = 0; 5324 for (i=0;i<n_recvs;i++) { 5325 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5326 buf_size_vals += (PetscInt)olengths_vals[i]; 5327 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5328 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5329 } 5330 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5331 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5332 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5333 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5334 5335 /* get new tags for clean communications */ 5336 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5337 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5338 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5339 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5340 5341 /* allocate for requests */ 5342 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5343 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5344 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5345 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5346 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5347 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5348 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5349 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5350 5351 /* communications */ 5352 ptr_idxs = recv_buffer_idxs; 5353 ptr_vals = recv_buffer_vals; 5354 ptr_idxs_is = recv_buffer_idxs_is; 5355 ptr_vecs = recv_buffer_vecs; 5356 for (i=0;i<n_recvs;i++) { 5357 source_dest = onodes[i]; 5358 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5359 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5360 ptr_idxs += olengths_idxs[i]; 5361 ptr_vals += olengths_vals[i]; 5362 if (nis) { 5363 source_dest = onodes_is[i]; 5364 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5365 ptr_idxs_is += olengths_idxs_is[i]; 5366 } 5367 if (nvecs) { 5368 source_dest = onodes[i]; 5369 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5370 ptr_vecs += olengths_idxs[i]-2; 5371 } 5372 } 5373 for (i=0;i<n_sends;i++) { 5374 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5375 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5376 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5377 if (nis) { 5378 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5379 } 5380 if (nvecs) { 5381 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5382 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5383 } 5384 } 5385 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5386 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5387 5388 /* assemble new l2g map */ 5389 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5390 ptr_idxs = recv_buffer_idxs; 5391 new_local_rows = 0; 5392 for (i=0;i<n_recvs;i++) { 5393 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5394 ptr_idxs += olengths_idxs[i]; 5395 } 5396 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5397 ptr_idxs = recv_buffer_idxs; 5398 new_local_rows = 0; 5399 for (i=0;i<n_recvs;i++) { 5400 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5401 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5402 ptr_idxs += olengths_idxs[i]; 5403 } 5404 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5405 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5406 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5407 5408 /* infer new local matrix type from received local matrices type */ 5409 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5410 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5411 if (n_recvs) { 5412 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5413 ptr_idxs = recv_buffer_idxs; 5414 for (i=0;i<n_recvs;i++) { 5415 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5416 new_local_type_private = MATAIJ_PRIVATE; 5417 break; 5418 } 5419 ptr_idxs += olengths_idxs[i]; 5420 } 5421 switch (new_local_type_private) { 5422 case MATDENSE_PRIVATE: 5423 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5424 new_local_type = MATSEQAIJ; 5425 bs = 1; 5426 } else { /* if I receive only 1 dense matrix */ 5427 new_local_type = MATSEQDENSE; 5428 bs = 1; 5429 } 5430 break; 5431 case MATAIJ_PRIVATE: 5432 new_local_type = MATSEQAIJ; 5433 bs = 1; 5434 break; 5435 case MATBAIJ_PRIVATE: 5436 new_local_type = MATSEQBAIJ; 5437 break; 5438 case MATSBAIJ_PRIVATE: 5439 new_local_type = MATSEQSBAIJ; 5440 break; 5441 default: 5442 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5443 break; 5444 } 5445 } else { /* by default, new_local_type is seqdense */ 5446 new_local_type = MATSEQDENSE; 5447 bs = 1; 5448 } 5449 5450 /* create MATIS object if needed */ 5451 if (!reuse) { 5452 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5453 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5454 } else { 5455 /* it also destroys the local matrices */ 5456 if (*mat_n) { 5457 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5458 } else { /* this is a fake object */ 5459 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5460 } 5461 } 5462 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5463 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5464 5465 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5466 5467 /* Global to local map of received indices */ 5468 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5469 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5470 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5471 5472 /* restore attributes -> type of incoming data and its size */ 5473 buf_size_idxs = 0; 5474 for (i=0;i<n_recvs;i++) { 5475 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5476 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5477 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5478 } 5479 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5480 5481 /* set preallocation */ 5482 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5483 if (!newisdense) { 5484 PetscInt *new_local_nnz=0; 5485 5486 ptr_idxs = recv_buffer_idxs_local; 5487 if (n_recvs) { 5488 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5489 } 5490 for (i=0;i<n_recvs;i++) { 5491 PetscInt j; 5492 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5493 for (j=0;j<*(ptr_idxs+1);j++) { 5494 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5495 } 5496 } else { 5497 /* TODO */ 5498 } 5499 ptr_idxs += olengths_idxs[i]; 5500 } 5501 if (new_local_nnz) { 5502 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5503 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5504 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5505 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5506 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5507 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5508 } else { 5509 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5510 } 5511 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5512 } else { 5513 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5514 } 5515 5516 /* set values */ 5517 ptr_vals = recv_buffer_vals; 5518 ptr_idxs = recv_buffer_idxs_local; 5519 for (i=0;i<n_recvs;i++) { 5520 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5521 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5522 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5523 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5524 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5525 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5526 } else { 5527 /* TODO */ 5528 } 5529 ptr_idxs += olengths_idxs[i]; 5530 ptr_vals += olengths_vals[i]; 5531 } 5532 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5535 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5536 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5537 5538 #if 0 5539 if (!restrict_comm) { /* check */ 5540 Vec lvec,rvec; 5541 PetscReal infty_error; 5542 5543 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5544 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5545 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5546 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5547 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5548 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5549 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5550 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5551 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5552 } 5553 #endif 5554 5555 /* assemble new additional is (if any) */ 5556 if (nis) { 5557 PetscInt **temp_idxs,*count_is,j,psum; 5558 5559 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5560 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5561 ptr_idxs = recv_buffer_idxs_is; 5562 psum = 0; 5563 for (i=0;i<n_recvs;i++) { 5564 for (j=0;j<nis;j++) { 5565 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5566 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5567 psum += plen; 5568 ptr_idxs += plen+1; /* shift pointer to received data */ 5569 } 5570 } 5571 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5572 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5573 for (i=1;i<nis;i++) { 5574 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5575 } 5576 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5577 ptr_idxs = recv_buffer_idxs_is; 5578 for (i=0;i<n_recvs;i++) { 5579 for (j=0;j<nis;j++) { 5580 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5581 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5582 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5583 ptr_idxs += plen+1; /* shift pointer to received data */ 5584 } 5585 } 5586 for (i=0;i<nis;i++) { 5587 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5588 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5589 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5590 } 5591 ierr = PetscFree(count_is);CHKERRQ(ierr); 5592 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5593 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5594 } 5595 /* free workspace */ 5596 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5597 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5598 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5599 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5600 if (isdense) { 5601 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5602 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5603 } else { 5604 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5605 } 5606 if (nis) { 5607 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5608 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5609 } 5610 5611 if (nvecs) { 5612 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5613 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5614 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5615 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5616 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5617 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5618 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5619 /* set values */ 5620 ptr_vals = recv_buffer_vecs; 5621 ptr_idxs = recv_buffer_idxs_local; 5622 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5623 for (i=0;i<n_recvs;i++) { 5624 PetscInt j; 5625 for (j=0;j<*(ptr_idxs+1);j++) { 5626 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5627 } 5628 ptr_idxs += olengths_idxs[i]; 5629 ptr_vals += olengths_idxs[i]-2; 5630 } 5631 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5632 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5633 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5634 } 5635 5636 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5637 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5638 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5639 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5640 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5641 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5642 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5643 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5644 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5645 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5646 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5647 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5648 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5649 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5650 ierr = PetscFree(onodes);CHKERRQ(ierr); 5651 if (nis) { 5652 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5653 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5654 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5655 } 5656 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5657 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5658 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5659 for (i=0;i<nis;i++) { 5660 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5661 } 5662 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5663 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5664 } 5665 *mat_n = NULL; 5666 } 5667 PetscFunctionReturn(0); 5668 } 5669 5670 /* temporary hack into ksp private data structure */ 5671 #include <petsc/private/kspimpl.h> 5672 5673 #undef __FUNCT__ 5674 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5675 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5676 { 5677 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5678 PC_IS *pcis = (PC_IS*)pc->data; 5679 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5680 Mat coarsedivudotp = NULL; 5681 MatNullSpace CoarseNullSpace = NULL; 5682 ISLocalToGlobalMapping coarse_islg; 5683 IS coarse_is,*isarray; 5684 PetscInt i,im_active=-1,active_procs=-1; 5685 PetscInt nis,nisdofs,nisneu,nisvert; 5686 PC pc_temp; 5687 PCType coarse_pc_type; 5688 KSPType coarse_ksp_type; 5689 PetscBool multilevel_requested,multilevel_allowed; 5690 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5691 Mat t_coarse_mat_is; 5692 PetscInt ncoarse; 5693 PetscBool compute_vecs = PETSC_FALSE; 5694 PetscScalar *array; 5695 MatReuse coarse_mat_reuse; 5696 PetscBool restr, full_restr, have_void; 5697 PetscErrorCode ierr; 5698 5699 PetscFunctionBegin; 5700 /* Assign global numbering to coarse dofs */ 5701 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5702 PetscInt ocoarse_size; 5703 compute_vecs = PETSC_TRUE; 5704 ocoarse_size = pcbddc->coarse_size; 5705 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5706 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5707 /* see if we can avoid some work */ 5708 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5709 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5710 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5711 PC pc; 5712 PetscBool isbddc; 5713 5714 /* temporary workaround since PCBDDC does not have a reset method so far */ 5715 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5716 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5717 if (isbddc) { 5718 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5719 } else { 5720 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5721 } 5722 coarse_reuse = PETSC_FALSE; 5723 } else { /* we can safely reuse already computed coarse matrix */ 5724 coarse_reuse = PETSC_TRUE; 5725 } 5726 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5727 coarse_reuse = PETSC_FALSE; 5728 } 5729 /* reset any subassembling information */ 5730 if (!coarse_reuse || pcbddc->recompute_topography) { 5731 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5732 } 5733 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5734 coarse_reuse = PETSC_TRUE; 5735 } 5736 /* assemble coarse matrix */ 5737 if (coarse_reuse && pcbddc->coarse_ksp) { 5738 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5739 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5740 coarse_mat_reuse = MAT_REUSE_MATRIX; 5741 } else { 5742 coarse_mat = NULL; 5743 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5744 } 5745 5746 /* creates temporary l2gmap and IS for coarse indexes */ 5747 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5748 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5749 5750 /* creates temporary MATIS object for coarse matrix */ 5751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5752 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5753 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5754 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5755 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5756 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5757 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5758 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5759 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5760 5761 /* count "active" (i.e. with positive local size) and "void" processes */ 5762 im_active = !!(pcis->n); 5763 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5764 5765 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 5766 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5767 /* full_restr : just use the receivers from the subassembling pattern */ 5768 coarse_mat_is = NULL; 5769 multilevel_allowed = PETSC_FALSE; 5770 multilevel_requested = PETSC_FALSE; 5771 full_restr = PETSC_TRUE; 5772 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5773 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5774 if (multilevel_requested) { 5775 ncoarse = active_procs/pcbddc->coarsening_ratio; 5776 restr = PETSC_FALSE; 5777 full_restr = PETSC_FALSE; 5778 } else { 5779 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5780 restr = PETSC_TRUE; 5781 full_restr = PETSC_TRUE; 5782 } 5783 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5784 ncoarse = PetscMax(1,ncoarse); 5785 if (!pcbddc->coarse_subassembling) { 5786 if (pcbddc->coarsening_ratio > 1) { 5787 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5788 } else { 5789 PetscMPIInt size,rank; 5790 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5791 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5792 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5793 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5794 } 5795 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5796 PetscInt psum; 5797 PetscMPIInt size; 5798 if (pcbddc->coarse_ksp) psum = 1; 5799 else psum = 0; 5800 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5801 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5802 if (ncoarse < size) have_void = PETSC_TRUE; 5803 } 5804 /* determine if we can go multilevel */ 5805 if (multilevel_requested) { 5806 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5807 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5808 } 5809 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5810 5811 /* dump subassembling pattern */ 5812 if (pcbddc->dbg_flag && multilevel_allowed) { 5813 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5814 } 5815 5816 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5817 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5818 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5819 const PetscInt *idxs; 5820 ISLocalToGlobalMapping tmap; 5821 5822 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5823 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5824 /* allocate space for temporary storage */ 5825 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5826 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5827 /* allocate for IS array */ 5828 nisdofs = pcbddc->n_ISForDofsLocal; 5829 nisneu = !!pcbddc->NeumannBoundariesLocal; 5830 nisvert = 0; /* nisvert is not used */ 5831 nis = nisdofs + nisneu + nisvert; 5832 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5833 /* dofs splitting */ 5834 for (i=0;i<nisdofs;i++) { 5835 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5836 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5837 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5838 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5839 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5840 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5841 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5842 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5843 } 5844 /* neumann boundaries */ 5845 if (pcbddc->NeumannBoundariesLocal) { 5846 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5847 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5848 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5849 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5850 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5851 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5852 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5853 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5854 } 5855 /* free memory */ 5856 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5857 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5858 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5859 } else { 5860 nis = 0; 5861 nisdofs = 0; 5862 nisneu = 0; 5863 nisvert = 0; 5864 isarray = NULL; 5865 } 5866 /* destroy no longer needed map */ 5867 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5868 5869 /* subassemble */ 5870 if (multilevel_allowed) { 5871 Vec vp[1]; 5872 PetscInt nvecs = 0; 5873 PetscBool reuse,reuser; 5874 5875 if (coarse_mat) reuse = PETSC_TRUE; 5876 else reuse = PETSC_FALSE; 5877 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5878 vp[0] = NULL; 5879 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5880 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5881 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5882 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5883 nvecs = 1; 5884 5885 if (pcbddc->divudotp) { 5886 Mat B,loc_divudotp; 5887 Vec v,p; 5888 IS dummy; 5889 PetscInt np; 5890 5891 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5892 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5893 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5894 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5895 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5896 ierr = VecSet(p,1.);CHKERRQ(ierr); 5897 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5898 ierr = VecDestroy(&p);CHKERRQ(ierr); 5899 ierr = MatDestroy(&B);CHKERRQ(ierr); 5900 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5901 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5902 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5903 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5904 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5905 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5906 ierr = VecDestroy(&v);CHKERRQ(ierr); 5907 } 5908 } 5909 if (reuser) { 5910 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5911 } else { 5912 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5913 } 5914 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5915 PetscScalar *arraym,*arrayv; 5916 PetscInt nl; 5917 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5918 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5919 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5920 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5921 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5922 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5923 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5924 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5925 } else { 5926 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5927 } 5928 } else { 5929 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5930 } 5931 if (coarse_mat_is || coarse_mat) { 5932 PetscMPIInt size; 5933 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5934 if (!multilevel_allowed) { 5935 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5936 } else { 5937 Mat A; 5938 5939 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5940 if (coarse_mat_is) { 5941 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5942 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5943 coarse_mat = coarse_mat_is; 5944 } 5945 /* be sure we don't have MatSeqDENSE as local mat */ 5946 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5947 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5948 } 5949 } 5950 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5951 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5952 5953 /* create local to global scatters for coarse problem */ 5954 if (compute_vecs) { 5955 PetscInt lrows; 5956 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5957 if (coarse_mat) { 5958 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5959 } else { 5960 lrows = 0; 5961 } 5962 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5963 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5964 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5965 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5966 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5967 } 5968 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5969 5970 /* set defaults for coarse KSP and PC */ 5971 if (multilevel_allowed) { 5972 coarse_ksp_type = KSPRICHARDSON; 5973 coarse_pc_type = PCBDDC; 5974 } else { 5975 coarse_ksp_type = KSPPREONLY; 5976 coarse_pc_type = PCREDUNDANT; 5977 } 5978 5979 /* print some info if requested */ 5980 if (pcbddc->dbg_flag) { 5981 if (!multilevel_allowed) { 5982 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5983 if (multilevel_requested) { 5984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5985 } else if (pcbddc->max_levels) { 5986 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5987 } 5988 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5989 } 5990 } 5991 5992 /* create the coarse KSP object only once with defaults */ 5993 if (coarse_mat) { 5994 PetscViewer dbg_viewer = NULL; 5995 if (pcbddc->dbg_flag) { 5996 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5997 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5998 } 5999 if (!pcbddc->coarse_ksp) { 6000 char prefix[256],str_level[16]; 6001 size_t len; 6002 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6003 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6004 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6005 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6006 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6007 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6008 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6009 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6010 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6011 /* prefix */ 6012 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6013 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6014 if (!pcbddc->current_level) { 6015 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6016 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6017 } else { 6018 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6019 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6020 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6021 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6022 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6023 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6024 } 6025 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6026 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6027 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6028 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6029 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6030 /* allow user customization */ 6031 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6032 } 6033 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6034 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6035 if (nisdofs) { 6036 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6037 for (i=0;i<nisdofs;i++) { 6038 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6039 } 6040 } 6041 if (nisneu) { 6042 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6043 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6044 } 6045 if (nisvert) { 6046 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6047 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6048 } 6049 6050 /* get some info after set from options */ 6051 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6052 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6053 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6054 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6055 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6056 isbddc = PETSC_FALSE; 6057 } 6058 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6059 if (isredundant) { 6060 KSP inner_ksp; 6061 PC inner_pc; 6062 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6063 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6064 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6065 } 6066 6067 /* parameters which miss an API */ 6068 if (isbddc) { 6069 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6070 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6071 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6072 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6073 if (pcbddc_coarse->benign_saddle_point) { 6074 Mat coarsedivudotp_is; 6075 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6076 IS row,col; 6077 const PetscInt *gidxs; 6078 PetscInt n,st,M,N; 6079 6080 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6081 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6082 st = st-n; 6083 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6084 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6085 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6086 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6087 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6088 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6089 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6090 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6091 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6092 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6093 ierr = ISDestroy(&row);CHKERRQ(ierr); 6094 ierr = ISDestroy(&col);CHKERRQ(ierr); 6095 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6096 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6097 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6098 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6099 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6100 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6101 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6102 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6103 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6104 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6105 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6106 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6107 } 6108 } 6109 6110 /* propagate symmetry info of coarse matrix */ 6111 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6112 if (pc->pmat->symmetric_set) { 6113 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6114 } 6115 if (pc->pmat->hermitian_set) { 6116 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6117 } 6118 if (pc->pmat->spd_set) { 6119 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6120 } 6121 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6122 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6123 } 6124 /* set operators */ 6125 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6126 if (pcbddc->dbg_flag) { 6127 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6128 } 6129 } 6130 ierr = PetscFree(isarray);CHKERRQ(ierr); 6131 #if 0 6132 { 6133 PetscViewer viewer; 6134 char filename[256]; 6135 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6136 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6137 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6138 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6139 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6140 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6141 } 6142 #endif 6143 6144 if (pcbddc->coarse_ksp) { 6145 Vec crhs,csol; 6146 6147 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6148 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6149 if (!csol) { 6150 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6151 } 6152 if (!crhs) { 6153 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6154 } 6155 } 6156 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6157 6158 /* compute null space for coarse solver if the benign trick has been requested */ 6159 if (pcbddc->benign_null) { 6160 6161 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6162 for (i=0;i<pcbddc->benign_n;i++) { 6163 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6164 } 6165 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6166 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6167 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6168 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6169 if (coarse_mat) { 6170 Vec nullv; 6171 PetscScalar *array,*array2; 6172 PetscInt nl; 6173 6174 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6175 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6176 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6177 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6178 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6179 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6180 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6181 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6182 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6183 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6184 } 6185 } 6186 6187 if (pcbddc->coarse_ksp) { 6188 PetscBool ispreonly; 6189 6190 if (CoarseNullSpace) { 6191 PetscBool isnull; 6192 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6193 if (isnull) { 6194 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6195 } 6196 /* TODO: add local nullspaces (if any) */ 6197 } 6198 /* setup coarse ksp */ 6199 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6200 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6201 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6202 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6203 KSP check_ksp; 6204 KSPType check_ksp_type; 6205 PC check_pc; 6206 Vec check_vec,coarse_vec; 6207 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6208 PetscInt its; 6209 PetscBool compute_eigs; 6210 PetscReal *eigs_r,*eigs_c; 6211 PetscInt neigs; 6212 const char *prefix; 6213 6214 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6215 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6216 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6217 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6218 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6219 /* prevent from setup unneeded object */ 6220 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6221 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6222 if (ispreonly) { 6223 check_ksp_type = KSPPREONLY; 6224 compute_eigs = PETSC_FALSE; 6225 } else { 6226 check_ksp_type = KSPGMRES; 6227 compute_eigs = PETSC_TRUE; 6228 } 6229 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6230 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6231 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6232 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6233 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6234 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6235 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6236 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6237 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6238 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6239 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6240 /* create random vec */ 6241 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6242 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6243 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6244 /* solve coarse problem */ 6245 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6246 /* set eigenvalue estimation if preonly has not been requested */ 6247 if (compute_eigs) { 6248 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6249 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6250 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6251 if (neigs) { 6252 lambda_max = eigs_r[neigs-1]; 6253 lambda_min = eigs_r[0]; 6254 if (pcbddc->use_coarse_estimates) { 6255 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6256 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6257 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6258 } 6259 } 6260 } 6261 } 6262 6263 /* check coarse problem residual error */ 6264 if (pcbddc->dbg_flag) { 6265 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6266 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6267 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6268 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6269 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6270 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6271 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6272 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6273 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6274 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6275 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6276 if (CoarseNullSpace) { 6277 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6278 } 6279 if (compute_eigs) { 6280 PetscReal lambda_max_s,lambda_min_s; 6281 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6282 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6283 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6284 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6285 for (i=0;i<neigs;i++) { 6286 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6287 } 6288 } 6289 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6290 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6291 } 6292 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6293 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6294 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6295 if (compute_eigs) { 6296 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6297 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6298 } 6299 } 6300 } 6301 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6302 /* print additional info */ 6303 if (pcbddc->dbg_flag) { 6304 /* waits until all processes reaches this point */ 6305 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6307 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6308 } 6309 6310 /* free memory */ 6311 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6312 PetscFunctionReturn(0); 6313 } 6314 6315 #undef __FUNCT__ 6316 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6317 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6318 { 6319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6320 PC_IS* pcis = (PC_IS*)pc->data; 6321 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6322 IS subset,subset_mult,subset_n; 6323 PetscInt local_size,coarse_size=0; 6324 PetscInt *local_primal_indices=NULL; 6325 const PetscInt *t_local_primal_indices; 6326 PetscErrorCode ierr; 6327 6328 PetscFunctionBegin; 6329 /* Compute global number of coarse dofs */ 6330 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6332 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6333 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6334 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6335 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6336 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6337 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6338 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6339 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6340 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6341 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6342 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6343 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6344 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6345 6346 /* check numbering */ 6347 if (pcbddc->dbg_flag) { 6348 PetscScalar coarsesum,*array,*array2; 6349 PetscInt i; 6350 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6351 6352 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6353 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6354 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6355 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6356 /* counter */ 6357 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6358 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6359 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6360 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6361 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6362 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6363 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6364 for (i=0;i<pcbddc->local_primal_size;i++) { 6365 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6366 } 6367 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6368 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6369 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6370 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6371 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6372 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6373 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6374 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6375 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6376 for (i=0;i<pcis->n;i++) { 6377 if (array[i] != 0.0 && array[i] != array2[i]) { 6378 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6379 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6380 set_error = PETSC_TRUE; 6381 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6382 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6383 } 6384 } 6385 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6386 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6388 for (i=0;i<pcis->n;i++) { 6389 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6390 } 6391 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6392 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6393 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6394 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6395 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6397 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6398 PetscInt *gidxs; 6399 6400 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6401 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6402 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6405 for (i=0;i<pcbddc->local_primal_size;i++) { 6406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6407 } 6408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6409 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6410 } 6411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6412 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6413 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6414 } 6415 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6416 /* get back data */ 6417 *coarse_size_n = coarse_size; 6418 *local_primal_indices_n = local_primal_indices; 6419 PetscFunctionReturn(0); 6420 } 6421 6422 #undef __FUNCT__ 6423 #define __FUNCT__ "PCBDDCGlobalToLocal" 6424 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6425 { 6426 IS localis_t; 6427 PetscInt i,lsize,*idxs,n; 6428 PetscScalar *vals; 6429 PetscErrorCode ierr; 6430 6431 PetscFunctionBegin; 6432 /* get indices in local ordering exploiting local to global map */ 6433 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6434 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6435 for (i=0;i<lsize;i++) vals[i] = 1.0; 6436 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6437 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6438 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6439 if (idxs) { /* multilevel guard */ 6440 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6441 } 6442 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6443 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6444 ierr = PetscFree(vals);CHKERRQ(ierr); 6445 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6446 /* now compute set in local ordering */ 6447 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6448 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6449 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6450 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6451 for (i=0,lsize=0;i<n;i++) { 6452 if (PetscRealPart(vals[i]) > 0.5) { 6453 lsize++; 6454 } 6455 } 6456 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6457 for (i=0,lsize=0;i<n;i++) { 6458 if (PetscRealPart(vals[i]) > 0.5) { 6459 idxs[lsize++] = i; 6460 } 6461 } 6462 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6463 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6464 *localis = localis_t; 6465 PetscFunctionReturn(0); 6466 } 6467 6468 #undef __FUNCT__ 6469 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6470 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6471 { 6472 PC_IS *pcis=(PC_IS*)pc->data; 6473 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6474 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6475 Mat S_j; 6476 PetscInt *used_xadj,*used_adjncy; 6477 PetscBool free_used_adj; 6478 PetscErrorCode ierr; 6479 6480 PetscFunctionBegin; 6481 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6482 free_used_adj = PETSC_FALSE; 6483 if (pcbddc->sub_schurs_layers == -1) { 6484 used_xadj = NULL; 6485 used_adjncy = NULL; 6486 } else { 6487 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6488 used_xadj = pcbddc->mat_graph->xadj; 6489 used_adjncy = pcbddc->mat_graph->adjncy; 6490 } else if (pcbddc->computed_rowadj) { 6491 used_xadj = pcbddc->mat_graph->xadj; 6492 used_adjncy = pcbddc->mat_graph->adjncy; 6493 } else { 6494 PetscBool flg_row=PETSC_FALSE; 6495 const PetscInt *xadj,*adjncy; 6496 PetscInt nvtxs; 6497 6498 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6499 if (flg_row) { 6500 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6501 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6502 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6503 free_used_adj = PETSC_TRUE; 6504 } else { 6505 pcbddc->sub_schurs_layers = -1; 6506 used_xadj = NULL; 6507 used_adjncy = NULL; 6508 } 6509 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6510 } 6511 } 6512 6513 /* setup sub_schurs data */ 6514 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6515 if (!sub_schurs->schur_explicit) { 6516 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6517 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6518 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6519 } else { 6520 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6521 PetscBool isseqaij,need_change = PETSC_FALSE; 6522 PetscInt benign_n; 6523 Mat change = NULL; 6524 Vec scaling = NULL; 6525 IS change_primal = NULL; 6526 6527 if (!pcbddc->use_vertices && reuse_solvers) { 6528 PetscInt n_vertices; 6529 6530 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6531 reuse_solvers = (PetscBool)!n_vertices; 6532 } 6533 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6534 if (!isseqaij) { 6535 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6536 if (matis->A == pcbddc->local_mat) { 6537 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6538 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6539 } else { 6540 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6541 } 6542 } 6543 if (!pcbddc->benign_change_explicit) { 6544 benign_n = pcbddc->benign_n; 6545 } else { 6546 benign_n = 0; 6547 } 6548 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6549 We need a global reduction to avoid possible deadlocks. 6550 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6551 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6552 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6553 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6554 need_change = (PetscBool)(!need_change); 6555 } 6556 /* If the user defines additional constraints, we import them here. 6557 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6558 if (need_change) { 6559 PC_IS *pcisf; 6560 PC_BDDC *pcbddcf; 6561 PC pcf; 6562 6563 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6564 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6565 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6566 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6567 /* hacks */ 6568 pcisf = (PC_IS*)pcf->data; 6569 pcisf->is_B_local = pcis->is_B_local; 6570 pcisf->vec1_N = pcis->vec1_N; 6571 pcisf->BtoNmap = pcis->BtoNmap; 6572 pcisf->n = pcis->n; 6573 pcisf->n_B = pcis->n_B; 6574 pcbddcf = (PC_BDDC*)pcf->data; 6575 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6576 pcbddcf->mat_graph = pcbddc->mat_graph; 6577 pcbddcf->use_faces = PETSC_TRUE; 6578 pcbddcf->use_change_of_basis = PETSC_TRUE; 6579 pcbddcf->use_change_on_faces = PETSC_TRUE; 6580 pcbddcf->use_qr_single = PETSC_TRUE; 6581 pcbddcf->fake_change = PETSC_TRUE; 6582 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6583 /* store information on primal vertices and change of basis (in local numbering) */ 6584 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6585 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6586 change = pcbddcf->ConstraintMatrix; 6587 pcbddcf->ConstraintMatrix = NULL; 6588 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6589 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6590 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6591 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6592 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6593 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6594 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6595 pcf->ops->destroy = NULL; 6596 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6597 } 6598 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6599 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6600 ierr = MatDestroy(&change);CHKERRQ(ierr); 6601 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6602 } 6603 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6604 6605 /* free adjacency */ 6606 if (free_used_adj) { 6607 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6608 } 6609 PetscFunctionReturn(0); 6610 } 6611 6612 #undef __FUNCT__ 6613 #define __FUNCT__ "PCBDDCInitSubSchurs" 6614 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6615 { 6616 PC_IS *pcis=(PC_IS*)pc->data; 6617 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6618 PCBDDCGraph graph; 6619 PetscErrorCode ierr; 6620 6621 PetscFunctionBegin; 6622 /* attach interface graph for determining subsets */ 6623 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6624 IS verticesIS,verticescomm; 6625 PetscInt vsize,*idxs; 6626 6627 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6628 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6629 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6630 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6631 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6632 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6633 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6634 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6635 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6636 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6637 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6638 } else { 6639 graph = pcbddc->mat_graph; 6640 } 6641 /* print some info */ 6642 if (pcbddc->dbg_flag) { 6643 IS vertices; 6644 PetscInt nv,nedges,nfaces; 6645 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 6646 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6647 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6648 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6649 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6655 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6656 } 6657 6658 /* sub_schurs init */ 6659 if (!pcbddc->sub_schurs) { 6660 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6661 } 6662 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6663 6664 /* free graph struct */ 6665 if (pcbddc->sub_schurs_rebuild) { 6666 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6667 } 6668 PetscFunctionReturn(0); 6669 } 6670 6671 #undef __FUNCT__ 6672 #define __FUNCT__ "PCBDDCCheckOperator" 6673 PetscErrorCode PCBDDCCheckOperator(PC pc) 6674 { 6675 PC_IS *pcis=(PC_IS*)pc->data; 6676 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6677 PetscErrorCode ierr; 6678 6679 PetscFunctionBegin; 6680 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6681 IS zerodiag = NULL; 6682 Mat S_j,B0_B=NULL; 6683 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6684 PetscScalar *p0_check,*array,*array2; 6685 PetscReal norm; 6686 PetscInt i; 6687 6688 /* B0 and B0_B */ 6689 if (zerodiag) { 6690 IS dummy; 6691 6692 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6693 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6694 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6695 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6696 } 6697 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6698 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6699 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6700 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6701 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6702 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6703 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6704 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6705 /* S_j */ 6706 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6707 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6708 6709 /* mimic vector in \widetilde{W}_\Gamma */ 6710 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6711 /* continuous in primal space */ 6712 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6713 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6714 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6715 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6716 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6717 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6718 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6719 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6720 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6721 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6722 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6723 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6724 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6725 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6726 6727 /* assemble rhs for coarse problem */ 6728 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6729 /* local with Schur */ 6730 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6731 if (zerodiag) { 6732 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6733 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6734 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6735 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6736 } 6737 /* sum on primal nodes the local contributions */ 6738 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6739 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6740 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6741 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6742 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6743 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6744 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6745 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6746 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6747 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6748 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6749 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6750 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6751 /* scale primal nodes (BDDC sums contibutions) */ 6752 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6753 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6754 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6755 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6756 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6757 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6758 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6759 /* global: \widetilde{B0}_B w_\Gamma */ 6760 if (zerodiag) { 6761 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6762 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6763 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6764 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6765 } 6766 /* BDDC */ 6767 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6768 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6769 6770 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6771 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6772 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6773 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6774 for (i=0;i<pcbddc->benign_n;i++) { 6775 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6776 } 6777 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6778 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6779 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6780 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6781 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6782 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6783 } 6784 PetscFunctionReturn(0); 6785 } 6786