1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 9 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 10 { 11 Mat loc_divudotp; 12 Vec p,v,vins,quad_vec,*quad_vecs; 13 ISLocalToGlobalMapping map; 14 IS *faces,*edges; 15 PetscScalar *vals; 16 const PetscScalar *array; 17 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 18 PetscMPIInt rank; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 if (!transpose) { 51 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 52 } else { 53 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 54 } 55 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 56 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 57 for (i=0;i<maxneighs;i++) { 58 PetscInt first,last; 59 60 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 61 /* the near-null space fo BDDC carries information on quadrature weights, 62 and these can be collinear -> so cheat with MatNullSpaceCreate 63 and create a suitable set of basis vectors first */ 64 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 65 if (i>=first && i < last) { 66 PetscScalar *data; 67 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 68 data[i-first] = 1.; 69 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 70 } 71 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 72 } 73 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 74 for (i=0;i<maxneighs;i++) { /* reset vectors */ 75 PetscInt first,last; 76 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 77 if (i>=first && i < last) { 78 PetscScalar *data; 79 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 80 data[i-first] = 0.; 81 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 82 } 83 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 84 } 85 /* compute local quad vec */ 86 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 87 if (!transpose) { 88 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 89 } else { 90 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 91 } 92 ierr = VecSet(p,1.);CHKERRQ(ierr); 93 if (!transpose) { 94 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 95 } else { 96 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 97 } 98 if (vl2l) { 99 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 100 } else { 101 vins = v; 102 } 103 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 104 ierr = VecDestroy(&p);CHKERRQ(ierr); 105 106 /* insert in global quadrature vecs */ 107 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 108 for (i=0;i<nf;i++) { 109 const PetscInt *idxs; 110 PetscInt idx,nn,j; 111 112 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 113 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 114 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 115 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 116 idx = -(idx+1); 117 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 118 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 119 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 120 } 121 for (i=0;i<ne;i++) { 122 const PetscInt *idxs; 123 PetscInt idx,nn,j; 124 125 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 126 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 127 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 128 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 129 idx = -(idx+1); 130 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 132 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 133 } 134 ierr = PetscFree(edges);CHKERRQ(ierr); 135 ierr = PetscFree(faces);CHKERRQ(ierr); 136 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 137 if (vl2l) { 138 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 139 } 140 ierr = VecDestroy(&v);CHKERRQ(ierr); 141 ierr = PetscFree(vals);CHKERRQ(ierr); 142 143 /* assemble near null space */ 144 for (i=0;i<maxneighs;i++) { 145 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 146 } 147 for (i=0;i<maxneighs;i++) { 148 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 149 } 150 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 157 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 158 { 159 PetscErrorCode ierr; 160 Vec local,global; 161 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 162 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 163 164 PetscFunctionBegin; 165 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 166 /* need to convert from global to local topology information and remove references to information in global ordering */ 167 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 168 if (pcbddc->user_provided_isfordofs) { 169 if (pcbddc->n_ISForDofs) { 170 PetscInt i; 171 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 172 for (i=0;i<pcbddc->n_ISForDofs;i++) { 173 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 174 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 175 } 176 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 177 pcbddc->n_ISForDofs = 0; 178 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 179 } 180 } else { 181 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 182 PetscInt i, n = matis->A->rmap->n; 183 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 184 if (i > 1) { 185 pcbddc->n_ISForDofsLocal = i; 186 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 187 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 188 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 189 } 190 } 191 } 192 } 193 194 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 195 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 196 } 197 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 198 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 199 } 200 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 201 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 202 } 203 ierr = VecDestroy(&global);CHKERRQ(ierr); 204 ierr = VecDestroy(&local);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 210 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 211 { 212 PC_IS *pcis = (PC_IS*)(pc->data); 213 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (!pcbddc->benign_have_null) { 218 PetscFunctionReturn(0); 219 } 220 if (pcbddc->ChangeOfBasisMatrix) { 221 Vec swap; 222 223 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 224 swap = pcbddc->work_change; 225 pcbddc->work_change = r; 226 r = swap; 227 } 228 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 229 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 230 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 231 ierr = VecSet(z,0.);CHKERRQ(ierr); 232 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 233 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 234 if (pcbddc->ChangeOfBasisMatrix) { 235 Vec swap; 236 237 swap = r; 238 r = pcbddc->work_change; 239 pcbddc->work_change = swap; 240 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 241 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 248 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 249 { 250 PCBDDCBenignMatMult_ctx ctx; 251 PetscErrorCode ierr; 252 PetscBool apply_right,apply_left,reset_x; 253 254 PetscFunctionBegin; 255 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 256 if (transpose) { 257 apply_right = ctx->apply_left; 258 apply_left = ctx->apply_right; 259 } else { 260 apply_right = ctx->apply_right; 261 apply_left = ctx->apply_left; 262 } 263 reset_x = PETSC_FALSE; 264 if (apply_right) { 265 const PetscScalar *ax; 266 PetscInt nl,i; 267 268 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 269 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 270 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 272 for (i=0;i<ctx->benign_n;i++) { 273 PetscScalar sum,val; 274 const PetscInt *idxs; 275 PetscInt nz,j; 276 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 277 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 278 sum = 0.; 279 if (ctx->apply_p0) { 280 val = ctx->work[idxs[nz-1]]; 281 for (j=0;j<nz-1;j++) { 282 sum += ctx->work[idxs[j]]; 283 ctx->work[idxs[j]] += val; 284 } 285 } else { 286 for (j=0;j<nz-1;j++) { 287 sum += ctx->work[idxs[j]]; 288 } 289 } 290 ctx->work[idxs[nz-1]] -= sum; 291 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 292 } 293 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 294 reset_x = PETSC_TRUE; 295 } 296 if (transpose) { 297 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 298 } else { 299 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 300 } 301 if (reset_x) { 302 ierr = VecResetArray(x);CHKERRQ(ierr); 303 } 304 if (apply_left) { 305 PetscScalar *ay; 306 PetscInt i; 307 308 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 309 for (i=0;i<ctx->benign_n;i++) { 310 PetscScalar sum,val; 311 const PetscInt *idxs; 312 PetscInt nz,j; 313 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 314 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 315 val = -ay[idxs[nz-1]]; 316 if (ctx->apply_p0) { 317 sum = 0.; 318 for (j=0;j<nz-1;j++) { 319 sum += ay[idxs[j]]; 320 ay[idxs[j]] += val; 321 } 322 ay[idxs[nz-1]] += sum; 323 } else { 324 for (j=0;j<nz-1;j++) { 325 ay[idxs[j]] += val; 326 } 327 ay[idxs[nz-1]] = 0.; 328 } 329 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 330 } 331 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 332 } 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 338 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 339 { 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 349 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 350 { 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "PCBDDCBenignShellMat" 360 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 361 { 362 PC_IS *pcis = (PC_IS*)pc->data; 363 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 364 PCBDDCBenignMatMult_ctx ctx; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (!restore) { 369 Mat A_IB,A_BI; 370 PetscScalar *work; 371 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 372 373 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 374 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 375 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 376 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 377 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 378 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 379 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 380 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 381 ierr = PetscNew(&ctx);CHKERRQ(ierr); 382 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 383 ctx->apply_left = PETSC_TRUE; 384 ctx->apply_right = PETSC_FALSE; 385 ctx->apply_p0 = PETSC_FALSE; 386 ctx->benign_n = pcbddc->benign_n; 387 if (reuse) { 388 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 389 ctx->free = PETSC_FALSE; 390 } else { /* TODO: could be optimized for successive solves */ 391 ISLocalToGlobalMapping N_to_D; 392 PetscInt i; 393 394 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 395 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 396 for (i=0;i<pcbddc->benign_n;i++) { 397 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 398 } 399 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 400 ctx->free = PETSC_TRUE; 401 } 402 ctx->A = pcis->A_IB; 403 ctx->work = work; 404 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 405 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 407 pcis->A_IB = A_IB; 408 409 /* A_BI as A_IB^T */ 410 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 411 pcbddc->benign_original_mat = pcis->A_BI; 412 pcis->A_BI = A_BI; 413 } else { 414 if (!pcbddc->benign_original_mat) { 415 PetscFunctionReturn(0); 416 } 417 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 418 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 419 pcis->A_IB = ctx->A; 420 ctx->A = NULL; 421 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 422 pcis->A_BI = pcbddc->benign_original_mat; 423 pcbddc->benign_original_mat = NULL; 424 if (ctx->free) { 425 PetscInt i; 426 for (i=0;i<ctx->benign_n;i++) { 427 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 428 } 429 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 430 } 431 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 432 ierr = PetscFree(ctx);CHKERRQ(ierr); 433 } 434 PetscFunctionReturn(0); 435 } 436 437 /* used just in bddc debug mode */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCBenignProject" 440 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 441 { 442 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 443 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 444 Mat An; 445 PetscErrorCode ierr; 446 447 PetscFunctionBegin; 448 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 449 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 450 if (is1) { 451 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 452 ierr = MatDestroy(&An);CHKERRQ(ierr); 453 } else { 454 *B = An; 455 } 456 PetscFunctionReturn(0); 457 } 458 459 /* TODO: add reuse flag */ 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSeqAIJCompress" 462 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 463 { 464 Mat Bt; 465 PetscScalar *a,*bdata; 466 const PetscInt *ii,*ij; 467 PetscInt m,n,i,nnz,*bii,*bij; 468 PetscBool flg_row; 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 473 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 474 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 475 nnz = n; 476 for (i=0;i<ii[n];i++) { 477 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 478 } 479 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 480 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 481 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 482 nnz = 0; 483 bii[0] = 0; 484 for (i=0;i<n;i++) { 485 PetscInt j; 486 for (j=ii[i];j<ii[i+1];j++) { 487 PetscScalar entry = a[j]; 488 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 489 bij[nnz] = ij[j]; 490 bdata[nnz] = entry; 491 nnz++; 492 } 493 } 494 bii[i+1] = nnz; 495 } 496 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 497 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 498 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 499 { 500 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 501 b->free_a = PETSC_TRUE; 502 b->free_ij = PETSC_TRUE; 503 } 504 *B = Bt; 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatDetectDisconnectedComponents" 510 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 511 { 512 Mat B; 513 IS is_dummy,*cc_n; 514 ISLocalToGlobalMapping l2gmap_dummy; 515 PCBDDCGraph graph; 516 PetscInt i,n; 517 PetscInt *xadj,*adjncy; 518 PetscInt *xadj_filtered,*adjncy_filtered; 519 PetscBool flg_row,isseqaij; 520 PetscErrorCode ierr; 521 522 PetscFunctionBegin; 523 if (!A->rmap->N || !A->cmap->N) { 524 *ncc = 0; 525 *cc = NULL; 526 PetscFunctionReturn(0); 527 } 528 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 529 if (!isseqaij && filter) { 530 PetscBool isseqdense; 531 532 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 533 if (!isseqdense) { 534 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 535 } else { /* TODO: rectangular case and LDA */ 536 PetscScalar *array; 537 PetscReal chop=1.e-6; 538 539 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 540 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 541 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 542 for (i=0;i<n;i++) { 543 PetscInt j; 544 for (j=i+1;j<n;j++) { 545 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 546 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 547 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 548 } 549 } 550 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 551 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 552 } 553 } else { 554 B = A; 555 } 556 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 557 558 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 559 if (filter) { 560 PetscScalar *data; 561 PetscInt j,cum; 562 563 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 564 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 565 cum = 0; 566 for (i=0;i<n;i++) { 567 PetscInt t; 568 569 for (j=xadj[i];j<xadj[i+1];j++) { 570 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 571 continue; 572 } 573 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 574 } 575 t = xadj_filtered[i]; 576 xadj_filtered[i] = cum; 577 cum += t; 578 } 579 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 580 } else { 581 xadj_filtered = NULL; 582 adjncy_filtered = NULL; 583 } 584 585 /* compute local connected components using PCBDDCGraph */ 586 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 587 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 589 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 590 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 591 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 592 if (xadj_filtered) { 593 graph->xadj = xadj_filtered; 594 graph->adjncy = adjncy_filtered; 595 } else { 596 graph->xadj = xadj; 597 graph->adjncy = adjncy; 598 } 599 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 600 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 601 /* partial clean up */ 602 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 603 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 604 if (A != B) { 605 ierr = MatDestroy(&B);CHKERRQ(ierr); 606 } 607 608 /* get back data */ 609 if (ncc) *ncc = graph->ncc; 610 if (cc) { 611 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 612 for (i=0;i<graph->ncc;i++) { 613 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 614 } 615 *cc = cc_n; 616 } 617 /* clean up graph */ 618 graph->xadj = 0; 619 graph->adjncy = 0; 620 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "PCBDDCBenignCheck" 626 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 627 { 628 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 629 PC_IS* pcis = (PC_IS*)(pc->data); 630 IS dirIS = NULL; 631 PetscInt i; 632 PetscErrorCode ierr; 633 634 PetscFunctionBegin; 635 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 636 if (zerodiag) { 637 Mat A; 638 Vec vec3_N; 639 PetscScalar *vals; 640 const PetscInt *idxs; 641 PetscInt nz,*count; 642 643 /* p0 */ 644 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 645 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 646 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 647 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 648 for (i=0;i<nz;i++) vals[i] = 1.; 649 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 650 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 651 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 652 /* v_I */ 653 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 654 for (i=0;i<nz;i++) vals[i] = 0.; 655 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 657 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 658 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 659 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 661 if (dirIS) { 662 PetscInt n; 663 664 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 665 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 666 for (i=0;i<n;i++) vals[i] = 0.; 667 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 668 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 669 } 670 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 671 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 672 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 673 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 674 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 675 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 676 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 677 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 678 ierr = PetscFree(vals);CHKERRQ(ierr); 679 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 680 681 /* there should not be any pressure dofs lying on the interface */ 682 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 683 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 684 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 685 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 686 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 687 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 688 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 689 ierr = PetscFree(count);CHKERRQ(ierr); 690 } 691 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 692 693 /* check PCBDDCBenignGetOrSetP0 */ 694 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 695 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 696 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 697 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 698 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 699 for (i=0;i<pcbddc->benign_n;i++) if ((PetscInt)(PetscRealPart(pcbddc->benign_p0[i])) != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 705 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 706 { 707 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 708 IS pressures,zerodiag,*zerodiag_subs; 709 PetscInt nz,n; 710 PetscInt *interior_dofs,n_interior_dofs; 711 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 712 PetscErrorCode ierr; 713 714 PetscFunctionBegin; 715 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 717 for (n=0;n<pcbddc->benign_n;n++) { 718 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 719 } 720 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 721 pcbddc->benign_n = 0; 722 /* if a local info on dofs is present, assumes that the last field represents "pressures" 723 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 724 Checks if all the pressure dofs in each subdomain have a zero diagonal 725 If not, a change of basis on pressures is not needed 726 since the local Schur complements are already SPD 727 */ 728 has_null_pressures = PETSC_TRUE; 729 have_null = PETSC_TRUE; 730 if (pcbddc->n_ISForDofsLocal) { 731 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 732 733 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 734 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 735 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 736 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 738 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 739 if (!sorted) { 740 ierr = ISSort(pressures);CHKERRQ(ierr); 741 } 742 } else { 743 pressures = NULL; 744 } 745 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 746 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 747 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 748 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 749 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 750 if (!sorted) { 751 ierr = ISSort(zerodiag);CHKERRQ(ierr); 752 } 753 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 754 if (!nz) { 755 if (n) have_null = PETSC_FALSE; 756 has_null_pressures = PETSC_FALSE; 757 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 758 } 759 recompute_zerodiag = PETSC_FALSE; 760 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 761 zerodiag_subs = NULL; 762 pcbddc->benign_n = 0; 763 n_interior_dofs = 0; 764 interior_dofs = NULL; 765 if (pcbddc->current_level) { /* need to compute interior nodes */ 766 PetscInt n,i,j; 767 PetscInt n_neigh,*neigh,*n_shared,**shared; 768 PetscInt *iwork; 769 770 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 771 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 772 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 773 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 774 for (i=0;i<n_neigh;i++) 775 for (j=0;j<n_shared[i];j++) 776 iwork[shared[i][j]] += 1; 777 for (i=0;i<n;i++) 778 if (!iwork[i]) 779 interior_dofs[n_interior_dofs++] = i; 780 ierr = PetscFree(iwork);CHKERRQ(ierr); 781 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 782 } 783 if (has_null_pressures) { 784 IS *subs; 785 PetscInt nsubs,i,j,nl; 786 const PetscInt *idxs; 787 PetscScalar *array; 788 Vec *work; 789 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 790 791 subs = pcbddc->local_subs; 792 nsubs = pcbddc->n_local_subs; 793 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 794 if (pcbddc->current_level) { 795 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 796 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 797 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 798 /* work[0] = 1_p */ 799 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 800 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 801 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 802 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 803 /* work[0] = 1_v */ 804 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 805 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 806 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 807 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 808 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 809 } 810 if (nsubs > 1) { 811 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 812 for (i=0;i<nsubs;i++) { 813 ISLocalToGlobalMapping l2g; 814 IS t_zerodiag_subs; 815 PetscInt nl; 816 817 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 818 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 819 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 820 if (nl) { 821 PetscBool valid = PETSC_TRUE; 822 823 if (pcbddc->current_level) { 824 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 825 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 826 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 827 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 828 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 829 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 830 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 831 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 832 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 833 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 834 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 835 for (j=0;j<n_interior_dofs;j++) { 836 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 837 valid = PETSC_FALSE; 838 break; 839 } 840 } 841 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 842 } 843 if (valid && pcbddc->NeumannBoundariesLocal) { 844 IS t_bc; 845 PetscInt nzb; 846 847 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 848 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 849 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 850 if (nzb) valid = PETSC_FALSE; 851 } 852 if (valid && pressures) { 853 IS t_pressure_subs; 854 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 855 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 856 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 857 } 858 if (valid) { 859 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 860 pcbddc->benign_n++; 861 } else { 862 recompute_zerodiag = PETSC_TRUE; 863 } 864 } 865 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 866 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 867 } 868 } else { /* there's just one subdomain (or zero if they have not been detected */ 869 PetscBool valid = PETSC_TRUE; 870 871 if (pcbddc->NeumannBoundariesLocal) { 872 PetscInt nzb; 873 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 874 if (nzb) valid = PETSC_FALSE; 875 } 876 if (valid && pressures) { 877 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 878 } 879 if (valid && pcbddc->current_level) { 880 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 881 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 882 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 883 for (j=0;j<n_interior_dofs;j++) { 884 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 885 valid = PETSC_FALSE; 886 break; 887 } 888 } 889 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 890 } 891 if (valid) { 892 pcbddc->benign_n = 1; 893 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 894 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 895 zerodiag_subs[0] = zerodiag; 896 } 897 } 898 if (pcbddc->current_level) { 899 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 900 } 901 } 902 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 903 904 if (!pcbddc->benign_n) { 905 PetscInt n; 906 907 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 908 recompute_zerodiag = PETSC_FALSE; 909 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 910 if (n) { 911 has_null_pressures = PETSC_FALSE; 912 have_null = PETSC_FALSE; 913 } 914 } 915 916 /* final check for null pressures */ 917 if (zerodiag && pressures) { 918 PetscInt nz,np; 919 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 920 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 921 if (nz != np) have_null = PETSC_FALSE; 922 } 923 924 if (recompute_zerodiag) { 925 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 926 if (pcbddc->benign_n == 1) { 927 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 928 zerodiag = zerodiag_subs[0]; 929 } else { 930 PetscInt i,nzn,*new_idxs; 931 932 nzn = 0; 933 for (i=0;i<pcbddc->benign_n;i++) { 934 PetscInt ns; 935 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 936 nzn += ns; 937 } 938 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 939 nzn = 0; 940 for (i=0;i<pcbddc->benign_n;i++) { 941 PetscInt ns,*idxs; 942 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 943 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 944 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 945 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 946 nzn += ns; 947 } 948 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 949 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 950 } 951 have_null = PETSC_FALSE; 952 } 953 954 /* Prepare matrix to compute no-net-flux */ 955 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 956 Mat A,loc_divudotp; 957 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 958 IS row,col,isused = NULL; 959 PetscInt M,N,n,st,n_isused; 960 961 if (pressures) { 962 isused = pressures; 963 } else { 964 isused = zerodiag; 965 } 966 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 967 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 968 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 969 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 970 n_isused = 0; 971 if (isused) { 972 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 973 } 974 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 975 st = st-n_isused; 976 if (n) { 977 const PetscInt *gidxs; 978 979 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 980 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 981 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 982 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 983 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 984 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 985 } else { 986 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 987 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 988 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 989 } 990 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 991 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 992 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 993 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 994 ierr = ISDestroy(&row);CHKERRQ(ierr); 995 ierr = ISDestroy(&col);CHKERRQ(ierr); 996 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 997 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 998 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 999 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1000 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1001 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1002 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1003 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1004 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1006 } 1007 1008 /* change of basis and p0 dofs */ 1009 if (has_null_pressures) { 1010 IS zerodiagc; 1011 const PetscInt *idxs,*idxsc; 1012 PetscInt i,s,*nnz; 1013 1014 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1015 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1016 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1017 /* local change of basis for pressures */ 1018 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1019 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1020 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1021 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1022 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1023 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1024 for (i=0;i<pcbddc->benign_n;i++) { 1025 PetscInt nzs,j; 1026 1027 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1028 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1029 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1030 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1031 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1032 } 1033 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1034 ierr = PetscFree(nnz);CHKERRQ(ierr); 1035 /* set identity on velocities */ 1036 for (i=0;i<n-nz;i++) { 1037 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1038 } 1039 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1040 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1041 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1042 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1043 /* set change on pressures */ 1044 for (s=0;s<pcbddc->benign_n;s++) { 1045 PetscScalar *array; 1046 PetscInt nzs; 1047 1048 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1049 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1050 for (i=0;i<nzs-1;i++) { 1051 PetscScalar vals[2]; 1052 PetscInt cols[2]; 1053 1054 cols[0] = idxs[i]; 1055 cols[1] = idxs[nzs-1]; 1056 vals[0] = 1.; 1057 vals[1] = 1.; 1058 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1059 } 1060 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1061 for (i=0;i<nzs-1;i++) array[i] = -1.; 1062 array[nzs-1] = 1.; 1063 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1064 /* store local idxs for p0 */ 1065 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1066 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1067 ierr = PetscFree(array);CHKERRQ(ierr); 1068 } 1069 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1070 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1071 /* project if needed */ 1072 if (pcbddc->benign_change_explicit) { 1073 Mat M; 1074 1075 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1076 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1077 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1078 ierr = MatDestroy(&M);CHKERRQ(ierr); 1079 } 1080 /* store global idxs for p0 */ 1081 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1082 } 1083 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1084 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1085 1086 /* determines if the coarse solver will be singular or not */ 1087 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1088 /* determines if the problem has subdomains with 0 pressure block */ 1089 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1090 *zerodiaglocal = zerodiag; 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1096 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1097 { 1098 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1099 PetscScalar *array; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 if (!pcbddc->benign_sf) { 1104 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1105 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1106 } 1107 if (get) { 1108 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1109 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1110 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1111 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1112 } else { 1113 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1114 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1115 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1117 } 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1123 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1124 { 1125 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 /* TODO: add error checking 1130 - avoid nested pop (or push) calls. 1131 - cannot push before pop. 1132 - cannot call this if pcbddc->local_mat is NULL 1133 */ 1134 if (!pcbddc->benign_n) { 1135 PetscFunctionReturn(0); 1136 } 1137 if (pop) { 1138 if (pcbddc->benign_change_explicit) { 1139 IS is_p0; 1140 MatReuse reuse; 1141 1142 /* extract B_0 */ 1143 reuse = MAT_INITIAL_MATRIX; 1144 if (pcbddc->benign_B0) { 1145 reuse = MAT_REUSE_MATRIX; 1146 } 1147 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1148 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1149 /* remove rows and cols from local problem */ 1150 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1151 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1152 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1153 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1154 } else { 1155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1156 PetscScalar *vals; 1157 PetscInt i,n,*idxs_ins; 1158 1159 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1160 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1161 if (!pcbddc->benign_B0) { 1162 PetscInt *nnz; 1163 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1164 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1165 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1166 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1167 for (i=0;i<pcbddc->benign_n;i++) { 1168 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1169 nnz[i] = n - nnz[i]; 1170 } 1171 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1172 ierr = PetscFree(nnz);CHKERRQ(ierr); 1173 } 1174 1175 for (i=0;i<pcbddc->benign_n;i++) { 1176 PetscScalar *array; 1177 PetscInt *idxs,j,nz,cum; 1178 1179 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1180 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1181 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1182 for (j=0;j<nz;j++) vals[j] = 1.; 1183 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1184 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1185 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1186 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1187 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1188 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1189 cum = 0; 1190 for (j=0;j<n;j++) { 1191 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1192 vals[cum] = array[j]; 1193 idxs_ins[cum] = j; 1194 cum++; 1195 } 1196 } 1197 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1198 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1199 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1200 } 1201 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1202 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1203 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1204 } 1205 } else { /* push */ 1206 if (pcbddc->benign_change_explicit) { 1207 PetscInt i; 1208 1209 for (i=0;i<pcbddc->benign_n;i++) { 1210 PetscScalar *B0_vals; 1211 PetscInt *B0_cols,B0_ncol; 1212 1213 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1214 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1215 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1216 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1217 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1218 } 1219 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1220 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1221 } else { 1222 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1223 } 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1230 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1231 { 1232 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1233 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1234 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1235 PetscBLASInt *B_iwork,*B_ifail; 1236 PetscScalar *work,lwork; 1237 PetscScalar *St,*S,*eigv; 1238 PetscScalar *Sarray,*Starray; 1239 PetscReal *eigs,thresh; 1240 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1241 PetscBool allocated_S_St; 1242 #if defined(PETSC_USE_COMPLEX) 1243 PetscReal *rwork; 1244 #endif 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1249 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1250 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1251 1252 if (pcbddc->dbg_flag) { 1253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1254 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1255 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1256 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 } 1258 1259 if (pcbddc->dbg_flag) { 1260 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1261 } 1262 1263 /* max size of subsets */ 1264 mss = 0; 1265 for (i=0;i<sub_schurs->n_subs;i++) { 1266 PetscInt subset_size; 1267 1268 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1269 mss = PetscMax(mss,subset_size); 1270 } 1271 1272 /* min/max and threshold */ 1273 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1274 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1275 nmax = PetscMax(nmin,nmax); 1276 allocated_S_St = PETSC_FALSE; 1277 if (nmin) { 1278 allocated_S_St = PETSC_TRUE; 1279 } 1280 1281 /* allocate lapack workspace */ 1282 cum = cum2 = 0; 1283 maxneigs = 0; 1284 for (i=0;i<sub_schurs->n_subs;i++) { 1285 PetscInt n,subset_size; 1286 1287 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1288 n = PetscMin(subset_size,nmax); 1289 cum += subset_size; 1290 cum2 += subset_size*n; 1291 maxneigs = PetscMax(maxneigs,n); 1292 } 1293 if (mss) { 1294 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1295 PetscBLASInt B_itype = 1; 1296 PetscBLASInt B_N = mss; 1297 PetscReal zero = 0.0; 1298 PetscReal eps = 0.0; /* dlamch? */ 1299 1300 B_lwork = -1; 1301 S = NULL; 1302 St = NULL; 1303 eigs = NULL; 1304 eigv = NULL; 1305 B_iwork = NULL; 1306 B_ifail = NULL; 1307 #if defined(PETSC_USE_COMPLEX) 1308 rwork = NULL; 1309 #endif 1310 thresh = 1.0; 1311 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1312 #if defined(PETSC_USE_COMPLEX) 1313 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1314 #else 1315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1316 #endif 1317 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1318 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1319 } else { 1320 /* TODO */ 1321 } 1322 } else { 1323 lwork = 0; 1324 } 1325 1326 nv = 0; 1327 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1328 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1329 } 1330 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1331 if (allocated_S_St) { 1332 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1333 } 1334 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1335 #if defined(PETSC_USE_COMPLEX) 1336 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1337 #endif 1338 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1339 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1340 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1341 nv+cum,&pcbddc->adaptive_constraints_idxs, 1342 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1343 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1344 1345 maxneigs = 0; 1346 cum = cumarray = 0; 1347 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1348 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1349 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1350 const PetscInt *idxs; 1351 1352 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1353 for (cum=0;cum<nv;cum++) { 1354 pcbddc->adaptive_constraints_n[cum] = 1; 1355 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1356 pcbddc->adaptive_constraints_data[cum] = 1.0; 1357 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1358 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1359 } 1360 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1361 } 1362 1363 if (mss) { /* multilevel */ 1364 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1365 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1366 } 1367 1368 thresh = pcbddc->adaptive_threshold; 1369 for (i=0;i<sub_schurs->n_subs;i++) { 1370 const PetscInt *idxs; 1371 PetscReal upper,lower; 1372 PetscInt j,subset_size,eigs_start = 0; 1373 PetscBLASInt B_N; 1374 PetscBool same_data = PETSC_FALSE; 1375 1376 if (pcbddc->use_deluxe_scaling) { 1377 upper = PETSC_MAX_REAL; 1378 lower = thresh; 1379 } else { 1380 upper = 1./thresh; 1381 lower = 0.; 1382 } 1383 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1384 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1385 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1386 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1387 if (sub_schurs->is_hermitian) { 1388 PetscInt j,k; 1389 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1390 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1391 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1392 } 1393 for (j=0;j<subset_size;j++) { 1394 for (k=j;k<subset_size;k++) { 1395 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1396 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1397 } 1398 } 1399 } else { 1400 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1401 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1402 } 1403 } else { 1404 S = Sarray + cumarray; 1405 St = Starray + cumarray; 1406 } 1407 /* see if we can save some work */ 1408 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1409 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1410 } 1411 1412 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1413 B_neigs = 0; 1414 } else { 1415 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1416 PetscBLASInt B_itype = 1; 1417 PetscBLASInt B_IL, B_IU; 1418 PetscReal eps = -1.0; /* dlamch? */ 1419 PetscInt nmin_s; 1420 PetscBool compute_range = PETSC_FALSE; 1421 1422 if (pcbddc->dbg_flag) { 1423 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1424 } 1425 1426 compute_range = PETSC_FALSE; 1427 if (thresh > 1.+PETSC_SMALL && !same_data) { 1428 compute_range = PETSC_TRUE; 1429 } 1430 1431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1432 if (compute_range) { 1433 1434 /* ask for eigenvalues larger than thresh */ 1435 #if defined(PETSC_USE_COMPLEX) 1436 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1437 #else 1438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1439 #endif 1440 } else if (!same_data) { 1441 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1442 B_IL = 1; 1443 #if defined(PETSC_USE_COMPLEX) 1444 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1445 #else 1446 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1447 #endif 1448 } else { /* same_data is true, so get the adaptive function requested by the user */ 1449 PetscInt k; 1450 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1451 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1453 nmin = nmax; 1454 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1455 for (k=0;k<nmax;k++) { 1456 eigs[k] = 1./PETSC_SMALL; 1457 eigv[k*(subset_size+1)] = 1.0; 1458 } 1459 } 1460 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1461 if (B_ierr) { 1462 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1463 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1464 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1465 } 1466 1467 if (B_neigs > nmax) { 1468 if (pcbddc->dbg_flag) { 1469 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1470 } 1471 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1472 B_neigs = nmax; 1473 } 1474 1475 nmin_s = PetscMin(nmin,B_N); 1476 if (B_neigs < nmin_s) { 1477 PetscBLASInt B_neigs2; 1478 1479 if (pcbddc->use_deluxe_scaling) { 1480 B_IL = B_N - nmin_s + 1; 1481 B_IU = B_N - B_neigs; 1482 } else { 1483 B_IL = B_neigs + 1; 1484 B_IU = nmin_s; 1485 } 1486 if (pcbddc->dbg_flag) { 1487 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1488 } 1489 if (sub_schurs->is_hermitian) { 1490 PetscInt j,k; 1491 for (j=0;j<subset_size;j++) { 1492 for (k=j;k<subset_size;k++) { 1493 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1494 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1495 } 1496 } 1497 } else { 1498 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1499 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1500 } 1501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1502 #if defined(PETSC_USE_COMPLEX) 1503 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1504 #else 1505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1506 #endif 1507 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1508 B_neigs += B_neigs2; 1509 } 1510 if (B_ierr) { 1511 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1512 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1513 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1514 } 1515 if (pcbddc->dbg_flag) { 1516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1517 for (j=0;j<B_neigs;j++) { 1518 if (eigs[j] == 0.0) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1520 } else { 1521 if (pcbddc->use_deluxe_scaling) { 1522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1523 } else { 1524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 } 1529 } else { 1530 /* TODO */ 1531 } 1532 } 1533 /* change the basis back to the original one */ 1534 if (sub_schurs->change) { 1535 Mat change,phi,phit; 1536 1537 if (pcbddc->dbg_flag > 1) { 1538 PetscInt ii; 1539 for (ii=0;ii<B_neigs;ii++) { 1540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1541 for (j=0;j<B_N;j++) { 1542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1543 } 1544 } 1545 } 1546 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1547 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1548 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1549 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1550 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1551 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1552 } 1553 maxneigs = PetscMax(B_neigs,maxneigs); 1554 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1555 if (B_neigs) { 1556 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1557 1558 if (pcbddc->dbg_flag > 1) { 1559 PetscInt ii; 1560 for (ii=0;ii<B_neigs;ii++) { 1561 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1562 for (j=0;j<B_N;j++) { 1563 #if defined(PETSC_USE_COMPLEX) 1564 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1565 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1567 #else 1568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1569 #endif 1570 } 1571 } 1572 } 1573 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1574 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1575 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1576 cum++; 1577 } 1578 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1579 /* shift for next computation */ 1580 cumarray += subset_size*subset_size; 1581 } 1582 if (pcbddc->dbg_flag) { 1583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1584 } 1585 1586 if (mss) { 1587 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1588 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1589 /* destroy matrices (junk) */ 1590 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1591 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1592 } 1593 if (allocated_S_St) { 1594 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1595 } 1596 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1597 #if defined(PETSC_USE_COMPLEX) 1598 ierr = PetscFree(rwork);CHKERRQ(ierr); 1599 #endif 1600 if (pcbddc->dbg_flag) { 1601 PetscInt maxneigs_r; 1602 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "PCBDDCSetUpSolvers" 1610 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1611 { 1612 PetscScalar *coarse_submat_vals; 1613 PetscErrorCode ierr; 1614 1615 PetscFunctionBegin; 1616 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1617 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1618 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1619 1620 /* Setup local neumann solver ksp_R */ 1621 /* PCBDDCSetUpLocalScatters should be called first! */ 1622 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1623 1624 /* 1625 Setup local correction and local part of coarse basis. 1626 Gives back the dense local part of the coarse matrix in column major ordering 1627 */ 1628 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1629 1630 /* Compute total number of coarse nodes and setup coarse solver */ 1631 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1632 1633 /* free */ 1634 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "PCBDDCResetCustomization" 1640 PetscErrorCode PCBDDCResetCustomization(PC pc) 1641 { 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 PetscErrorCode ierr; 1644 1645 PetscFunctionBegin; 1646 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1647 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1648 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1649 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1650 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1651 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1652 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1653 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1654 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1655 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1656 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "PCBDDCResetTopography" 1662 PetscErrorCode PCBDDCResetTopography(PC pc) 1663 { 1664 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1665 PetscInt i; 1666 PetscErrorCode ierr; 1667 1668 PetscFunctionBegin; 1669 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1670 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1671 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1672 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1673 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1674 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1675 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1676 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1677 pcbddc->graphanalyzed = PETSC_FALSE; 1678 for (i=0;i<pcbddc->n_local_subs;i++) { 1679 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1680 } 1681 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1682 if (pcbddc->sub_schurs) { 1683 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "PCBDDCResetSolvers" 1690 PetscErrorCode PCBDDCResetSolvers(PC pc) 1691 { 1692 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1697 if (pcbddc->coarse_phi_B) { 1698 PetscScalar *array; 1699 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1700 ierr = PetscFree(array);CHKERRQ(ierr); 1701 } 1702 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1703 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1704 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1705 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1706 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1707 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1708 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1709 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1710 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1711 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1712 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1713 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1714 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1715 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1716 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1717 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1718 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1719 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1720 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1721 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1722 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1723 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1724 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1725 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1726 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1727 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1728 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1729 if (pcbddc->benign_zerodiag_subs) { 1730 PetscInt i; 1731 for (i=0;i<pcbddc->benign_n;i++) { 1732 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1733 } 1734 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1735 } 1736 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1742 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1743 { 1744 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1745 PC_IS *pcis = (PC_IS*)pc->data; 1746 VecType impVecType; 1747 PetscInt n_constraints,n_R,old_size; 1748 PetscErrorCode ierr; 1749 1750 PetscFunctionBegin; 1751 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1752 /* get sizes */ 1753 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1754 n_R = pcis->n - pcbddc->n_vertices; 1755 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1756 /* local work vectors (try to avoid unneeded work)*/ 1757 /* R nodes */ 1758 old_size = -1; 1759 if (pcbddc->vec1_R) { 1760 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1761 } 1762 if (n_R != old_size) { 1763 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1764 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1765 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1766 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1767 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1768 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1769 } 1770 /* local primal dofs */ 1771 old_size = -1; 1772 if (pcbddc->vec1_P) { 1773 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1774 } 1775 if (pcbddc->local_primal_size != old_size) { 1776 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1777 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1778 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1779 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1780 } 1781 /* local explicit constraints */ 1782 old_size = -1; 1783 if (pcbddc->vec1_C) { 1784 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1785 } 1786 if (n_constraints && n_constraints != old_size) { 1787 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1788 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1789 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1790 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1791 } 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "PCBDDCSetUpCorrection" 1797 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1798 { 1799 PetscErrorCode ierr; 1800 /* pointers to pcis and pcbddc */ 1801 PC_IS* pcis = (PC_IS*)pc->data; 1802 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1803 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1804 /* submatrices of local problem */ 1805 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1806 /* submatrices of local coarse problem */ 1807 Mat S_VV,S_CV,S_VC,S_CC; 1808 /* working matrices */ 1809 Mat C_CR; 1810 /* additional working stuff */ 1811 PC pc_R; 1812 Mat F; 1813 Vec dummy_vec; 1814 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1815 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1816 PetscScalar *work; 1817 PetscInt *idx_V_B; 1818 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1819 PetscInt i,n_R,n_D,n_B; 1820 1821 /* some shortcuts to scalars */ 1822 PetscScalar one=1.0,m_one=-1.0; 1823 1824 PetscFunctionBegin; 1825 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1826 1827 /* Set Non-overlapping dimensions */ 1828 n_vertices = pcbddc->n_vertices; 1829 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1830 n_B = pcis->n_B; 1831 n_D = pcis->n - n_B; 1832 n_R = pcis->n - n_vertices; 1833 1834 /* vertices in boundary numbering */ 1835 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1836 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1837 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1838 1839 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1840 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1841 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1842 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1844 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1846 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1848 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1849 1850 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1851 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1852 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1853 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1854 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1855 lda_rhs = n_R; 1856 need_benign_correction = PETSC_FALSE; 1857 if (isLU || isILU || isCHOL) { 1858 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1859 } else if (sub_schurs && sub_schurs->reuse_solver) { 1860 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1861 MatFactorType type; 1862 1863 F = reuse_solver->F; 1864 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1865 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1866 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1867 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1868 } else { 1869 F = NULL; 1870 } 1871 1872 /* allocate workspace */ 1873 n = 0; 1874 if (n_constraints) { 1875 n += lda_rhs*n_constraints; 1876 } 1877 if (n_vertices) { 1878 n = PetscMax(2*lda_rhs*n_vertices,n); 1879 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1880 } 1881 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1882 1883 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1884 dummy_vec = NULL; 1885 if (need_benign_correction && lda_rhs != n_R && F) { 1886 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1887 } 1888 1889 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1890 if (n_constraints) { 1891 Mat M1,M2,M3,C_B; 1892 IS is_aux; 1893 PetscScalar *array,*array2; 1894 1895 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1896 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1897 1898 /* Extract constraints on R nodes: C_{CR} */ 1899 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1900 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1901 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1902 1903 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1904 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1905 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1906 for (i=0;i<n_constraints;i++) { 1907 const PetscScalar *row_cmat_values; 1908 const PetscInt *row_cmat_indices; 1909 PetscInt size_of_constraint,j; 1910 1911 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1912 for (j=0;j<size_of_constraint;j++) { 1913 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1914 } 1915 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1916 } 1917 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1918 if (F) { 1919 Mat B; 1920 1921 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1922 if (need_benign_correction) { 1923 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1924 1925 /* rhs is already zero on interior dofs, no need to change the rhs */ 1926 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1927 } 1928 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1929 if (need_benign_correction) { 1930 PetscScalar *marr; 1931 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1932 1933 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1934 if (lda_rhs != n_R) { 1935 for (i=0;i<n_constraints;i++) { 1936 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1937 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1938 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1939 } 1940 } else { 1941 for (i=0;i<n_constraints;i++) { 1942 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1943 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1944 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1945 } 1946 } 1947 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1948 } 1949 ierr = MatDestroy(&B);CHKERRQ(ierr); 1950 } else { 1951 PetscScalar *marr; 1952 1953 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1954 for (i=0;i<n_constraints;i++) { 1955 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1956 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1957 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1958 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1959 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1960 } 1961 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1962 } 1963 if (!pcbddc->switch_static) { 1964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1965 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1966 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1967 for (i=0;i<n_constraints;i++) { 1968 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1969 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1970 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1972 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1973 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1974 } 1975 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1976 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1977 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1978 } else { 1979 if (lda_rhs != n_R) { 1980 IS dummy; 1981 1982 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1983 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1984 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1985 } else { 1986 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1987 pcbddc->local_auxmat2 = local_auxmat2_R; 1988 } 1989 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1990 } 1991 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1992 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1993 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1994 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1995 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1996 if (isCHOL) { 1997 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1998 } else { 1999 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2000 } 2001 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2002 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2003 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2004 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2005 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2006 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2007 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2008 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2009 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2010 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2011 } 2012 2013 /* Get submatrices from subdomain matrix */ 2014 if (n_vertices) { 2015 IS is_aux; 2016 2017 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2018 IS tis; 2019 2020 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2021 ierr = ISSort(tis);CHKERRQ(ierr); 2022 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2023 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2024 } else { 2025 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2026 } 2027 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2028 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2029 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2030 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2031 } 2032 2033 /* Matrix of coarse basis functions (local) */ 2034 if (pcbddc->coarse_phi_B) { 2035 PetscInt on_B,on_primal,on_D=n_D; 2036 if (pcbddc->coarse_phi_D) { 2037 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2038 } 2039 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2040 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2041 PetscScalar *marray; 2042 2043 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2044 ierr = PetscFree(marray);CHKERRQ(ierr); 2045 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2046 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2047 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2048 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2049 } 2050 } 2051 2052 if (!pcbddc->coarse_phi_B) { 2053 PetscScalar *marray; 2054 2055 n = n_B*pcbddc->local_primal_size; 2056 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2057 n += n_D*pcbddc->local_primal_size; 2058 } 2059 if (!pcbddc->symmetric_primal) { 2060 n *= 2; 2061 } 2062 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2064 n = n_B*pcbddc->local_primal_size; 2065 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2067 n += n_D*pcbddc->local_primal_size; 2068 } 2069 if (!pcbddc->symmetric_primal) { 2070 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2071 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2072 n = n_B*pcbddc->local_primal_size; 2073 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2074 } 2075 } else { 2076 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2077 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2078 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2079 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2080 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2081 } 2082 } 2083 } 2084 2085 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2086 p0_lidx_I = NULL; 2087 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2088 const PetscInt *idxs; 2089 2090 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2091 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2092 for (i=0;i<pcbddc->benign_n;i++) { 2093 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2094 } 2095 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2096 } 2097 2098 /* vertices */ 2099 if (n_vertices) { 2100 2101 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2102 2103 if (n_R) { 2104 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2105 PetscBLASInt B_N,B_one = 1; 2106 PetscScalar *x,*y; 2107 PetscBool isseqaij; 2108 2109 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2110 if (need_benign_correction) { 2111 ISLocalToGlobalMapping RtoN; 2112 IS is_p0; 2113 PetscInt *idxs_p0,n; 2114 2115 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2116 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2117 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2118 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2119 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2120 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2121 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2122 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2123 } 2124 2125 if (lda_rhs == n_R) { 2126 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2127 } else { 2128 PetscScalar *av,*array; 2129 const PetscInt *xadj,*adjncy; 2130 PetscInt n; 2131 PetscBool flg_row; 2132 2133 array = work+lda_rhs*n_vertices; 2134 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2135 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2136 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2137 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2138 for (i=0;i<n;i++) { 2139 PetscInt j; 2140 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2141 } 2142 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2143 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2145 } 2146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2147 if (need_benign_correction) { 2148 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2149 PetscScalar *marr; 2150 2151 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2152 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2153 2154 | 0 0 0 | (V) 2155 L = | 0 0 -1 | (P-p0) 2156 | 0 0 -1 | (p0) 2157 2158 */ 2159 for (i=0;i<reuse_solver->benign_n;i++) { 2160 const PetscScalar *vals; 2161 const PetscInt *idxs,*idxs_zero; 2162 PetscInt n,j,nz; 2163 2164 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2165 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2166 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2167 for (j=0;j<n;j++) { 2168 PetscScalar val = vals[j]; 2169 PetscInt k,col = idxs[j]; 2170 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2171 } 2172 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2173 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2174 } 2175 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2176 } 2177 if (F) { 2178 /* need to correct the rhs */ 2179 if (need_benign_correction) { 2180 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2181 PetscScalar *marr; 2182 2183 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2184 if (lda_rhs != n_R) { 2185 for (i=0;i<n_vertices;i++) { 2186 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2187 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2188 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2189 } 2190 } else { 2191 for (i=0;i<n_vertices;i++) { 2192 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2193 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2194 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2195 } 2196 } 2197 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2198 } 2199 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2200 /* need to correct the solution */ 2201 if (need_benign_correction) { 2202 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2203 PetscScalar *marr; 2204 2205 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2206 if (lda_rhs != n_R) { 2207 for (i=0;i<n_vertices;i++) { 2208 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2209 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2210 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2211 } 2212 } else { 2213 for (i=0;i<n_vertices;i++) { 2214 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2215 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2217 } 2218 } 2219 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2220 } 2221 } else { 2222 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2223 for (i=0;i<n_vertices;i++) { 2224 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2225 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2226 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2227 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2228 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2229 } 2230 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2231 } 2232 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2233 /* S_VV and S_CV */ 2234 if (n_constraints) { 2235 Mat B; 2236 2237 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2238 for (i=0;i<n_vertices;i++) { 2239 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2240 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2241 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2242 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2243 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2244 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2245 } 2246 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2247 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2248 ierr = MatDestroy(&B);CHKERRQ(ierr); 2249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2250 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2251 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2252 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2253 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2254 ierr = MatDestroy(&B);CHKERRQ(ierr); 2255 } 2256 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2257 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2258 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2259 } 2260 if (lda_rhs != n_R) { 2261 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2262 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2263 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2264 } 2265 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2266 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2267 if (need_benign_correction) { 2268 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2269 PetscScalar *marr,*sums; 2270 2271 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2272 ierr = MatDenseGetArray(S_VVt,&marr); 2273 for (i=0;i<reuse_solver->benign_n;i++) { 2274 const PetscScalar *vals; 2275 const PetscInt *idxs,*idxs_zero; 2276 PetscInt n,j,nz; 2277 2278 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2279 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2280 for (j=0;j<n_vertices;j++) { 2281 PetscInt k; 2282 sums[j] = 0.; 2283 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2284 } 2285 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2286 for (j=0;j<n;j++) { 2287 PetscScalar val = vals[j]; 2288 PetscInt k; 2289 for (k=0;k<n_vertices;k++) { 2290 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2291 } 2292 } 2293 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2294 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2295 } 2296 ierr = PetscFree(sums);CHKERRQ(ierr); 2297 ierr = MatDenseRestoreArray(S_VVt,&marr); 2298 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2299 } 2300 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2301 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2302 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2303 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2304 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2305 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2306 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2307 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2308 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2309 } else { 2310 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2311 } 2312 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2313 2314 /* coarse basis functions */ 2315 for (i=0;i<n_vertices;i++) { 2316 PetscScalar *y; 2317 2318 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2319 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2320 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2321 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2322 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2323 y[n_B*i+idx_V_B[i]] = 1.0; 2324 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2325 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2326 2327 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2328 PetscInt j; 2329 2330 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2331 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2332 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2333 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2334 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2335 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2336 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2337 } 2338 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2339 } 2340 /* if n_R == 0 the object is not destroyed */ 2341 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2342 } 2343 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2344 2345 if (n_constraints) { 2346 Mat B; 2347 2348 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2349 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2350 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2351 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2352 if (n_vertices) { 2353 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2354 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2355 } else { 2356 Mat S_VCt; 2357 2358 if (lda_rhs != n_R) { 2359 ierr = MatDestroy(&B);CHKERRQ(ierr); 2360 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2361 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2362 } 2363 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2364 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2365 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2366 } 2367 } 2368 ierr = MatDestroy(&B);CHKERRQ(ierr); 2369 /* coarse basis functions */ 2370 for (i=0;i<n_constraints;i++) { 2371 PetscScalar *y; 2372 2373 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2374 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2375 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2376 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2377 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2378 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2379 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2380 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2381 PetscInt j; 2382 2383 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2384 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2385 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2386 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2387 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2388 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2389 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2390 } 2391 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2392 } 2393 } 2394 if (n_constraints) { 2395 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2396 } 2397 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2398 2399 /* coarse matrix entries relative to B_0 */ 2400 if (pcbddc->benign_n) { 2401 Mat B0_B,B0_BPHI; 2402 IS is_dummy; 2403 PetscScalar *data; 2404 PetscInt j; 2405 2406 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2407 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2408 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2409 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2410 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2411 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2412 for (j=0;j<pcbddc->benign_n;j++) { 2413 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2414 for (i=0;i<pcbddc->local_primal_size;i++) { 2415 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2416 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2417 } 2418 } 2419 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2420 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2421 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2422 } 2423 2424 /* compute other basis functions for non-symmetric problems */ 2425 if (!pcbddc->symmetric_primal) { 2426 Mat B_V=NULL,B_C=NULL; 2427 PetscScalar *marray; 2428 2429 if (n_constraints) { 2430 Mat S_CCT,C_CRT; 2431 2432 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2433 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2434 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2435 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2436 if (n_vertices) { 2437 Mat S_VCT; 2438 2439 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2440 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2441 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2442 } 2443 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2444 } else { 2445 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2446 } 2447 if (n_vertices && n_R) { 2448 PetscScalar *av,*marray; 2449 const PetscInt *xadj,*adjncy; 2450 PetscInt n; 2451 PetscBool flg_row; 2452 2453 /* B_V = B_V - A_VR^T */ 2454 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2455 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2456 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2457 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2458 for (i=0;i<n;i++) { 2459 PetscInt j; 2460 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2461 } 2462 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2463 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2464 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2465 } 2466 2467 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2468 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2469 for (i=0;i<n_vertices;i++) { 2470 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2471 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2472 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2473 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2474 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2475 } 2476 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2477 if (B_C) { 2478 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2479 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2480 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2481 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2482 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2483 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2484 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2485 } 2486 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2487 } 2488 /* coarse basis functions */ 2489 for (i=0;i<pcbddc->local_primal_size;i++) { 2490 PetscScalar *y; 2491 2492 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2493 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2494 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 if (i<n_vertices) { 2498 y[n_B*i+idx_V_B[i]] = 1.0; 2499 } 2500 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2501 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2502 2503 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2504 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2505 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2506 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2509 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2510 } 2511 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2512 } 2513 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2514 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2515 } 2516 /* free memory */ 2517 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2518 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2519 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2520 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2521 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2522 ierr = PetscFree(work);CHKERRQ(ierr); 2523 if (n_vertices) { 2524 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2525 } 2526 if (n_constraints) { 2527 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2528 } 2529 /* Checking coarse_sub_mat and coarse basis functios */ 2530 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2531 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2532 if (pcbddc->dbg_flag) { 2533 Mat coarse_sub_mat; 2534 Mat AUXMAT,TM1,TM2,TM3,TM4; 2535 Mat coarse_phi_D,coarse_phi_B; 2536 Mat coarse_psi_D,coarse_psi_B; 2537 Mat A_II,A_BB,A_IB,A_BI; 2538 Mat C_B,CPHI; 2539 IS is_dummy; 2540 Vec mones; 2541 MatType checkmattype=MATSEQAIJ; 2542 PetscReal real_value; 2543 2544 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2545 Mat A; 2546 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2547 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2548 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2549 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2550 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2551 ierr = MatDestroy(&A);CHKERRQ(ierr); 2552 } else { 2553 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2554 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2555 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2556 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2557 } 2558 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2559 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2560 if (!pcbddc->symmetric_primal) { 2561 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2562 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2563 } 2564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2565 2566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2568 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2569 if (!pcbddc->symmetric_primal) { 2570 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2571 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2572 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2573 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2574 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2575 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2576 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2577 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2579 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2580 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2582 } else { 2583 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2584 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2585 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2586 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2587 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2588 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2589 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2591 } 2592 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2593 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2594 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2595 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2596 if (pcbddc->benign_n) { 2597 Mat B0_B,B0_BPHI; 2598 PetscScalar *data,*data2; 2599 PetscInt j; 2600 2601 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2602 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2603 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2604 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2605 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2606 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2607 for (j=0;j<pcbddc->benign_n;j++) { 2608 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2609 for (i=0;i<pcbddc->local_primal_size;i++) { 2610 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2611 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2612 } 2613 } 2614 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2615 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2616 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2617 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2618 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2619 } 2620 #if 0 2621 { 2622 PetscViewer viewer; 2623 char filename[256]; 2624 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2625 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2626 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2627 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2628 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2629 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2630 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2631 if (save_change) { 2632 Mat phi_B; 2633 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2634 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2635 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2636 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2637 } else { 2638 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2639 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2640 } 2641 if (pcbddc->coarse_phi_D) { 2642 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2643 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2644 } 2645 if (pcbddc->coarse_psi_B) { 2646 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2647 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2648 } 2649 if (pcbddc->coarse_psi_D) { 2650 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2651 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2652 } 2653 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2654 } 2655 #endif 2656 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2657 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2658 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2659 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2660 2661 /* check constraints */ 2662 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2663 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2664 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2665 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2666 } else { 2667 PetscScalar *data; 2668 Mat tmat; 2669 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2671 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2672 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2673 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2674 } 2675 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2676 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2677 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2678 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2680 if (!pcbddc->symmetric_primal) { 2681 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2682 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2683 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2684 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2686 } 2687 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2688 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2689 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2690 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2692 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2693 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2694 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2695 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2696 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2697 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2698 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2699 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2700 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2701 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2702 if (!pcbddc->symmetric_primal) { 2703 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2704 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2705 } 2706 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2707 } 2708 /* get back data */ 2709 *coarse_submat_vals_n = coarse_submat_vals; 2710 PetscFunctionReturn(0); 2711 } 2712 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2715 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2716 { 2717 Mat *work_mat; 2718 IS isrow_s,iscol_s; 2719 PetscBool rsorted,csorted; 2720 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2725 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2726 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2727 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2728 2729 if (!rsorted) { 2730 const PetscInt *idxs; 2731 PetscInt *idxs_sorted,i; 2732 2733 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2734 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2735 for (i=0;i<rsize;i++) { 2736 idxs_perm_r[i] = i; 2737 } 2738 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2739 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2740 for (i=0;i<rsize;i++) { 2741 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2742 } 2743 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2744 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2745 } else { 2746 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2747 isrow_s = isrow; 2748 } 2749 2750 if (!csorted) { 2751 if (isrow == iscol) { 2752 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2753 iscol_s = isrow_s; 2754 } else { 2755 const PetscInt *idxs; 2756 PetscInt *idxs_sorted,i; 2757 2758 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2759 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2760 for (i=0;i<csize;i++) { 2761 idxs_perm_c[i] = i; 2762 } 2763 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2764 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2765 for (i=0;i<csize;i++) { 2766 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2767 } 2768 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2769 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2770 } 2771 } else { 2772 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2773 iscol_s = iscol; 2774 } 2775 2776 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2777 2778 if (!rsorted || !csorted) { 2779 Mat new_mat; 2780 IS is_perm_r,is_perm_c; 2781 2782 if (!rsorted) { 2783 PetscInt *idxs_r,i; 2784 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2785 for (i=0;i<rsize;i++) { 2786 idxs_r[idxs_perm_r[i]] = i; 2787 } 2788 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2789 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2790 } else { 2791 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2792 } 2793 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2794 2795 if (!csorted) { 2796 if (isrow_s == iscol_s) { 2797 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2798 is_perm_c = is_perm_r; 2799 } else { 2800 PetscInt *idxs_c,i; 2801 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2802 for (i=0;i<csize;i++) { 2803 idxs_c[idxs_perm_c[i]] = i; 2804 } 2805 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2806 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2807 } 2808 } else { 2809 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2810 } 2811 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2812 2813 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2814 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2815 work_mat[0] = new_mat; 2816 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2817 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2818 } 2819 2820 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2821 *B = work_mat[0]; 2822 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2823 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2824 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2830 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2831 { 2832 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2833 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2834 Mat new_mat; 2835 IS is_local,is_global; 2836 PetscInt local_size; 2837 PetscBool isseqaij; 2838 PetscErrorCode ierr; 2839 2840 PetscFunctionBegin; 2841 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2842 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2843 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2844 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2845 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2846 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2847 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2848 2849 /* check */ 2850 if (pcbddc->dbg_flag) { 2851 Vec x,x_change; 2852 PetscReal error; 2853 2854 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2855 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2856 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2857 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2858 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2859 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2862 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2863 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2864 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2866 ierr = VecDestroy(&x);CHKERRQ(ierr); 2867 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2868 } 2869 2870 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2871 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2872 if (isseqaij) { 2873 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2874 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2875 } else { 2876 Mat work_mat; 2877 2878 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2879 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2880 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2881 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2882 } 2883 if (matis->A->symmetric_set) { 2884 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2885 #if !defined(PETSC_USE_COMPLEX) 2886 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2887 #endif 2888 } 2889 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2895 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2896 { 2897 PC_IS* pcis = (PC_IS*)(pc->data); 2898 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2899 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2900 PetscInt *idx_R_local=NULL; 2901 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2902 PetscInt vbs,bs; 2903 PetscBT bitmask=NULL; 2904 PetscErrorCode ierr; 2905 2906 PetscFunctionBegin; 2907 /* 2908 No need to setup local scatters if 2909 - primal space is unchanged 2910 AND 2911 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2912 AND 2913 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2914 */ 2915 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2916 PetscFunctionReturn(0); 2917 } 2918 /* destroy old objects */ 2919 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2920 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2921 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2922 /* Set Non-overlapping dimensions */ 2923 n_B = pcis->n_B; 2924 n_D = pcis->n - n_B; 2925 n_vertices = pcbddc->n_vertices; 2926 2927 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2928 2929 /* create auxiliary bitmask and allocate workspace */ 2930 if (!sub_schurs || !sub_schurs->reuse_solver) { 2931 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2932 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2933 for (i=0;i<n_vertices;i++) { 2934 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2935 } 2936 2937 for (i=0, n_R=0; i<pcis->n; i++) { 2938 if (!PetscBTLookup(bitmask,i)) { 2939 idx_R_local[n_R++] = i; 2940 } 2941 } 2942 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2943 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2944 2945 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2946 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2947 } 2948 2949 /* Block code */ 2950 vbs = 1; 2951 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2952 if (bs>1 && !(n_vertices%bs)) { 2953 PetscBool is_blocked = PETSC_TRUE; 2954 PetscInt *vary; 2955 if (!sub_schurs || !sub_schurs->reuse_solver) { 2956 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2957 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2958 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2959 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2960 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2961 for (i=0; i<pcis->n/bs; i++) { 2962 if (vary[i]!=0 && vary[i]!=bs) { 2963 is_blocked = PETSC_FALSE; 2964 break; 2965 } 2966 } 2967 ierr = PetscFree(vary);CHKERRQ(ierr); 2968 } else { 2969 /* Verify directly the R set */ 2970 for (i=0; i<n_R/bs; i++) { 2971 PetscInt j,node=idx_R_local[bs*i]; 2972 for (j=1; j<bs; j++) { 2973 if (node != idx_R_local[bs*i+j]-j) { 2974 is_blocked = PETSC_FALSE; 2975 break; 2976 } 2977 } 2978 } 2979 } 2980 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2981 vbs = bs; 2982 for (i=0;i<n_R/vbs;i++) { 2983 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2984 } 2985 } 2986 } 2987 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2988 if (sub_schurs && sub_schurs->reuse_solver) { 2989 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2990 2991 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2992 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2993 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2994 reuse_solver->is_R = pcbddc->is_R_local; 2995 } else { 2996 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2997 } 2998 2999 /* print some info if requested */ 3000 if (pcbddc->dbg_flag) { 3001 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3002 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3003 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3007 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3008 } 3009 3010 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3011 if (!sub_schurs || !sub_schurs->reuse_solver) { 3012 IS is_aux1,is_aux2; 3013 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3014 3015 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3016 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3017 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3018 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3019 for (i=0; i<n_D; i++) { 3020 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3021 } 3022 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3023 for (i=0, j=0; i<n_R; i++) { 3024 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3025 aux_array1[j++] = i; 3026 } 3027 } 3028 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3029 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3030 for (i=0, j=0; i<n_B; i++) { 3031 if (!PetscBTLookup(bitmask,is_indices[i])) { 3032 aux_array2[j++] = i; 3033 } 3034 } 3035 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3036 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3037 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3038 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3039 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3040 3041 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3042 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3043 for (i=0, j=0; i<n_R; i++) { 3044 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3045 aux_array1[j++] = i; 3046 } 3047 } 3048 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3049 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3050 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3051 } 3052 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3053 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3054 } else { 3055 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3056 IS tis; 3057 PetscInt schur_size; 3058 3059 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3060 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3061 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3062 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3063 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3064 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3065 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3066 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3067 } 3068 } 3069 PetscFunctionReturn(0); 3070 } 3071 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3075 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3076 { 3077 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3078 PC_IS *pcis = (PC_IS*)pc->data; 3079 PC pc_temp; 3080 Mat A_RR; 3081 MatReuse reuse; 3082 PetscScalar m_one = -1.0; 3083 PetscReal value; 3084 PetscInt n_D,n_R; 3085 PetscBool check_corr[2],issbaij; 3086 PetscErrorCode ierr; 3087 /* prefixes stuff */ 3088 char dir_prefix[256],neu_prefix[256],str_level[16]; 3089 size_t len; 3090 3091 PetscFunctionBegin; 3092 3093 /* compute prefixes */ 3094 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3095 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3096 if (!pcbddc->current_level) { 3097 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3098 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3099 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3100 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3101 } else { 3102 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3103 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3104 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3105 len -= 15; /* remove "pc_bddc_coarse_" */ 3106 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3107 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3108 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3109 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3110 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3111 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3112 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3113 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3114 } 3115 3116 /* DIRICHLET PROBLEM */ 3117 if (dirichlet) { 3118 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3119 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3120 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3121 if (pcbddc->dbg_flag) { 3122 Mat A_IIn; 3123 3124 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3125 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3126 pcis->A_II = A_IIn; 3127 } 3128 } 3129 if (pcbddc->local_mat->symmetric_set) { 3130 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3131 } 3132 /* Matrix for Dirichlet problem is pcis->A_II */ 3133 n_D = pcis->n - pcis->n_B; 3134 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3135 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3137 /* default */ 3138 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3139 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3140 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3141 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3142 if (issbaij) { 3143 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3144 } else { 3145 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3146 } 3147 /* Allow user's customization */ 3148 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3149 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3150 } 3151 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3152 if (sub_schurs && sub_schurs->reuse_solver) { 3153 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3154 3155 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3156 } 3157 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3158 if (!n_D) { 3159 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3160 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3161 } 3162 /* Set Up KSP for Dirichlet problem of BDDC */ 3163 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3164 /* set ksp_D into pcis data */ 3165 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3166 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3167 pcis->ksp_D = pcbddc->ksp_D; 3168 } 3169 3170 /* NEUMANN PROBLEM */ 3171 A_RR = 0; 3172 if (neumann) { 3173 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3174 PetscInt ibs,mbs; 3175 PetscBool issbaij; 3176 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3177 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3178 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3179 if (pcbddc->ksp_R) { /* already created ksp */ 3180 PetscInt nn_R; 3181 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3182 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3183 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3184 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3185 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3186 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3187 reuse = MAT_INITIAL_MATRIX; 3188 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3189 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3190 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3191 reuse = MAT_INITIAL_MATRIX; 3192 } else { /* safe to reuse the matrix */ 3193 reuse = MAT_REUSE_MATRIX; 3194 } 3195 } 3196 /* last check */ 3197 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3198 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3199 reuse = MAT_INITIAL_MATRIX; 3200 } 3201 } else { /* first time, so we need to create the matrix */ 3202 reuse = MAT_INITIAL_MATRIX; 3203 } 3204 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3205 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3206 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3207 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3208 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3209 if (matis->A == pcbddc->local_mat) { 3210 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3211 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3212 } else { 3213 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3214 } 3215 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3216 if (matis->A == pcbddc->local_mat) { 3217 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3218 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3219 } else { 3220 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3221 } 3222 } 3223 /* extract A_RR */ 3224 if (sub_schurs && sub_schurs->reuse_solver) { 3225 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3226 3227 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3228 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3229 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3230 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3231 } else { 3232 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3233 } 3234 } else { 3235 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3236 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3237 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3238 } 3239 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3240 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3241 } 3242 if (pcbddc->local_mat->symmetric_set) { 3243 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3244 } 3245 if (!pcbddc->ksp_R) { /* create object if not present */ 3246 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3247 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3248 /* default */ 3249 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3250 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3251 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3252 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3253 if (issbaij) { 3254 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3255 } else { 3256 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3257 } 3258 /* Allow user's customization */ 3259 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3260 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3261 } 3262 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3263 if (!n_R) { 3264 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3265 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3266 } 3267 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3268 /* Reuse solver if it is present */ 3269 if (sub_schurs && sub_schurs->reuse_solver) { 3270 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3271 3272 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3273 } 3274 /* Set Up KSP for Neumann problem of BDDC */ 3275 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3276 } 3277 3278 if (pcbddc->dbg_flag) { 3279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3280 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3282 } 3283 3284 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3285 check_corr[0] = check_corr[1] = PETSC_FALSE; 3286 if (pcbddc->NullSpace_corr[0]) { 3287 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3288 } 3289 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3290 check_corr[0] = PETSC_TRUE; 3291 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3292 } 3293 if (neumann && pcbddc->NullSpace_corr[2]) { 3294 check_corr[1] = PETSC_TRUE; 3295 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3296 } 3297 3298 /* check Dirichlet and Neumann solvers */ 3299 if (pcbddc->dbg_flag) { 3300 if (dirichlet) { /* Dirichlet */ 3301 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3302 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3303 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3304 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3305 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3306 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3307 if (check_corr[0]) { 3308 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3309 } 3310 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3311 } 3312 if (neumann) { /* Neumann */ 3313 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3314 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3315 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3316 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3317 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3319 if (check_corr[1]) { 3320 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3321 } 3322 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3323 } 3324 } 3325 /* free Neumann problem's matrix */ 3326 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3332 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3333 { 3334 PetscErrorCode ierr; 3335 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3336 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3337 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3338 3339 PetscFunctionBegin; 3340 if (!reuse_solver) { 3341 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3342 } 3343 if (!pcbddc->switch_static) { 3344 if (applytranspose && pcbddc->local_auxmat1) { 3345 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3346 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3347 } 3348 if (!reuse_solver) { 3349 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3350 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3351 } else { 3352 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3353 3354 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3355 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3356 } 3357 } else { 3358 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3359 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3360 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3361 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3362 if (applytranspose && pcbddc->local_auxmat1) { 3363 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3364 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3365 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3366 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3367 } 3368 } 3369 if (!reuse_solver || pcbddc->switch_static) { 3370 if (applytranspose) { 3371 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3372 } else { 3373 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3374 } 3375 } else { 3376 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3377 3378 if (applytranspose) { 3379 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3380 } else { 3381 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3382 } 3383 } 3384 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3385 if (!pcbddc->switch_static) { 3386 if (!reuse_solver) { 3387 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3388 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3389 } else { 3390 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3391 3392 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3393 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3394 } 3395 if (!applytranspose && pcbddc->local_auxmat1) { 3396 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3397 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3398 } 3399 } else { 3400 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3401 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3404 if (!applytranspose && pcbddc->local_auxmat1) { 3405 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3406 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3407 } 3408 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3411 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3412 } 3413 PetscFunctionReturn(0); 3414 } 3415 3416 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3417 #undef __FUNCT__ 3418 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3419 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3420 { 3421 PetscErrorCode ierr; 3422 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3423 PC_IS* pcis = (PC_IS*) (pc->data); 3424 const PetscScalar zero = 0.0; 3425 3426 PetscFunctionBegin; 3427 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3428 if (!pcbddc->benign_apply_coarse_only) { 3429 if (applytranspose) { 3430 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3431 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3432 } else { 3433 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3434 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3435 } 3436 } else { 3437 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3438 } 3439 3440 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3441 if (pcbddc->benign_n) { 3442 PetscScalar *array; 3443 PetscInt j; 3444 3445 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3446 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3447 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3448 } 3449 3450 /* start communications from local primal nodes to rhs of coarse solver */ 3451 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3452 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3453 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3454 3455 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3456 if (pcbddc->coarse_ksp) { 3457 Mat coarse_mat; 3458 Vec rhs,sol; 3459 MatNullSpace nullsp; 3460 PetscBool isbddc = PETSC_FALSE; 3461 3462 if (pcbddc->benign_have_null) { 3463 PC coarse_pc; 3464 3465 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3466 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3467 /* we need to propagate to coarser levels the need for a possible benign correction */ 3468 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3469 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3470 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3471 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3472 } 3473 } 3474 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3475 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3476 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3477 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3478 if (nullsp) { 3479 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3480 } 3481 if (applytranspose) { 3482 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3483 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3484 } else { 3485 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3486 PC coarse_pc; 3487 3488 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3489 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3490 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3491 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3492 } else { 3493 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3494 } 3495 } 3496 /* we don't need the benign correction at coarser levels anymore */ 3497 if (pcbddc->benign_have_null && isbddc) { 3498 PC coarse_pc; 3499 PC_BDDC* coarsepcbddc; 3500 3501 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3502 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3503 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3504 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3505 } 3506 if (nullsp) { 3507 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3508 } 3509 } 3510 3511 /* Local solution on R nodes */ 3512 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3513 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3514 } 3515 /* communications from coarse sol to local primal nodes */ 3516 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3517 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3518 3519 /* Sum contributions from the two levels */ 3520 if (!pcbddc->benign_apply_coarse_only) { 3521 if (applytranspose) { 3522 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3523 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3524 } else { 3525 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3526 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3527 } 3528 /* store p0 */ 3529 if (pcbddc->benign_n) { 3530 PetscScalar *array; 3531 PetscInt j; 3532 3533 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3534 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3535 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3536 } 3537 } else { /* expand the coarse solution */ 3538 if (applytranspose) { 3539 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3540 } else { 3541 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3542 } 3543 } 3544 PetscFunctionReturn(0); 3545 } 3546 3547 #undef __FUNCT__ 3548 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3549 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3550 { 3551 PetscErrorCode ierr; 3552 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3553 PetscScalar *array; 3554 Vec from,to; 3555 3556 PetscFunctionBegin; 3557 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3558 from = pcbddc->coarse_vec; 3559 to = pcbddc->vec1_P; 3560 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3561 Vec tvec; 3562 3563 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3564 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3565 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3566 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3567 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3568 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3569 } 3570 } else { /* from local to global -> put data in coarse right hand side */ 3571 from = pcbddc->vec1_P; 3572 to = pcbddc->coarse_vec; 3573 } 3574 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3575 PetscFunctionReturn(0); 3576 } 3577 3578 #undef __FUNCT__ 3579 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3580 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3581 { 3582 PetscErrorCode ierr; 3583 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3584 PetscScalar *array; 3585 Vec from,to; 3586 3587 PetscFunctionBegin; 3588 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3589 from = pcbddc->coarse_vec; 3590 to = pcbddc->vec1_P; 3591 } else { /* from local to global -> put data in coarse right hand side */ 3592 from = pcbddc->vec1_P; 3593 to = pcbddc->coarse_vec; 3594 } 3595 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3596 if (smode == SCATTER_FORWARD) { 3597 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3598 Vec tvec; 3599 3600 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3601 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3602 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3603 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3604 } 3605 } else { 3606 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3607 ierr = VecResetArray(from);CHKERRQ(ierr); 3608 } 3609 } 3610 PetscFunctionReturn(0); 3611 } 3612 3613 /* uncomment for testing purposes */ 3614 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3615 #undef __FUNCT__ 3616 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3617 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3618 { 3619 PetscErrorCode ierr; 3620 PC_IS* pcis = (PC_IS*)(pc->data); 3621 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3622 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3623 /* one and zero */ 3624 PetscScalar one=1.0,zero=0.0; 3625 /* space to store constraints and their local indices */ 3626 PetscScalar *constraints_data; 3627 PetscInt *constraints_idxs,*constraints_idxs_B; 3628 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3629 PetscInt *constraints_n; 3630 /* iterators */ 3631 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3632 /* BLAS integers */ 3633 PetscBLASInt lwork,lierr; 3634 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3635 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3636 /* reuse */ 3637 PetscInt olocal_primal_size,olocal_primal_size_cc; 3638 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3639 /* change of basis */ 3640 PetscBool qr_needed; 3641 PetscBT change_basis,qr_needed_idx; 3642 /* auxiliary stuff */ 3643 PetscInt *nnz,*is_indices; 3644 PetscInt ncc; 3645 /* some quantities */ 3646 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3647 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3648 3649 PetscFunctionBegin; 3650 /* Destroy Mat objects computed previously */ 3651 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3652 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3653 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3654 /* save info on constraints from previous setup (if any) */ 3655 olocal_primal_size = pcbddc->local_primal_size; 3656 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3657 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3658 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3659 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3660 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3661 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3662 3663 if (!pcbddc->adaptive_selection) { 3664 IS ISForVertices,*ISForFaces,*ISForEdges; 3665 MatNullSpace nearnullsp; 3666 const Vec *nearnullvecs; 3667 Vec *localnearnullsp; 3668 PetscScalar *array; 3669 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3670 PetscBool nnsp_has_cnst; 3671 /* LAPACK working arrays for SVD or POD */ 3672 PetscBool skip_lapack,boolforchange; 3673 PetscScalar *work; 3674 PetscReal *singular_vals; 3675 #if defined(PETSC_USE_COMPLEX) 3676 PetscReal *rwork; 3677 #endif 3678 #if defined(PETSC_MISSING_LAPACK_GESVD) 3679 PetscScalar *temp_basis,*correlation_mat; 3680 #else 3681 PetscBLASInt dummy_int=1; 3682 PetscScalar dummy_scalar=1.; 3683 #endif 3684 3685 /* Get index sets for faces, edges and vertices from graph */ 3686 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3687 /* print some info */ 3688 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3689 PetscInt nv; 3690 3691 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 3692 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3693 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3698 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3699 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3700 } 3701 3702 /* free unneeded index sets */ 3703 if (!pcbddc->use_vertices) { 3704 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3705 } 3706 if (!pcbddc->use_edges) { 3707 for (i=0;i<n_ISForEdges;i++) { 3708 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3709 } 3710 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3711 n_ISForEdges = 0; 3712 } 3713 if (!pcbddc->use_faces) { 3714 for (i=0;i<n_ISForFaces;i++) { 3715 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3716 } 3717 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3718 n_ISForFaces = 0; 3719 } 3720 3721 /* check if near null space is attached to global mat */ 3722 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3723 if (nearnullsp) { 3724 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3725 /* remove any stored info */ 3726 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3727 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3728 /* store information for BDDC solver reuse */ 3729 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3730 pcbddc->onearnullspace = nearnullsp; 3731 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3732 for (i=0;i<nnsp_size;i++) { 3733 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3734 } 3735 } else { /* if near null space is not provided BDDC uses constants by default */ 3736 nnsp_size = 0; 3737 nnsp_has_cnst = PETSC_TRUE; 3738 } 3739 /* get max number of constraints on a single cc */ 3740 max_constraints = nnsp_size; 3741 if (nnsp_has_cnst) max_constraints++; 3742 3743 /* 3744 Evaluate maximum storage size needed by the procedure 3745 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3746 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3747 There can be multiple constraints per connected component 3748 */ 3749 n_vertices = 0; 3750 if (ISForVertices) { 3751 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3752 } 3753 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3754 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3755 3756 total_counts = n_ISForFaces+n_ISForEdges; 3757 total_counts *= max_constraints; 3758 total_counts += n_vertices; 3759 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3760 3761 total_counts = 0; 3762 max_size_of_constraint = 0; 3763 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3764 IS used_is; 3765 if (i<n_ISForEdges) { 3766 used_is = ISForEdges[i]; 3767 } else { 3768 used_is = ISForFaces[i-n_ISForEdges]; 3769 } 3770 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3771 total_counts += j; 3772 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3773 } 3774 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3775 3776 /* get local part of global near null space vectors */ 3777 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3778 for (k=0;k<nnsp_size;k++) { 3779 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3780 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3781 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3782 } 3783 3784 /* whether or not to skip lapack calls */ 3785 skip_lapack = PETSC_TRUE; 3786 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3787 3788 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3789 if (!skip_lapack) { 3790 PetscScalar temp_work; 3791 3792 #if defined(PETSC_MISSING_LAPACK_GESVD) 3793 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3794 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3795 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3796 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3797 #if defined(PETSC_USE_COMPLEX) 3798 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3799 #endif 3800 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3801 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3802 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3803 lwork = -1; 3804 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3805 #if !defined(PETSC_USE_COMPLEX) 3806 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3807 #else 3808 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3809 #endif 3810 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3811 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3812 #else /* on missing GESVD */ 3813 /* SVD */ 3814 PetscInt max_n,min_n; 3815 max_n = max_size_of_constraint; 3816 min_n = max_constraints; 3817 if (max_size_of_constraint < max_constraints) { 3818 min_n = max_size_of_constraint; 3819 max_n = max_constraints; 3820 } 3821 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3822 #if defined(PETSC_USE_COMPLEX) 3823 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3824 #endif 3825 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3826 lwork = -1; 3827 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3828 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3829 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3830 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3831 #if !defined(PETSC_USE_COMPLEX) 3832 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3833 #else 3834 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3835 #endif 3836 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3837 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3838 #endif /* on missing GESVD */ 3839 /* Allocate optimal workspace */ 3840 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3841 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3842 } 3843 /* Now we can loop on constraining sets */ 3844 total_counts = 0; 3845 constraints_idxs_ptr[0] = 0; 3846 constraints_data_ptr[0] = 0; 3847 /* vertices */ 3848 if (n_vertices) { 3849 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3850 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3851 for (i=0;i<n_vertices;i++) { 3852 constraints_n[total_counts] = 1; 3853 constraints_data[total_counts] = 1.0; 3854 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3855 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3856 total_counts++; 3857 } 3858 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3859 n_vertices = total_counts; 3860 } 3861 3862 /* edges and faces */ 3863 total_counts_cc = total_counts; 3864 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3865 IS used_is; 3866 PetscBool idxs_copied = PETSC_FALSE; 3867 3868 if (ncc<n_ISForEdges) { 3869 used_is = ISForEdges[ncc]; 3870 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3871 } else { 3872 used_is = ISForFaces[ncc-n_ISForEdges]; 3873 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3874 } 3875 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3876 3877 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3878 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3879 /* change of basis should not be performed on local periodic nodes */ 3880 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3881 if (nnsp_has_cnst) { 3882 PetscScalar quad_value; 3883 3884 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3885 idxs_copied = PETSC_TRUE; 3886 3887 if (!pcbddc->use_nnsp_true) { 3888 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3889 } else { 3890 quad_value = 1.0; 3891 } 3892 for (j=0;j<size_of_constraint;j++) { 3893 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3894 } 3895 temp_constraints++; 3896 total_counts++; 3897 } 3898 for (k=0;k<nnsp_size;k++) { 3899 PetscReal real_value; 3900 PetscScalar *ptr_to_data; 3901 3902 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3903 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3904 for (j=0;j<size_of_constraint;j++) { 3905 ptr_to_data[j] = array[is_indices[j]]; 3906 } 3907 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3908 /* check if array is null on the connected component */ 3909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3910 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3911 if (real_value > 0.0) { /* keep indices and values */ 3912 temp_constraints++; 3913 total_counts++; 3914 if (!idxs_copied) { 3915 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3916 idxs_copied = PETSC_TRUE; 3917 } 3918 } 3919 } 3920 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3921 valid_constraints = temp_constraints; 3922 if (!pcbddc->use_nnsp_true && temp_constraints) { 3923 if (temp_constraints == 1) { /* just normalize the constraint */ 3924 PetscScalar norm,*ptr_to_data; 3925 3926 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3927 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3928 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3929 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3930 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3931 } else { /* perform SVD */ 3932 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3933 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3934 3935 #if defined(PETSC_MISSING_LAPACK_GESVD) 3936 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3937 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3938 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3939 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3940 from that computed using LAPACKgesvd 3941 -> This is due to a different computation of eigenvectors in LAPACKheev 3942 -> The quality of the POD-computed basis will be the same */ 3943 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3944 /* Store upper triangular part of correlation matrix */ 3945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3947 for (j=0;j<temp_constraints;j++) { 3948 for (k=0;k<j+1;k++) { 3949 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3950 } 3951 } 3952 /* compute eigenvalues and eigenvectors of correlation matrix */ 3953 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3954 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3955 #if !defined(PETSC_USE_COMPLEX) 3956 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3957 #else 3958 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3959 #endif 3960 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3961 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3962 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3963 j = 0; 3964 while (j < temp_constraints && singular_vals[j] < tol) j++; 3965 total_counts = total_counts-j; 3966 valid_constraints = temp_constraints-j; 3967 /* scale and copy POD basis into used quadrature memory */ 3968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3969 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3970 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3973 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3974 if (j<temp_constraints) { 3975 PetscInt ii; 3976 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3977 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3978 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3979 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3980 for (k=0;k<temp_constraints-j;k++) { 3981 for (ii=0;ii<size_of_constraint;ii++) { 3982 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3983 } 3984 } 3985 } 3986 #else /* on missing GESVD */ 3987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3988 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3990 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3991 #if !defined(PETSC_USE_COMPLEX) 3992 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3993 #else 3994 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3995 #endif 3996 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3997 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3998 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3999 k = temp_constraints; 4000 if (k > size_of_constraint) k = size_of_constraint; 4001 j = 0; 4002 while (j < k && singular_vals[k-j-1] < tol) j++; 4003 valid_constraints = k-j; 4004 total_counts = total_counts-temp_constraints+valid_constraints; 4005 #endif /* on missing GESVD */ 4006 } 4007 } 4008 /* update pointers information */ 4009 if (valid_constraints) { 4010 constraints_n[total_counts_cc] = valid_constraints; 4011 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4012 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4013 /* set change_of_basis flag */ 4014 if (boolforchange) { 4015 PetscBTSet(change_basis,total_counts_cc); 4016 } 4017 total_counts_cc++; 4018 } 4019 } 4020 /* free workspace */ 4021 if (!skip_lapack) { 4022 ierr = PetscFree(work);CHKERRQ(ierr); 4023 #if defined(PETSC_USE_COMPLEX) 4024 ierr = PetscFree(rwork);CHKERRQ(ierr); 4025 #endif 4026 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4027 #if defined(PETSC_MISSING_LAPACK_GESVD) 4028 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4029 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4030 #endif 4031 } 4032 for (k=0;k<nnsp_size;k++) { 4033 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4034 } 4035 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4036 /* free index sets of faces, edges and vertices */ 4037 for (i=0;i<n_ISForFaces;i++) { 4038 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4039 } 4040 if (n_ISForFaces) { 4041 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4042 } 4043 for (i=0;i<n_ISForEdges;i++) { 4044 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4045 } 4046 if (n_ISForEdges) { 4047 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4048 } 4049 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4050 } else { 4051 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4052 4053 total_counts = 0; 4054 n_vertices = 0; 4055 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4056 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4057 } 4058 max_constraints = 0; 4059 total_counts_cc = 0; 4060 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4061 total_counts += pcbddc->adaptive_constraints_n[i]; 4062 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4063 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4064 } 4065 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4066 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4067 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4068 constraints_data = pcbddc->adaptive_constraints_data; 4069 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4070 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4071 total_counts_cc = 0; 4072 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4073 if (pcbddc->adaptive_constraints_n[i]) { 4074 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4075 } 4076 } 4077 #if 0 4078 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4079 for (i=0;i<total_counts_cc;i++) { 4080 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4081 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4082 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4083 printf(" %d",constraints_idxs[j]); 4084 } 4085 printf("\n"); 4086 printf("number of cc: %d\n",constraints_n[i]); 4087 } 4088 for (i=0;i<n_vertices;i++) { 4089 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4090 } 4091 for (i=0;i<sub_schurs->n_subs;i++) { 4092 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4093 } 4094 #endif 4095 4096 max_size_of_constraint = 0; 4097 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4098 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4099 /* Change of basis */ 4100 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4101 if (pcbddc->use_change_of_basis) { 4102 for (i=0;i<sub_schurs->n_subs;i++) { 4103 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4104 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4105 } 4106 } 4107 } 4108 } 4109 pcbddc->local_primal_size = total_counts; 4110 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4111 4112 /* map constraints_idxs in boundary numbering */ 4113 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4114 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4115 4116 /* Create constraint matrix */ 4117 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4118 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4119 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4120 4121 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4122 /* determine if a QR strategy is needed for change of basis */ 4123 qr_needed = PETSC_FALSE; 4124 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4125 total_primal_vertices=0; 4126 pcbddc->local_primal_size_cc = 0; 4127 for (i=0;i<total_counts_cc;i++) { 4128 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4129 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4130 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4131 pcbddc->local_primal_size_cc += 1; 4132 } else if (PetscBTLookup(change_basis,i)) { 4133 for (k=0;k<constraints_n[i];k++) { 4134 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4135 } 4136 pcbddc->local_primal_size_cc += constraints_n[i]; 4137 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4138 PetscBTSet(qr_needed_idx,i); 4139 qr_needed = PETSC_TRUE; 4140 } 4141 } else { 4142 pcbddc->local_primal_size_cc += 1; 4143 } 4144 } 4145 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4146 pcbddc->n_vertices = total_primal_vertices; 4147 /* permute indices in order to have a sorted set of vertices */ 4148 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4149 4150 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4151 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4152 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4153 4154 /* nonzero structure of constraint matrix */ 4155 /* and get reference dof for local constraints */ 4156 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4157 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4158 4159 j = total_primal_vertices; 4160 total_counts = total_primal_vertices; 4161 cum = total_primal_vertices; 4162 for (i=n_vertices;i<total_counts_cc;i++) { 4163 if (!PetscBTLookup(change_basis,i)) { 4164 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4165 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4166 cum++; 4167 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4168 for (k=0;k<constraints_n[i];k++) { 4169 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4170 nnz[j+k] = size_of_constraint; 4171 } 4172 j += constraints_n[i]; 4173 } 4174 } 4175 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4176 ierr = PetscFree(nnz);CHKERRQ(ierr); 4177 4178 /* set values in constraint matrix */ 4179 for (i=0;i<total_primal_vertices;i++) { 4180 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4181 } 4182 total_counts = total_primal_vertices; 4183 for (i=n_vertices;i<total_counts_cc;i++) { 4184 if (!PetscBTLookup(change_basis,i)) { 4185 PetscInt *cols; 4186 4187 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4188 cols = constraints_idxs+constraints_idxs_ptr[i]; 4189 for (k=0;k<constraints_n[i];k++) { 4190 PetscInt row = total_counts+k; 4191 PetscScalar *vals; 4192 4193 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4194 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4195 } 4196 total_counts += constraints_n[i]; 4197 } 4198 } 4199 /* assembling */ 4200 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4201 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4202 4203 /* 4204 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4205 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4206 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4207 */ 4208 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4209 if (pcbddc->use_change_of_basis) { 4210 /* dual and primal dofs on a single cc */ 4211 PetscInt dual_dofs,primal_dofs; 4212 /* working stuff for GEQRF */ 4213 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4214 PetscBLASInt lqr_work; 4215 /* working stuff for UNGQR */ 4216 PetscScalar *gqr_work,lgqr_work_t; 4217 PetscBLASInt lgqr_work; 4218 /* working stuff for TRTRS */ 4219 PetscScalar *trs_rhs; 4220 PetscBLASInt Blas_NRHS; 4221 /* pointers for values insertion into change of basis matrix */ 4222 PetscInt *start_rows,*start_cols; 4223 PetscScalar *start_vals; 4224 /* working stuff for values insertion */ 4225 PetscBT is_primal; 4226 PetscInt *aux_primal_numbering_B; 4227 /* matrix sizes */ 4228 PetscInt global_size,local_size; 4229 /* temporary change of basis */ 4230 Mat localChangeOfBasisMatrix; 4231 /* extra space for debugging */ 4232 PetscScalar *dbg_work; 4233 4234 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4235 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4236 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4237 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4238 /* nonzeros for local mat */ 4239 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4240 if (!pcbddc->benign_change || pcbddc->fake_change) { 4241 for (i=0;i<pcis->n;i++) nnz[i]=1; 4242 } else { 4243 const PetscInt *ii; 4244 PetscInt n; 4245 PetscBool flg_row; 4246 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4247 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4248 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4249 } 4250 for (i=n_vertices;i<total_counts_cc;i++) { 4251 if (PetscBTLookup(change_basis,i)) { 4252 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4253 if (PetscBTLookup(qr_needed_idx,i)) { 4254 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4255 } else { 4256 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4257 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4258 } 4259 } 4260 } 4261 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4262 ierr = PetscFree(nnz);CHKERRQ(ierr); 4263 /* Set interior change in the matrix */ 4264 if (!pcbddc->benign_change || pcbddc->fake_change) { 4265 for (i=0;i<pcis->n;i++) { 4266 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4267 } 4268 } else { 4269 const PetscInt *ii,*jj; 4270 PetscScalar *aa; 4271 PetscInt n; 4272 PetscBool flg_row; 4273 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4274 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4275 for (i=0;i<n;i++) { 4276 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4277 } 4278 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4279 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4280 } 4281 4282 if (pcbddc->dbg_flag) { 4283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4285 } 4286 4287 4288 /* Now we loop on the constraints which need a change of basis */ 4289 /* 4290 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4291 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4292 4293 Basic blocks of change of basis matrix T computed by 4294 4295 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4296 4297 | 1 0 ... 0 s_1/S | 4298 | 0 1 ... 0 s_2/S | 4299 | ... | 4300 | 0 ... 1 s_{n-1}/S | 4301 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4302 4303 with S = \sum_{i=1}^n s_i^2 4304 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4305 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4306 4307 - QR decomposition of constraints otherwise 4308 */ 4309 if (qr_needed) { 4310 /* space to store Q */ 4311 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4312 /* first we issue queries for optimal work */ 4313 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4314 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4315 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4316 lqr_work = -1; 4317 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4318 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4319 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4320 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4321 lgqr_work = -1; 4322 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4323 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4324 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4325 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4326 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4327 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4328 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4329 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4330 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4331 /* array to store scaling factors for reflectors */ 4332 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4333 /* array to store rhs and solution of triangular solver */ 4334 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4335 /* allocating workspace for check */ 4336 if (pcbddc->dbg_flag) { 4337 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4338 } 4339 } 4340 /* array to store whether a node is primal or not */ 4341 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4342 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4343 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4344 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4345 for (i=0;i<total_primal_vertices;i++) { 4346 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4347 } 4348 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4349 4350 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4351 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4352 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4353 if (PetscBTLookup(change_basis,total_counts)) { 4354 /* get constraint info */ 4355 primal_dofs = constraints_n[total_counts]; 4356 dual_dofs = size_of_constraint-primal_dofs; 4357 4358 if (pcbddc->dbg_flag) { 4359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4360 } 4361 4362 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4363 4364 /* copy quadrature constraints for change of basis check */ 4365 if (pcbddc->dbg_flag) { 4366 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4367 } 4368 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4369 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4370 4371 /* compute QR decomposition of constraints */ 4372 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4373 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4374 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4375 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4376 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4377 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4378 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4379 4380 /* explictly compute R^-T */ 4381 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4382 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4383 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4384 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4385 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4386 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4387 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4388 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4389 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4390 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4391 4392 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4393 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4394 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4395 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4397 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4398 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4399 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4400 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4401 4402 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4403 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4404 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4405 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4406 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4407 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4408 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4409 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4410 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4411 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4412 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4413 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4414 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4415 4416 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4417 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4418 /* insert cols for primal dofs */ 4419 for (j=0;j<primal_dofs;j++) { 4420 start_vals = &qr_basis[j*size_of_constraint]; 4421 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4422 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4423 } 4424 /* insert cols for dual dofs */ 4425 for (j=0,k=0;j<dual_dofs;k++) { 4426 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4427 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4428 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4429 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4430 j++; 4431 } 4432 } 4433 4434 /* check change of basis */ 4435 if (pcbddc->dbg_flag) { 4436 PetscInt ii,jj; 4437 PetscBool valid_qr=PETSC_TRUE; 4438 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4439 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4440 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4441 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4442 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4443 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4444 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4445 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4446 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4447 for (jj=0;jj<size_of_constraint;jj++) { 4448 for (ii=0;ii<primal_dofs;ii++) { 4449 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4450 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4451 } 4452 } 4453 if (!valid_qr) { 4454 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4455 for (jj=0;jj<size_of_constraint;jj++) { 4456 for (ii=0;ii<primal_dofs;ii++) { 4457 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4458 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4459 } 4460 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4461 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4462 } 4463 } 4464 } 4465 } else { 4466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4467 } 4468 } 4469 } else { /* simple transformation block */ 4470 PetscInt row,col; 4471 PetscScalar val,norm; 4472 4473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4474 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4475 for (j=0;j<size_of_constraint;j++) { 4476 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4477 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4478 if (!PetscBTLookup(is_primal,row_B)) { 4479 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4480 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4481 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4482 } else { 4483 for (k=0;k<size_of_constraint;k++) { 4484 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4485 if (row != col) { 4486 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4487 } else { 4488 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4489 } 4490 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4491 } 4492 } 4493 } 4494 if (pcbddc->dbg_flag) { 4495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4496 } 4497 } 4498 } else { 4499 if (pcbddc->dbg_flag) { 4500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4501 } 4502 } 4503 } 4504 4505 /* free workspace */ 4506 if (qr_needed) { 4507 if (pcbddc->dbg_flag) { 4508 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4509 } 4510 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4511 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4512 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4513 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4514 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4515 } 4516 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4517 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 4520 /* assembling of global change of variable */ 4521 if (!pcbddc->fake_change) { 4522 Mat tmat; 4523 PetscInt bs; 4524 4525 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4526 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4527 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4528 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4529 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4530 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4531 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4532 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4533 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4534 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4535 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4536 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4537 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4538 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4539 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4541 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4542 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4543 4544 /* check */ 4545 if (pcbddc->dbg_flag) { 4546 PetscReal error; 4547 Vec x,x_change; 4548 4549 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4550 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4551 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4552 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4553 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4554 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4555 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4556 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4557 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4558 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4559 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4560 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4562 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4563 ierr = VecDestroy(&x);CHKERRQ(ierr); 4564 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4565 } 4566 /* adapt sub_schurs computed (if any) */ 4567 if (pcbddc->use_deluxe_scaling) { 4568 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4569 4570 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4571 if (sub_schurs && sub_schurs->S_Ej_all) { 4572 Mat S_new,tmat; 4573 IS is_all_N,is_V_Sall = NULL; 4574 4575 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4576 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4577 if (pcbddc->deluxe_zerorows) { 4578 ISLocalToGlobalMapping NtoSall; 4579 IS is_V; 4580 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4581 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4582 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4583 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4584 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4585 } 4586 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4587 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4588 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4589 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4590 if (pcbddc->deluxe_zerorows) { 4591 const PetscScalar *array; 4592 const PetscInt *idxs_V,*idxs_all; 4593 PetscInt i,n_V; 4594 4595 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4596 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4597 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4598 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4599 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4600 for (i=0;i<n_V;i++) { 4601 PetscScalar val; 4602 PetscInt idx; 4603 4604 idx = idxs_V[i]; 4605 val = array[idxs_all[idxs_V[i]]]; 4606 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4607 } 4608 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4609 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4610 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4611 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4612 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4613 } 4614 sub_schurs->S_Ej_all = S_new; 4615 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4616 if (sub_schurs->sum_S_Ej_all) { 4617 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4618 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4619 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4620 if (pcbddc->deluxe_zerorows) { 4621 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4622 } 4623 sub_schurs->sum_S_Ej_all = S_new; 4624 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4625 } 4626 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4627 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4628 } 4629 /* destroy any change of basis context in sub_schurs */ 4630 if (sub_schurs && sub_schurs->change) { 4631 PetscInt i; 4632 4633 for (i=0;i<sub_schurs->n_subs;i++) { 4634 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4635 } 4636 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4637 } 4638 } 4639 if (pcbddc->switch_static) { /* need to save the local change */ 4640 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4641 } else { 4642 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4643 } 4644 /* determine if any process has changed the pressures locally */ 4645 pcbddc->change_interior = pcbddc->benign_have_null; 4646 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4647 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4648 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4649 pcbddc->use_qr_single = qr_needed; 4650 } 4651 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4652 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4653 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4654 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4655 } else { 4656 Mat benign_global = NULL; 4657 if (pcbddc->benign_have_null) { 4658 Mat tmat; 4659 4660 pcbddc->change_interior = PETSC_TRUE; 4661 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4662 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4663 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4664 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4665 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4666 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4667 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4668 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4669 if (pcbddc->benign_change) { 4670 Mat M; 4671 4672 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4673 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4674 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4675 ierr = MatDestroy(&M);CHKERRQ(ierr); 4676 } else { 4677 Mat eye; 4678 PetscScalar *array; 4679 4680 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4681 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4682 for (i=0;i<pcis->n;i++) { 4683 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4684 } 4685 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4686 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4687 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4688 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4689 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4690 } 4691 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4692 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4693 } 4694 if (pcbddc->user_ChangeOfBasisMatrix) { 4695 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4696 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4697 } else if (pcbddc->benign_have_null) { 4698 pcbddc->ChangeOfBasisMatrix = benign_global; 4699 } 4700 } 4701 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4702 IS is_global; 4703 const PetscInt *gidxs; 4704 4705 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4706 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4707 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4708 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4709 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4710 } 4711 } 4712 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4713 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4714 } 4715 4716 if (!pcbddc->fake_change) { 4717 /* add pressure dofs to set of primal nodes for numbering purposes */ 4718 for (i=0;i<pcbddc->benign_n;i++) { 4719 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4720 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4721 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4722 pcbddc->local_primal_size_cc++; 4723 pcbddc->local_primal_size++; 4724 } 4725 4726 /* check if a new primal space has been introduced (also take into account benign trick) */ 4727 pcbddc->new_primal_space_local = PETSC_TRUE; 4728 if (olocal_primal_size == pcbddc->local_primal_size) { 4729 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4730 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4731 if (!pcbddc->new_primal_space_local) { 4732 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4733 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4734 } 4735 } 4736 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4737 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4738 } 4739 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4740 4741 /* flush dbg viewer */ 4742 if (pcbddc->dbg_flag) { 4743 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4744 } 4745 4746 /* free workspace */ 4747 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4748 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4749 if (!pcbddc->adaptive_selection) { 4750 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4751 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4752 } else { 4753 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4754 pcbddc->adaptive_constraints_idxs_ptr, 4755 pcbddc->adaptive_constraints_data_ptr, 4756 pcbddc->adaptive_constraints_idxs, 4757 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4758 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4759 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4760 } 4761 PetscFunctionReturn(0); 4762 } 4763 4764 #undef __FUNCT__ 4765 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4766 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4767 { 4768 ISLocalToGlobalMapping map; 4769 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4770 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4771 PetscInt ierr,i,N; 4772 4773 PetscFunctionBegin; 4774 if (pcbddc->graphanalyzed) PetscFunctionReturn(0); 4775 /* Reset previously computed graph */ 4776 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4777 /* Init local Graph struct */ 4778 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4779 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 4780 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N);CHKERRQ(ierr); 4781 4782 /* Check validity of the csr graph passed in by the user */ 4783 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4784 4785 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4786 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4787 PetscInt *xadj,*adjncy; 4788 PetscInt nvtxs; 4789 PetscBool flg_row=PETSC_FALSE; 4790 4791 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4792 if (flg_row) { 4793 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4794 pcbddc->computed_rowadj = PETSC_TRUE; 4795 } 4796 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4797 } 4798 if (pcbddc->dbg_flag) { 4799 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4800 } 4801 4802 /* Setup of Graph */ 4803 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4804 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4805 4806 /* attach info on disconnected subdomains if present */ 4807 if (pcbddc->n_local_subs) { 4808 PetscInt *local_subs; 4809 4810 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4811 for (i=0;i<pcbddc->n_local_subs;i++) { 4812 const PetscInt *idxs; 4813 PetscInt nl,j; 4814 4815 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4816 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4817 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 4818 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4819 } 4820 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4821 pcbddc->mat_graph->local_subs = local_subs; 4822 } 4823 4824 /* Graph's connected components analysis */ 4825 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4826 4827 /* set flag indicating analysis has been done */ 4828 pcbddc->graphanalyzed = PETSC_TRUE; 4829 PetscFunctionReturn(0); 4830 } 4831 4832 #undef __FUNCT__ 4833 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4834 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4835 { 4836 PetscInt i,j; 4837 PetscScalar *alphas; 4838 PetscErrorCode ierr; 4839 4840 PetscFunctionBegin; 4841 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4842 for (i=0;i<n;i++) { 4843 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4844 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4845 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4846 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4847 } 4848 ierr = PetscFree(alphas);CHKERRQ(ierr); 4849 PetscFunctionReturn(0); 4850 } 4851 4852 #undef __FUNCT__ 4853 #define __FUNCT__ "MatISGetSubassemblingPattern" 4854 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4855 { 4856 Mat A; 4857 PetscInt n_neighs,*neighs,*n_shared,**shared; 4858 PetscMPIInt size,rank,color; 4859 PetscInt *xadj,*adjncy; 4860 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4861 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4862 PetscInt void_procs,*procs_candidates = NULL; 4863 PetscInt xadj_count, *count; 4864 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4865 PetscSubcomm psubcomm; 4866 MPI_Comm subcomm; 4867 PetscErrorCode ierr; 4868 4869 PetscFunctionBegin; 4870 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4871 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4872 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4873 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4874 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4875 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4876 4877 if (have_void) *have_void = PETSC_FALSE; 4878 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4879 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4880 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4881 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4882 im_active = !!(n); 4883 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4884 void_procs = size - active_procs; 4885 /* get ranks of of non-active processes in mat communicator */ 4886 if (void_procs) { 4887 PetscInt ncand; 4888 4889 if (have_void) *have_void = PETSC_TRUE; 4890 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4891 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4892 for (i=0,ncand=0;i<size;i++) { 4893 if (!procs_candidates[i]) { 4894 procs_candidates[ncand++] = i; 4895 } 4896 } 4897 /* force n_subdomains to be not greater that the number of non-active processes */ 4898 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4899 } 4900 4901 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4902 if (active_procs < *n_subdomains) { 4903 PetscInt issize,isidx; 4904 if (im_active) { 4905 issize = 1; 4906 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4907 isidx = procs_candidates[rank]; 4908 } else { 4909 isidx = rank; 4910 } 4911 } else { 4912 issize = 0; 4913 isidx = -1; 4914 } 4915 *n_subdomains = active_procs; 4916 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4917 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4918 PetscFunctionReturn(0); 4919 } 4920 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4921 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4922 threshold = PetscMax(threshold,2); 4923 4924 /* Get info on mapping */ 4925 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4926 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4927 4928 /* build local CSR graph of subdomains' connectivity */ 4929 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4930 xadj[0] = 0; 4931 xadj[1] = PetscMax(n_neighs-1,0); 4932 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4933 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4934 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4935 for (i=1;i<n_neighs;i++) 4936 for (j=0;j<n_shared[i];j++) 4937 count[shared[i][j]] += 1; 4938 4939 xadj_count = 0; 4940 for (i=1;i<n_neighs;i++) { 4941 for (j=0;j<n_shared[i];j++) { 4942 if (count[shared[i][j]] < threshold) { 4943 adjncy[xadj_count] = neighs[i]; 4944 adjncy_wgt[xadj_count] = n_shared[i]; 4945 xadj_count++; 4946 break; 4947 } 4948 } 4949 } 4950 xadj[1] = xadj_count; 4951 ierr = PetscFree(count);CHKERRQ(ierr); 4952 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4953 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4954 4955 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4956 4957 /* Restrict work on active processes only */ 4958 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4959 if (void_procs) { 4960 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4961 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4962 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4963 subcomm = PetscSubcommChild(psubcomm); 4964 } else { 4965 psubcomm = NULL; 4966 subcomm = PetscObjectComm((PetscObject)mat); 4967 } 4968 4969 v_wgt = NULL; 4970 if (!color) { 4971 ierr = PetscFree(xadj);CHKERRQ(ierr); 4972 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4973 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4974 } else { 4975 Mat subdomain_adj; 4976 IS new_ranks,new_ranks_contig; 4977 MatPartitioning partitioner; 4978 PetscInt rstart=0,rend=0; 4979 PetscInt *is_indices,*oldranks; 4980 PetscMPIInt size; 4981 PetscBool aggregate; 4982 4983 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4984 if (void_procs) { 4985 PetscInt prank = rank; 4986 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4987 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4988 for (i=0;i<xadj[1];i++) { 4989 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4990 } 4991 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4992 } else { 4993 oldranks = NULL; 4994 } 4995 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4996 if (aggregate) { /* TODO: all this part could be made more efficient */ 4997 PetscInt lrows,row,ncols,*cols; 4998 PetscMPIInt nrank; 4999 PetscScalar *vals; 5000 5001 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5002 lrows = 0; 5003 if (nrank<redprocs) { 5004 lrows = size/redprocs; 5005 if (nrank<size%redprocs) lrows++; 5006 } 5007 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5008 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5009 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5010 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5011 row = nrank; 5012 ncols = xadj[1]-xadj[0]; 5013 cols = adjncy; 5014 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5015 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5016 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5017 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5018 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5019 ierr = PetscFree(xadj);CHKERRQ(ierr); 5020 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5021 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5022 ierr = PetscFree(vals);CHKERRQ(ierr); 5023 if (use_vwgt) { 5024 Vec v; 5025 const PetscScalar *array; 5026 PetscInt nl; 5027 5028 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5029 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5030 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5031 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5032 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5033 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5034 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5035 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5036 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5037 ierr = VecDestroy(&v);CHKERRQ(ierr); 5038 } 5039 } else { 5040 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5041 if (use_vwgt) { 5042 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5043 v_wgt[0] = local_size; 5044 } 5045 } 5046 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5047 5048 /* Partition */ 5049 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5050 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5051 if (v_wgt) { 5052 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5053 } 5054 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5055 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5056 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5057 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5058 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5059 5060 /* renumber new_ranks to avoid "holes" in new set of processors */ 5061 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5062 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5063 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5064 if (!aggregate) { 5065 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5066 #if defined(PETSC_USE_DEBUG) 5067 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5068 #endif 5069 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5070 } else if (oldranks) { 5071 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5072 } else { 5073 ranks_send_to_idx[0] = is_indices[0]; 5074 } 5075 } else { 5076 PetscInt idxs[1]; 5077 PetscMPIInt tag; 5078 MPI_Request *reqs; 5079 5080 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5081 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5082 for (i=rstart;i<rend;i++) { 5083 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5084 } 5085 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5086 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5087 ierr = PetscFree(reqs);CHKERRQ(ierr); 5088 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5089 #if defined(PETSC_USE_DEBUG) 5090 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5091 #endif 5092 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5093 } else if (oldranks) { 5094 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5095 } else { 5096 ranks_send_to_idx[0] = idxs[0]; 5097 } 5098 } 5099 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5100 /* clean up */ 5101 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5102 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5103 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5104 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5105 } 5106 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5107 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5108 5109 /* assemble parallel IS for sends */ 5110 i = 1; 5111 if (!color) i=0; 5112 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5117 5118 #undef __FUNCT__ 5119 #define __FUNCT__ "MatISSubassemble" 5120 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5121 { 5122 Mat local_mat; 5123 IS is_sends_internal; 5124 PetscInt rows,cols,new_local_rows; 5125 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5126 PetscBool ismatis,isdense,newisdense,destroy_mat; 5127 ISLocalToGlobalMapping l2gmap; 5128 PetscInt* l2gmap_indices; 5129 const PetscInt* is_indices; 5130 MatType new_local_type; 5131 /* buffers */ 5132 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5133 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5134 PetscInt *recv_buffer_idxs_local; 5135 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5136 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5137 /* MPI */ 5138 MPI_Comm comm,comm_n; 5139 PetscSubcomm subcomm; 5140 PetscMPIInt n_sends,n_recvs,commsize; 5141 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5142 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5143 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5144 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5145 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5146 PetscErrorCode ierr; 5147 5148 PetscFunctionBegin; 5149 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5150 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5151 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5152 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5153 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5154 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5155 PetscValidLogicalCollectiveBool(mat,reuse,6); 5156 PetscValidLogicalCollectiveInt(mat,nis,8); 5157 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5158 if (nvecs) { 5159 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5160 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5161 } 5162 /* further checks */ 5163 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5164 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5165 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5166 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5167 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5168 if (reuse && *mat_n) { 5169 PetscInt mrows,mcols,mnrows,mncols; 5170 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5171 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5172 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5173 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5174 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5175 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5176 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5177 } 5178 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5179 PetscValidLogicalCollectiveInt(mat,bs,0); 5180 5181 /* prepare IS for sending if not provided */ 5182 if (!is_sends) { 5183 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5184 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5185 } else { 5186 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5187 is_sends_internal = is_sends; 5188 } 5189 5190 /* get comm */ 5191 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5192 5193 /* compute number of sends */ 5194 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5195 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5196 5197 /* compute number of receives */ 5198 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5199 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5200 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5201 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5202 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5203 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5204 ierr = PetscFree(iflags);CHKERRQ(ierr); 5205 5206 /* restrict comm if requested */ 5207 subcomm = 0; 5208 destroy_mat = PETSC_FALSE; 5209 if (restrict_comm) { 5210 PetscMPIInt color,subcommsize; 5211 5212 color = 0; 5213 if (restrict_full) { 5214 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5215 } else { 5216 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5217 } 5218 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5219 subcommsize = commsize - subcommsize; 5220 /* check if reuse has been requested */ 5221 if (reuse) { 5222 if (*mat_n) { 5223 PetscMPIInt subcommsize2; 5224 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5225 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5226 comm_n = PetscObjectComm((PetscObject)*mat_n); 5227 } else { 5228 comm_n = PETSC_COMM_SELF; 5229 } 5230 } else { /* MAT_INITIAL_MATRIX */ 5231 PetscMPIInt rank; 5232 5233 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5234 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5235 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5236 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5237 comm_n = PetscSubcommChild(subcomm); 5238 } 5239 /* flag to destroy *mat_n if not significative */ 5240 if (color) destroy_mat = PETSC_TRUE; 5241 } else { 5242 comm_n = comm; 5243 } 5244 5245 /* prepare send/receive buffers */ 5246 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5247 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5248 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5249 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5250 if (nis) { 5251 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5252 } 5253 5254 /* Get data from local matrices */ 5255 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5256 /* TODO: See below some guidelines on how to prepare the local buffers */ 5257 /* 5258 send_buffer_vals should contain the raw values of the local matrix 5259 send_buffer_idxs should contain: 5260 - MatType_PRIVATE type 5261 - PetscInt size_of_l2gmap 5262 - PetscInt global_row_indices[size_of_l2gmap] 5263 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5264 */ 5265 else { 5266 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5267 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5268 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5269 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5270 send_buffer_idxs[1] = i; 5271 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5272 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5273 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5274 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5275 for (i=0;i<n_sends;i++) { 5276 ilengths_vals[is_indices[i]] = len*len; 5277 ilengths_idxs[is_indices[i]] = len+2; 5278 } 5279 } 5280 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5281 /* additional is (if any) */ 5282 if (nis) { 5283 PetscMPIInt psum; 5284 PetscInt j; 5285 for (j=0,psum=0;j<nis;j++) { 5286 PetscInt plen; 5287 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5288 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5289 psum += len+1; /* indices + lenght */ 5290 } 5291 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5292 for (j=0,psum=0;j<nis;j++) { 5293 PetscInt plen; 5294 const PetscInt *is_array_idxs; 5295 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5296 send_buffer_idxs_is[psum] = plen; 5297 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5298 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5299 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5300 psum += plen+1; /* indices + lenght */ 5301 } 5302 for (i=0;i<n_sends;i++) { 5303 ilengths_idxs_is[is_indices[i]] = psum; 5304 } 5305 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5306 } 5307 5308 buf_size_idxs = 0; 5309 buf_size_vals = 0; 5310 buf_size_idxs_is = 0; 5311 buf_size_vecs = 0; 5312 for (i=0;i<n_recvs;i++) { 5313 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5314 buf_size_vals += (PetscInt)olengths_vals[i]; 5315 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5316 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5317 } 5318 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5319 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5320 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5321 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5322 5323 /* get new tags for clean communications */ 5324 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5325 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5326 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5327 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5328 5329 /* allocate for requests */ 5330 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5331 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5332 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5333 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5334 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5335 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5336 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5337 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5338 5339 /* communications */ 5340 ptr_idxs = recv_buffer_idxs; 5341 ptr_vals = recv_buffer_vals; 5342 ptr_idxs_is = recv_buffer_idxs_is; 5343 ptr_vecs = recv_buffer_vecs; 5344 for (i=0;i<n_recvs;i++) { 5345 source_dest = onodes[i]; 5346 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5347 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5348 ptr_idxs += olengths_idxs[i]; 5349 ptr_vals += olengths_vals[i]; 5350 if (nis) { 5351 source_dest = onodes_is[i]; 5352 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5353 ptr_idxs_is += olengths_idxs_is[i]; 5354 } 5355 if (nvecs) { 5356 source_dest = onodes[i]; 5357 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5358 ptr_vecs += olengths_idxs[i]-2; 5359 } 5360 } 5361 for (i=0;i<n_sends;i++) { 5362 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5363 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5364 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5365 if (nis) { 5366 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5367 } 5368 if (nvecs) { 5369 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5370 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5371 } 5372 } 5373 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5374 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5375 5376 /* assemble new l2g map */ 5377 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5378 ptr_idxs = recv_buffer_idxs; 5379 new_local_rows = 0; 5380 for (i=0;i<n_recvs;i++) { 5381 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5382 ptr_idxs += olengths_idxs[i]; 5383 } 5384 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5385 ptr_idxs = recv_buffer_idxs; 5386 new_local_rows = 0; 5387 for (i=0;i<n_recvs;i++) { 5388 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5389 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5390 ptr_idxs += olengths_idxs[i]; 5391 } 5392 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5393 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5394 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5395 5396 /* infer new local matrix type from received local matrices type */ 5397 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5398 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5399 if (n_recvs) { 5400 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5401 ptr_idxs = recv_buffer_idxs; 5402 for (i=0;i<n_recvs;i++) { 5403 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5404 new_local_type_private = MATAIJ_PRIVATE; 5405 break; 5406 } 5407 ptr_idxs += olengths_idxs[i]; 5408 } 5409 switch (new_local_type_private) { 5410 case MATDENSE_PRIVATE: 5411 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5412 new_local_type = MATSEQAIJ; 5413 bs = 1; 5414 } else { /* if I receive only 1 dense matrix */ 5415 new_local_type = MATSEQDENSE; 5416 bs = 1; 5417 } 5418 break; 5419 case MATAIJ_PRIVATE: 5420 new_local_type = MATSEQAIJ; 5421 bs = 1; 5422 break; 5423 case MATBAIJ_PRIVATE: 5424 new_local_type = MATSEQBAIJ; 5425 break; 5426 case MATSBAIJ_PRIVATE: 5427 new_local_type = MATSEQSBAIJ; 5428 break; 5429 default: 5430 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5431 break; 5432 } 5433 } else { /* by default, new_local_type is seqdense */ 5434 new_local_type = MATSEQDENSE; 5435 bs = 1; 5436 } 5437 5438 /* create MATIS object if needed */ 5439 if (!reuse) { 5440 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5441 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5442 } else { 5443 /* it also destroys the local matrices */ 5444 if (*mat_n) { 5445 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5446 } else { /* this is a fake object */ 5447 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5448 } 5449 } 5450 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5451 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5452 5453 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5454 5455 /* Global to local map of received indices */ 5456 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5457 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5458 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5459 5460 /* restore attributes -> type of incoming data and its size */ 5461 buf_size_idxs = 0; 5462 for (i=0;i<n_recvs;i++) { 5463 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5464 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5465 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5466 } 5467 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5468 5469 /* set preallocation */ 5470 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5471 if (!newisdense) { 5472 PetscInt *new_local_nnz=0; 5473 5474 ptr_idxs = recv_buffer_idxs_local; 5475 if (n_recvs) { 5476 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5477 } 5478 for (i=0;i<n_recvs;i++) { 5479 PetscInt j; 5480 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5481 for (j=0;j<*(ptr_idxs+1);j++) { 5482 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5483 } 5484 } else { 5485 /* TODO */ 5486 } 5487 ptr_idxs += olengths_idxs[i]; 5488 } 5489 if (new_local_nnz) { 5490 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5491 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5492 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5493 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5494 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5495 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5496 } else { 5497 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5498 } 5499 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5500 } else { 5501 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5502 } 5503 5504 /* set values */ 5505 ptr_vals = recv_buffer_vals; 5506 ptr_idxs = recv_buffer_idxs_local; 5507 for (i=0;i<n_recvs;i++) { 5508 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5509 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5510 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5511 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5512 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5513 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5514 } else { 5515 /* TODO */ 5516 } 5517 ptr_idxs += olengths_idxs[i]; 5518 ptr_vals += olengths_vals[i]; 5519 } 5520 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5521 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5522 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5523 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5524 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5525 5526 #if 0 5527 if (!restrict_comm) { /* check */ 5528 Vec lvec,rvec; 5529 PetscReal infty_error; 5530 5531 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5532 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5533 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5534 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5535 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5536 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5537 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5538 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5539 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5540 } 5541 #endif 5542 5543 /* assemble new additional is (if any) */ 5544 if (nis) { 5545 PetscInt **temp_idxs,*count_is,j,psum; 5546 5547 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5548 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5549 ptr_idxs = recv_buffer_idxs_is; 5550 psum = 0; 5551 for (i=0;i<n_recvs;i++) { 5552 for (j=0;j<nis;j++) { 5553 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5554 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5555 psum += plen; 5556 ptr_idxs += plen+1; /* shift pointer to received data */ 5557 } 5558 } 5559 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5560 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5561 for (i=1;i<nis;i++) { 5562 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5563 } 5564 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5565 ptr_idxs = recv_buffer_idxs_is; 5566 for (i=0;i<n_recvs;i++) { 5567 for (j=0;j<nis;j++) { 5568 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5569 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5570 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5571 ptr_idxs += plen+1; /* shift pointer to received data */ 5572 } 5573 } 5574 for (i=0;i<nis;i++) { 5575 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5576 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5577 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5578 } 5579 ierr = PetscFree(count_is);CHKERRQ(ierr); 5580 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5581 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5582 } 5583 /* free workspace */ 5584 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5585 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5586 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5587 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5588 if (isdense) { 5589 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5590 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5591 } else { 5592 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5593 } 5594 if (nis) { 5595 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5596 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5597 } 5598 5599 if (nvecs) { 5600 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5601 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5602 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5603 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5604 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5605 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5606 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5607 /* set values */ 5608 ptr_vals = recv_buffer_vecs; 5609 ptr_idxs = recv_buffer_idxs_local; 5610 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5611 for (i=0;i<n_recvs;i++) { 5612 PetscInt j; 5613 for (j=0;j<*(ptr_idxs+1);j++) { 5614 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5615 } 5616 ptr_idxs += olengths_idxs[i]; 5617 ptr_vals += olengths_idxs[i]-2; 5618 } 5619 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5620 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5621 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5622 } 5623 5624 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5625 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5626 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5627 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5628 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5629 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5630 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5631 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5632 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5633 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5634 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5635 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5636 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5637 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5638 ierr = PetscFree(onodes);CHKERRQ(ierr); 5639 if (nis) { 5640 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5641 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5642 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5643 } 5644 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5645 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5646 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5647 for (i=0;i<nis;i++) { 5648 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5649 } 5650 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5651 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5652 } 5653 *mat_n = NULL; 5654 } 5655 PetscFunctionReturn(0); 5656 } 5657 5658 /* temporary hack into ksp private data structure */ 5659 #include <petsc/private/kspimpl.h> 5660 5661 #undef __FUNCT__ 5662 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5663 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5664 { 5665 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5666 PC_IS *pcis = (PC_IS*)pc->data; 5667 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5668 Mat coarsedivudotp = NULL; 5669 MatNullSpace CoarseNullSpace = NULL; 5670 ISLocalToGlobalMapping coarse_islg; 5671 IS coarse_is,*isarray; 5672 PetscInt i,im_active=-1,active_procs=-1; 5673 PetscInt nis,nisdofs,nisneu,nisvert; 5674 PC pc_temp; 5675 PCType coarse_pc_type; 5676 KSPType coarse_ksp_type; 5677 PetscBool multilevel_requested,multilevel_allowed; 5678 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5679 Mat t_coarse_mat_is; 5680 PetscInt ncoarse; 5681 PetscBool compute_vecs = PETSC_FALSE; 5682 PetscScalar *array; 5683 MatReuse coarse_mat_reuse; 5684 PetscBool restr, full_restr, have_void; 5685 PetscErrorCode ierr; 5686 5687 PetscFunctionBegin; 5688 /* Assign global numbering to coarse dofs */ 5689 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5690 PetscInt ocoarse_size; 5691 compute_vecs = PETSC_TRUE; 5692 ocoarse_size = pcbddc->coarse_size; 5693 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5694 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5695 /* see if we can avoid some work */ 5696 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5697 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5698 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5699 PC pc; 5700 PetscBool isbddc; 5701 5702 /* temporary workaround since PCBDDC does not have a reset method so far */ 5703 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5704 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5705 if (isbddc) { 5706 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5707 } else { 5708 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5709 } 5710 coarse_reuse = PETSC_FALSE; 5711 } else { /* we can safely reuse already computed coarse matrix */ 5712 coarse_reuse = PETSC_TRUE; 5713 } 5714 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5715 coarse_reuse = PETSC_FALSE; 5716 } 5717 /* reset any subassembling information */ 5718 if (!coarse_reuse || pcbddc->recompute_topography) { 5719 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5720 } 5721 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5722 coarse_reuse = PETSC_TRUE; 5723 } 5724 /* assemble coarse matrix */ 5725 if (coarse_reuse && pcbddc->coarse_ksp) { 5726 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5727 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5728 coarse_mat_reuse = MAT_REUSE_MATRIX; 5729 } else { 5730 coarse_mat = NULL; 5731 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5732 } 5733 5734 /* creates temporary l2gmap and IS for coarse indexes */ 5735 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5736 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5737 5738 /* creates temporary MATIS object for coarse matrix */ 5739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5740 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5741 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5742 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5743 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5744 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5745 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5748 5749 /* count "active" (i.e. with positive local size) and "void" processes */ 5750 im_active = !!(pcis->n); 5751 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5752 5753 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5754 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5755 /* full_restr : just use the receivers from the subassembling pattern */ 5756 coarse_mat_is = NULL; 5757 multilevel_allowed = PETSC_FALSE; 5758 multilevel_requested = PETSC_FALSE; 5759 full_restr = PETSC_TRUE; 5760 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5761 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5762 if (multilevel_requested) { 5763 ncoarse = active_procs/pcbddc->coarsening_ratio; 5764 restr = PETSC_FALSE; 5765 full_restr = PETSC_FALSE; 5766 } else { 5767 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5768 restr = PETSC_TRUE; 5769 full_restr = PETSC_TRUE; 5770 } 5771 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5772 ncoarse = PetscMax(1,ncoarse); 5773 if (!pcbddc->coarse_subassembling) { 5774 if (pcbddc->coarsening_ratio > 1) { 5775 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5776 } else { 5777 PetscMPIInt size,rank; 5778 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5779 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5780 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5781 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5782 } 5783 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5784 PetscInt psum; 5785 PetscMPIInt size; 5786 if (pcbddc->coarse_ksp) psum = 1; 5787 else psum = 0; 5788 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5789 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5790 if (ncoarse < size) have_void = PETSC_TRUE; 5791 } 5792 /* determine if we can go multilevel */ 5793 if (multilevel_requested) { 5794 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5795 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5796 } 5797 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5798 5799 /* dump subassembling pattern */ 5800 if (pcbddc->dbg_flag && multilevel_allowed) { 5801 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5802 } 5803 5804 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5805 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5806 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5807 const PetscInt *idxs; 5808 ISLocalToGlobalMapping tmap; 5809 5810 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5811 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5812 /* allocate space for temporary storage */ 5813 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5814 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5815 /* allocate for IS array */ 5816 nisdofs = pcbddc->n_ISForDofsLocal; 5817 nisneu = !!pcbddc->NeumannBoundariesLocal; 5818 nisvert = 0; /* nisvert is not used */ 5819 nis = nisdofs + nisneu + nisvert; 5820 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5821 /* dofs splitting */ 5822 for (i=0;i<nisdofs;i++) { 5823 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5824 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5825 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5826 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5827 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5828 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5829 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5830 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5831 } 5832 /* neumann boundaries */ 5833 if (pcbddc->NeumannBoundariesLocal) { 5834 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5835 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5836 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5837 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5838 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5839 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5840 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5841 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5842 } 5843 /* free memory */ 5844 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5845 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5846 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5847 } else { 5848 nis = 0; 5849 nisdofs = 0; 5850 nisneu = 0; 5851 nisvert = 0; 5852 isarray = NULL; 5853 } 5854 /* destroy no longer needed map */ 5855 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5856 5857 /* subassemble */ 5858 if (multilevel_allowed) { 5859 Vec vp[1]; 5860 PetscInt nvecs = 0; 5861 PetscBool reuse,reuser; 5862 5863 if (coarse_mat) reuse = PETSC_TRUE; 5864 else reuse = PETSC_FALSE; 5865 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5866 vp[0] = NULL; 5867 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5868 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5869 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5870 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5871 nvecs = 1; 5872 5873 if (pcbddc->divudotp) { 5874 Mat B,loc_divudotp; 5875 Vec v,p; 5876 IS dummy; 5877 PetscInt np; 5878 5879 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5880 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5881 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5882 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5883 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5884 ierr = VecSet(p,1.);CHKERRQ(ierr); 5885 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5886 ierr = VecDestroy(&p);CHKERRQ(ierr); 5887 ierr = MatDestroy(&B);CHKERRQ(ierr); 5888 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5889 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5890 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5891 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5892 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5893 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5894 ierr = VecDestroy(&v);CHKERRQ(ierr); 5895 } 5896 } 5897 if (reuser) { 5898 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5899 } else { 5900 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5901 } 5902 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5903 PetscScalar *arraym,*arrayv; 5904 PetscInt nl; 5905 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5906 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5907 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5908 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5909 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5910 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5911 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5912 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5913 } else { 5914 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5915 } 5916 } else { 5917 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5918 } 5919 if (coarse_mat_is || coarse_mat) { 5920 PetscMPIInt size; 5921 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5922 if (!multilevel_allowed) { 5923 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5924 } else { 5925 Mat A; 5926 5927 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5928 if (coarse_mat_is) { 5929 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5930 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5931 coarse_mat = coarse_mat_is; 5932 } 5933 /* be sure we don't have MatSeqDENSE as local mat */ 5934 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5935 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5936 } 5937 } 5938 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5939 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5940 5941 /* create local to global scatters for coarse problem */ 5942 if (compute_vecs) { 5943 PetscInt lrows; 5944 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5945 if (coarse_mat) { 5946 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5947 } else { 5948 lrows = 0; 5949 } 5950 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5951 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5952 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5953 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5954 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5955 } 5956 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5957 5958 /* set defaults for coarse KSP and PC */ 5959 if (multilevel_allowed) { 5960 coarse_ksp_type = KSPRICHARDSON; 5961 coarse_pc_type = PCBDDC; 5962 } else { 5963 coarse_ksp_type = KSPPREONLY; 5964 coarse_pc_type = PCREDUNDANT; 5965 } 5966 5967 /* print some info if requested */ 5968 if (pcbddc->dbg_flag) { 5969 if (!multilevel_allowed) { 5970 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5971 if (multilevel_requested) { 5972 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5973 } else if (pcbddc->max_levels) { 5974 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5975 } 5976 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5977 } 5978 } 5979 5980 /* create the coarse KSP object only once with defaults */ 5981 if (coarse_mat) { 5982 PetscViewer dbg_viewer = NULL; 5983 if (pcbddc->dbg_flag) { 5984 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5985 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5986 } 5987 if (!pcbddc->coarse_ksp) { 5988 char prefix[256],str_level[16]; 5989 size_t len; 5990 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5991 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5992 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5993 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5994 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5995 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5996 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5997 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5998 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5999 /* prefix */ 6000 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6001 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6002 if (!pcbddc->current_level) { 6003 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6004 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6005 } else { 6006 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6007 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6008 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6009 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6010 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6011 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6012 } 6013 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6014 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6015 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6016 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6017 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6018 /* allow user customization */ 6019 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6020 } 6021 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6022 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6023 if (nisdofs) { 6024 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6025 for (i=0;i<nisdofs;i++) { 6026 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6027 } 6028 } 6029 if (nisneu) { 6030 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6031 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6032 } 6033 if (nisvert) { 6034 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6035 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6036 } 6037 6038 /* get some info after set from options */ 6039 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6040 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6041 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6042 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6043 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6044 isbddc = PETSC_FALSE; 6045 } 6046 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6047 if (isredundant) { 6048 KSP inner_ksp; 6049 PC inner_pc; 6050 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6051 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6052 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6053 } 6054 6055 /* parameters which miss an API */ 6056 if (isbddc) { 6057 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6058 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6059 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6060 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6061 if (pcbddc_coarse->benign_saddle_point) { 6062 Mat coarsedivudotp_is; 6063 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6064 IS row,col; 6065 const PetscInt *gidxs; 6066 PetscInt n,st,M,N; 6067 6068 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6069 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6070 st = st-n; 6071 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6072 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6073 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6074 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6075 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6076 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6077 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6078 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6079 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6080 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6081 ierr = ISDestroy(&row);CHKERRQ(ierr); 6082 ierr = ISDestroy(&col);CHKERRQ(ierr); 6083 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6084 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6085 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6086 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6087 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6088 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6089 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6090 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6091 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6092 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6093 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6094 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6095 } 6096 } 6097 6098 /* propagate symmetry info of coarse matrix */ 6099 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6100 if (pc->pmat->symmetric_set) { 6101 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6102 } 6103 if (pc->pmat->hermitian_set) { 6104 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6105 } 6106 if (pc->pmat->spd_set) { 6107 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6108 } 6109 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6110 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6111 } 6112 /* set operators */ 6113 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6114 if (pcbddc->dbg_flag) { 6115 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6116 } 6117 } 6118 ierr = PetscFree(isarray);CHKERRQ(ierr); 6119 #if 0 6120 { 6121 PetscViewer viewer; 6122 char filename[256]; 6123 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6124 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6125 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6126 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6127 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6128 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6129 } 6130 #endif 6131 6132 if (pcbddc->coarse_ksp) { 6133 Vec crhs,csol; 6134 6135 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6136 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6137 if (!csol) { 6138 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6139 } 6140 if (!crhs) { 6141 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6142 } 6143 } 6144 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6145 6146 /* compute null space for coarse solver if the benign trick has been requested */ 6147 if (pcbddc->benign_null) { 6148 6149 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6150 for (i=0;i<pcbddc->benign_n;i++) { 6151 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6152 } 6153 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6154 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6155 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6156 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6157 if (coarse_mat) { 6158 Vec nullv; 6159 PetscScalar *array,*array2; 6160 PetscInt nl; 6161 6162 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6163 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6164 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6165 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6166 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6167 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6168 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6169 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6170 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6171 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6172 } 6173 } 6174 6175 if (pcbddc->coarse_ksp) { 6176 PetscBool ispreonly; 6177 6178 if (CoarseNullSpace) { 6179 PetscBool isnull; 6180 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6181 if (isnull) { 6182 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6183 } 6184 /* TODO: add local nullspaces (if any) */ 6185 } 6186 /* setup coarse ksp */ 6187 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6188 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6189 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6190 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6191 KSP check_ksp; 6192 KSPType check_ksp_type; 6193 PC check_pc; 6194 Vec check_vec,coarse_vec; 6195 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6196 PetscInt its; 6197 PetscBool compute_eigs; 6198 PetscReal *eigs_r,*eigs_c; 6199 PetscInt neigs; 6200 const char *prefix; 6201 6202 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6203 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6204 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6205 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6206 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6207 /* prevent from setup unneeded object */ 6208 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6209 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6210 if (ispreonly) { 6211 check_ksp_type = KSPPREONLY; 6212 compute_eigs = PETSC_FALSE; 6213 } else { 6214 check_ksp_type = KSPGMRES; 6215 compute_eigs = PETSC_TRUE; 6216 } 6217 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6218 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6219 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6220 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6221 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6222 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6223 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6224 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6225 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6226 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6227 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6228 /* create random vec */ 6229 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6230 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6231 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6232 /* solve coarse problem */ 6233 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6234 /* set eigenvalue estimation if preonly has not been requested */ 6235 if (compute_eigs) { 6236 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6237 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6238 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6239 if (neigs) { 6240 lambda_max = eigs_r[neigs-1]; 6241 lambda_min = eigs_r[0]; 6242 if (pcbddc->use_coarse_estimates) { 6243 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6244 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6245 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6246 } 6247 } 6248 } 6249 } 6250 6251 /* check coarse problem residual error */ 6252 if (pcbddc->dbg_flag) { 6253 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6254 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6255 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6256 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6257 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6258 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6259 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6260 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6261 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6262 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6263 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6264 if (CoarseNullSpace) { 6265 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6266 } 6267 if (compute_eigs) { 6268 PetscReal lambda_max_s,lambda_min_s; 6269 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6270 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6271 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6272 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6273 for (i=0;i<neigs;i++) { 6274 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6275 } 6276 } 6277 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6278 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6279 } 6280 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6281 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6282 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6283 if (compute_eigs) { 6284 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6285 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6286 } 6287 } 6288 } 6289 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6290 /* print additional info */ 6291 if (pcbddc->dbg_flag) { 6292 /* waits until all processes reaches this point */ 6293 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6295 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6296 } 6297 6298 /* free memory */ 6299 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6300 PetscFunctionReturn(0); 6301 } 6302 6303 #undef __FUNCT__ 6304 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6305 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6306 { 6307 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6308 PC_IS* pcis = (PC_IS*)pc->data; 6309 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6310 IS subset,subset_mult,subset_n; 6311 PetscInt local_size,coarse_size=0; 6312 PetscInt *local_primal_indices=NULL; 6313 const PetscInt *t_local_primal_indices; 6314 PetscErrorCode ierr; 6315 6316 PetscFunctionBegin; 6317 /* Compute global number of coarse dofs */ 6318 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6319 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6320 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6321 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6322 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6323 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6324 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6325 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6326 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6327 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6328 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6329 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6330 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6331 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6332 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6333 6334 /* check numbering */ 6335 if (pcbddc->dbg_flag) { 6336 PetscScalar coarsesum,*array,*array2; 6337 PetscInt i; 6338 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6339 6340 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6342 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6343 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6344 /* counter */ 6345 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6346 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6347 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6348 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6349 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6350 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6351 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6352 for (i=0;i<pcbddc->local_primal_size;i++) { 6353 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6354 } 6355 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6356 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6357 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6358 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6359 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6360 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6361 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6362 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6363 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6364 for (i=0;i<pcis->n;i++) { 6365 if (array[i] != 0.0 && array[i] != array2[i]) { 6366 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6367 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6368 set_error = PETSC_TRUE; 6369 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6370 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6371 } 6372 } 6373 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6374 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6375 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6376 for (i=0;i<pcis->n;i++) { 6377 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6378 } 6379 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6380 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6381 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6382 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6383 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6384 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6385 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6386 PetscInt *gidxs; 6387 6388 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6389 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6390 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6391 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6393 for (i=0;i<pcbddc->local_primal_size;i++) { 6394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6395 } 6396 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6397 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6398 } 6399 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6400 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6401 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6402 } 6403 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6404 /* get back data */ 6405 *coarse_size_n = coarse_size; 6406 *local_primal_indices_n = local_primal_indices; 6407 PetscFunctionReturn(0); 6408 } 6409 6410 #undef __FUNCT__ 6411 #define __FUNCT__ "PCBDDCGlobalToLocal" 6412 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6413 { 6414 IS localis_t; 6415 PetscInt i,lsize,*idxs,n; 6416 PetscScalar *vals; 6417 PetscErrorCode ierr; 6418 6419 PetscFunctionBegin; 6420 /* get indices in local ordering exploiting local to global map */ 6421 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6422 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6423 for (i=0;i<lsize;i++) vals[i] = 1.0; 6424 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6425 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6426 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6427 if (idxs) { /* multilevel guard */ 6428 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6429 } 6430 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6431 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6432 ierr = PetscFree(vals);CHKERRQ(ierr); 6433 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6434 /* now compute set in local ordering */ 6435 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6436 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6437 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6438 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6439 for (i=0,lsize=0;i<n;i++) { 6440 if (PetscRealPart(vals[i]) > 0.5) { 6441 lsize++; 6442 } 6443 } 6444 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6445 for (i=0,lsize=0;i<n;i++) { 6446 if (PetscRealPart(vals[i]) > 0.5) { 6447 idxs[lsize++] = i; 6448 } 6449 } 6450 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6451 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6452 *localis = localis_t; 6453 PetscFunctionReturn(0); 6454 } 6455 6456 #undef __FUNCT__ 6457 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6458 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6459 { 6460 PC_IS *pcis=(PC_IS*)pc->data; 6461 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6462 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6463 Mat S_j; 6464 PetscInt *used_xadj,*used_adjncy; 6465 PetscBool free_used_adj; 6466 PetscErrorCode ierr; 6467 6468 PetscFunctionBegin; 6469 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6470 free_used_adj = PETSC_FALSE; 6471 if (pcbddc->sub_schurs_layers == -1) { 6472 used_xadj = NULL; 6473 used_adjncy = NULL; 6474 } else { 6475 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6476 used_xadj = pcbddc->mat_graph->xadj; 6477 used_adjncy = pcbddc->mat_graph->adjncy; 6478 } else if (pcbddc->computed_rowadj) { 6479 used_xadj = pcbddc->mat_graph->xadj; 6480 used_adjncy = pcbddc->mat_graph->adjncy; 6481 } else { 6482 PetscBool flg_row=PETSC_FALSE; 6483 const PetscInt *xadj,*adjncy; 6484 PetscInt nvtxs; 6485 6486 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6487 if (flg_row) { 6488 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6489 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6490 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6491 free_used_adj = PETSC_TRUE; 6492 } else { 6493 pcbddc->sub_schurs_layers = -1; 6494 used_xadj = NULL; 6495 used_adjncy = NULL; 6496 } 6497 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6498 } 6499 } 6500 6501 /* setup sub_schurs data */ 6502 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6503 if (!sub_schurs->schur_explicit) { 6504 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6505 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6506 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6507 } else { 6508 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6509 PetscBool isseqaij,need_change = PETSC_FALSE; 6510 PetscInt benign_n; 6511 Mat change = NULL; 6512 Vec scaling = NULL; 6513 IS change_primal = NULL; 6514 6515 if (!pcbddc->use_vertices && reuse_solvers) { 6516 PetscInt n_vertices; 6517 6518 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6519 reuse_solvers = (PetscBool)!n_vertices; 6520 } 6521 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6522 if (!isseqaij) { 6523 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6524 if (matis->A == pcbddc->local_mat) { 6525 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6526 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6527 } else { 6528 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6529 } 6530 } 6531 if (!pcbddc->benign_change_explicit) { 6532 benign_n = pcbddc->benign_n; 6533 } else { 6534 benign_n = 0; 6535 } 6536 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6537 We need a global reduction to avoid possible deadlocks. 6538 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6539 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6540 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6541 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6542 need_change = (PetscBool)(!need_change); 6543 } 6544 /* If the user defines additional constraints, we import them here. 6545 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6546 if (need_change) { 6547 PC_IS *pcisf; 6548 PC_BDDC *pcbddcf; 6549 PC pcf; 6550 6551 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6552 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6553 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6554 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6555 /* hacks */ 6556 pcisf = (PC_IS*)pcf->data; 6557 pcisf->is_B_local = pcis->is_B_local; 6558 pcisf->vec1_N = pcis->vec1_N; 6559 pcisf->BtoNmap = pcis->BtoNmap; 6560 pcisf->n = pcis->n; 6561 pcisf->n_B = pcis->n_B; 6562 pcbddcf = (PC_BDDC*)pcf->data; 6563 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6564 pcbddcf->mat_graph = pcbddc->mat_graph; 6565 pcbddcf->use_faces = PETSC_TRUE; 6566 pcbddcf->use_change_of_basis = PETSC_TRUE; 6567 pcbddcf->use_change_on_faces = PETSC_TRUE; 6568 pcbddcf->use_qr_single = PETSC_TRUE; 6569 pcbddcf->fake_change = PETSC_TRUE; 6570 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6571 /* store information on primal vertices and change of basis (in local numbering) */ 6572 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6573 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6574 change = pcbddcf->ConstraintMatrix; 6575 pcbddcf->ConstraintMatrix = NULL; 6576 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6577 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6578 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6579 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6580 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6581 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6582 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6583 pcf->ops->destroy = NULL; 6584 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6585 } 6586 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6587 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6588 ierr = MatDestroy(&change);CHKERRQ(ierr); 6589 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6590 } 6591 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6592 6593 /* free adjacency */ 6594 if (free_used_adj) { 6595 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6596 } 6597 PetscFunctionReturn(0); 6598 } 6599 6600 #undef __FUNCT__ 6601 #define __FUNCT__ "PCBDDCInitSubSchurs" 6602 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6603 { 6604 PC_IS *pcis=(PC_IS*)pc->data; 6605 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6606 PCBDDCGraph graph; 6607 PetscErrorCode ierr; 6608 6609 PetscFunctionBegin; 6610 /* attach interface graph for determining subsets */ 6611 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6612 IS verticesIS,verticescomm; 6613 PetscInt vsize,*idxs; 6614 6615 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6616 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6617 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6618 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6619 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6620 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6621 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6622 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6623 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6624 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6625 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6626 } else { 6627 graph = pcbddc->mat_graph; 6628 } 6629 /* print some info */ 6630 if (pcbddc->dbg_flag) { 6631 IS vertices; 6632 PetscInt nv,nedges,nfaces; 6633 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 6634 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6635 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6636 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6637 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6639 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6640 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6641 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6642 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6643 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6644 } 6645 6646 /* sub_schurs init */ 6647 if (!pcbddc->sub_schurs) { 6648 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6649 } 6650 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6651 6652 /* free graph struct */ 6653 if (pcbddc->sub_schurs_rebuild) { 6654 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6655 } 6656 PetscFunctionReturn(0); 6657 } 6658 6659 #undef __FUNCT__ 6660 #define __FUNCT__ "PCBDDCCheckOperator" 6661 PetscErrorCode PCBDDCCheckOperator(PC pc) 6662 { 6663 PC_IS *pcis=(PC_IS*)pc->data; 6664 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6665 PetscErrorCode ierr; 6666 6667 PetscFunctionBegin; 6668 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6669 IS zerodiag = NULL; 6670 Mat S_j,B0_B=NULL; 6671 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6672 PetscScalar *p0_check,*array,*array2; 6673 PetscReal norm; 6674 PetscInt i; 6675 6676 /* B0 and B0_B */ 6677 if (zerodiag) { 6678 IS dummy; 6679 6680 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6681 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6682 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6683 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6684 } 6685 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6686 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6687 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6688 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6689 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6690 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6691 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6692 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6693 /* S_j */ 6694 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6695 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6696 6697 /* mimic vector in \widetilde{W}_\Gamma */ 6698 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6699 /* continuous in primal space */ 6700 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6701 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6702 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6703 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6704 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6705 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6706 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6707 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6708 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6709 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6710 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6711 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6712 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6713 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6714 6715 /* assemble rhs for coarse problem */ 6716 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6717 /* local with Schur */ 6718 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6719 if (zerodiag) { 6720 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6721 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6722 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6723 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6724 } 6725 /* sum on primal nodes the local contributions */ 6726 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6727 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6728 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6729 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6730 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6731 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6732 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6733 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6734 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6735 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6736 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6737 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6738 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6739 /* scale primal nodes (BDDC sums contibutions) */ 6740 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6741 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6742 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6743 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6744 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6745 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6746 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6747 /* global: \widetilde{B0}_B w_\Gamma */ 6748 if (zerodiag) { 6749 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6750 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6751 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6752 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6753 } 6754 /* BDDC */ 6755 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6756 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6757 6758 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6759 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6760 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6761 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6762 for (i=0;i<pcbddc->benign_n;i++) { 6763 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6764 } 6765 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6766 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6767 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6768 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6769 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6770 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6771 } 6772 PetscFunctionReturn(0); 6773 } 6774