1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 9 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 10 { 11 Mat loc_divudotp; 12 Vec p,v,vins,quad_vec,*quad_vecs; 13 ISLocalToGlobalMapping map; 14 IS *faces,*edges; 15 PetscScalar *vals; 16 const PetscScalar *array; 17 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 18 PetscMPIInt rank; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 23 if (graph->twodim) { 24 lmaxneighs = 2; 25 } else { 26 lmaxneighs = 1; 27 for (i=0;i<ne;i++) { 28 const PetscInt *idxs; 29 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 30 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 31 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 32 } 33 lmaxneighs++; /* graph count does not include self */ 34 } 35 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 36 maxsize = 0; 37 for (i=0;i<ne;i++) { 38 PetscInt nn; 39 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 40 maxsize = PetscMax(maxsize,nn); 41 } 42 for (i=0;i<nf;i++) { 43 PetscInt nn; 44 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 45 maxsize = PetscMax(maxsize,nn); 46 } 47 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 48 /* create vectors to hold quadrature weights */ 49 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 50 if (!transpose) { 51 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 52 } else { 53 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 54 } 55 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 56 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 57 for (i=0;i<maxneighs;i++) { 58 PetscInt first,last; 59 60 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 61 /* the near-null space fo BDDC carries information on quadrature weights, 62 and these can be collinear -> so cheat with MatNullSpaceCreate 63 and create a suitable set of basis vectors first */ 64 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 65 if (i>=first && i < last) { 66 PetscScalar *data; 67 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 68 data[i-first] = 1.; 69 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 70 } 71 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 72 } 73 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 74 for (i=0;i<maxneighs;i++) { /* reset vectors */ 75 PetscInt first,last; 76 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 77 if (i>=first && i < last) { 78 PetscScalar *data; 79 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 80 data[i-first] = 0.; 81 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 82 } 83 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 84 } 85 /* compute local quad vec */ 86 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 87 if (!transpose) { 88 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 89 } else { 90 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 91 } 92 ierr = VecSet(p,1.);CHKERRQ(ierr); 93 if (!transpose) { 94 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 95 } else { 96 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 97 } 98 if (vl2l) { 99 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 100 } else { 101 vins = v; 102 } 103 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 104 ierr = VecDestroy(&p);CHKERRQ(ierr); 105 106 /* insert in global quadrature vecs */ 107 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 108 for (i=0;i<nf;i++) { 109 const PetscInt *idxs; 110 PetscInt idx,nn,j; 111 112 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 113 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 114 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 115 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 116 idx = -(idx+1); 117 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 118 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 119 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 120 } 121 for (i=0;i<ne;i++) { 122 const PetscInt *idxs; 123 PetscInt idx,nn,j; 124 125 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 126 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 127 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 128 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 129 idx = -(idx+1); 130 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 132 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 133 } 134 ierr = PetscFree(edges);CHKERRQ(ierr); 135 ierr = PetscFree(faces);CHKERRQ(ierr); 136 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 137 if (vl2l) { 138 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 139 } 140 ierr = VecDestroy(&v);CHKERRQ(ierr); 141 ierr = PetscFree(vals);CHKERRQ(ierr); 142 143 /* assemble near null space */ 144 for (i=0;i<maxneighs;i++) { 145 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 146 } 147 for (i=0;i<maxneighs;i++) { 148 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 149 } 150 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 157 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 158 { 159 PetscErrorCode ierr; 160 Vec local,global; 161 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 162 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 163 164 PetscFunctionBegin; 165 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 166 /* need to convert from global to local topology information and remove references to information in global ordering */ 167 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 168 if (pcbddc->user_provided_isfordofs) { 169 if (pcbddc->n_ISForDofs) { 170 PetscInt i; 171 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 172 for (i=0;i<pcbddc->n_ISForDofs;i++) { 173 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 174 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 175 } 176 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 177 pcbddc->n_ISForDofs = 0; 178 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 179 } 180 } else { 181 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 182 PetscInt i, n = matis->A->rmap->n; 183 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 184 if (i > 1) { 185 pcbddc->n_ISForDofsLocal = i; 186 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 187 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 188 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 189 } 190 } 191 } 192 } 193 194 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 195 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 196 } 197 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 198 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 199 } 200 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 201 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 202 } 203 ierr = VecDestroy(&global);CHKERRQ(ierr); 204 ierr = VecDestroy(&local);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 #undef __FUNCT__ 209 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 210 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 211 { 212 PC_IS *pcis = (PC_IS*)(pc->data); 213 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 if (!pcbddc->benign_have_null) { 218 PetscFunctionReturn(0); 219 } 220 if (pcbddc->ChangeOfBasisMatrix) { 221 Vec swap; 222 223 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 224 swap = pcbddc->work_change; 225 pcbddc->work_change = r; 226 r = swap; 227 } 228 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 229 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 230 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 231 ierr = VecSet(z,0.);CHKERRQ(ierr); 232 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 233 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 234 if (pcbddc->ChangeOfBasisMatrix) { 235 Vec swap; 236 237 swap = r; 238 r = pcbddc->work_change; 239 pcbddc->work_change = swap; 240 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 241 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 248 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 249 { 250 PCBDDCBenignMatMult_ctx ctx; 251 PetscErrorCode ierr; 252 PetscBool apply_right,apply_left,reset_x; 253 254 PetscFunctionBegin; 255 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 256 if (transpose) { 257 apply_right = ctx->apply_left; 258 apply_left = ctx->apply_right; 259 } else { 260 apply_right = ctx->apply_right; 261 apply_left = ctx->apply_left; 262 } 263 reset_x = PETSC_FALSE; 264 if (apply_right) { 265 const PetscScalar *ax; 266 PetscInt nl,i; 267 268 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 269 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 270 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 272 for (i=0;i<ctx->benign_n;i++) { 273 PetscScalar sum,val; 274 const PetscInt *idxs; 275 PetscInt nz,j; 276 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 277 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 278 sum = 0.; 279 if (ctx->apply_p0) { 280 val = ctx->work[idxs[nz-1]]; 281 for (j=0;j<nz-1;j++) { 282 sum += ctx->work[idxs[j]]; 283 ctx->work[idxs[j]] += val; 284 } 285 } else { 286 for (j=0;j<nz-1;j++) { 287 sum += ctx->work[idxs[j]]; 288 } 289 } 290 ctx->work[idxs[nz-1]] -= sum; 291 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 292 } 293 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 294 reset_x = PETSC_TRUE; 295 } 296 if (transpose) { 297 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 298 } else { 299 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 300 } 301 if (reset_x) { 302 ierr = VecResetArray(x);CHKERRQ(ierr); 303 } 304 if (apply_left) { 305 PetscScalar *ay; 306 PetscInt i; 307 308 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 309 for (i=0;i<ctx->benign_n;i++) { 310 PetscScalar sum,val; 311 const PetscInt *idxs; 312 PetscInt nz,j; 313 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 314 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 315 val = -ay[idxs[nz-1]]; 316 if (ctx->apply_p0) { 317 sum = 0.; 318 for (j=0;j<nz-1;j++) { 319 sum += ay[idxs[j]]; 320 ay[idxs[j]] += val; 321 } 322 ay[idxs[nz-1]] += sum; 323 } else { 324 for (j=0;j<nz-1;j++) { 325 ay[idxs[j]] += val; 326 } 327 ay[idxs[nz-1]] = 0.; 328 } 329 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 330 } 331 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 332 } 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 338 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 339 { 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 349 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 350 { 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNCT__ 359 #define __FUNCT__ "PCBDDCBenignShellMat" 360 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 361 { 362 PC_IS *pcis = (PC_IS*)pc->data; 363 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 364 PCBDDCBenignMatMult_ctx ctx; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (!restore) { 369 Mat A_IB,A_BI; 370 PetscScalar *work; 371 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 372 373 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 374 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 375 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 376 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 377 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 378 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 379 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 380 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 381 ierr = PetscNew(&ctx);CHKERRQ(ierr); 382 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 383 ctx->apply_left = PETSC_TRUE; 384 ctx->apply_right = PETSC_FALSE; 385 ctx->apply_p0 = PETSC_FALSE; 386 ctx->benign_n = pcbddc->benign_n; 387 if (reuse) { 388 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 389 ctx->free = PETSC_FALSE; 390 } else { /* TODO: could be optimized for successive solves */ 391 ISLocalToGlobalMapping N_to_D; 392 PetscInt i; 393 394 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 395 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 396 for (i=0;i<pcbddc->benign_n;i++) { 397 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 398 } 399 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 400 ctx->free = PETSC_TRUE; 401 } 402 ctx->A = pcis->A_IB; 403 ctx->work = work; 404 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 405 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 406 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 407 pcis->A_IB = A_IB; 408 409 /* A_BI as A_IB^T */ 410 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 411 pcbddc->benign_original_mat = pcis->A_BI; 412 pcis->A_BI = A_BI; 413 } else { 414 if (!pcbddc->benign_original_mat) { 415 PetscFunctionReturn(0); 416 } 417 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 418 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 419 pcis->A_IB = ctx->A; 420 ctx->A = NULL; 421 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 422 pcis->A_BI = pcbddc->benign_original_mat; 423 pcbddc->benign_original_mat = NULL; 424 if (ctx->free) { 425 PetscInt i; 426 for (i=0;i<ctx->benign_n;i++) { 427 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 428 } 429 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 430 } 431 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 432 ierr = PetscFree(ctx);CHKERRQ(ierr); 433 } 434 PetscFunctionReturn(0); 435 } 436 437 /* used just in bddc debug mode */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "PCBDDCBenignProject" 440 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 441 { 442 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 443 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 444 Mat An; 445 PetscErrorCode ierr; 446 447 PetscFunctionBegin; 448 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 449 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 450 if (is1) { 451 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 452 ierr = MatDestroy(&An);CHKERRQ(ierr); 453 } else { 454 *B = An; 455 } 456 PetscFunctionReturn(0); 457 } 458 459 /* TODO: add reuse flag */ 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatSeqAIJCompress" 462 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 463 { 464 Mat Bt; 465 PetscScalar *a,*bdata; 466 const PetscInt *ii,*ij; 467 PetscInt m,n,i,nnz,*bii,*bij; 468 PetscBool flg_row; 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 473 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 474 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 475 nnz = n; 476 for (i=0;i<ii[n];i++) { 477 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 478 } 479 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 480 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 481 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 482 nnz = 0; 483 bii[0] = 0; 484 for (i=0;i<n;i++) { 485 PetscInt j; 486 for (j=ii[i];j<ii[i+1];j++) { 487 PetscScalar entry = a[j]; 488 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 489 bij[nnz] = ij[j]; 490 bdata[nnz] = entry; 491 nnz++; 492 } 493 } 494 bii[i+1] = nnz; 495 } 496 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 497 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 498 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 499 { 500 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 501 b->free_a = PETSC_TRUE; 502 b->free_ij = PETSC_TRUE; 503 } 504 *B = Bt; 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatDetectDisconnectedComponents" 510 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 511 { 512 Mat B; 513 IS is_dummy,*cc_n; 514 ISLocalToGlobalMapping l2gmap_dummy; 515 PCBDDCGraph graph; 516 PetscInt i,n; 517 PetscInt *xadj,*adjncy; 518 PetscInt *xadj_filtered,*adjncy_filtered; 519 PetscBool flg_row,isseqaij; 520 PetscErrorCode ierr; 521 522 PetscFunctionBegin; 523 if (!A->rmap->N || !A->cmap->N) { 524 *ncc = 0; 525 *cc = NULL; 526 PetscFunctionReturn(0); 527 } 528 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 529 if (!isseqaij && filter) { 530 PetscBool isseqdense; 531 532 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 533 if (!isseqdense) { 534 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 535 } else { /* TODO: rectangular case and LDA */ 536 PetscScalar *array; 537 PetscReal chop=1.e-6; 538 539 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 540 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 541 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 542 for (i=0;i<n;i++) { 543 PetscInt j; 544 for (j=i+1;j<n;j++) { 545 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 546 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 547 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 548 } 549 } 550 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 551 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 552 } 553 } else { 554 B = A; 555 } 556 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 557 558 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 559 if (filter) { 560 PetscScalar *data; 561 PetscInt j,cum; 562 563 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 564 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 565 cum = 0; 566 for (i=0;i<n;i++) { 567 PetscInt t; 568 569 for (j=xadj[i];j<xadj[i+1];j++) { 570 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 571 continue; 572 } 573 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 574 } 575 t = xadj_filtered[i]; 576 xadj_filtered[i] = cum; 577 cum += t; 578 } 579 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 580 } else { 581 xadj_filtered = NULL; 582 adjncy_filtered = NULL; 583 } 584 585 /* compute local connected components using PCBDDCGraph */ 586 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 587 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 589 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 590 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 591 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 592 if (xadj_filtered) { 593 graph->xadj = xadj_filtered; 594 graph->adjncy = adjncy_filtered; 595 } else { 596 graph->xadj = xadj; 597 graph->adjncy = adjncy; 598 } 599 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 600 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 601 /* partial clean up */ 602 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 603 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 604 if (A != B) { 605 ierr = MatDestroy(&B);CHKERRQ(ierr); 606 } 607 608 /* get back data */ 609 if (ncc) *ncc = graph->ncc; 610 if (cc) { 611 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 612 for (i=0;i<graph->ncc;i++) { 613 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 614 } 615 *cc = cc_n; 616 } 617 /* clean up graph */ 618 graph->xadj = 0; 619 graph->adjncy = 0; 620 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "PCBDDCBenignCheck" 626 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 627 { 628 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 629 PC_IS* pcis = (PC_IS*)(pc->data); 630 IS dirIS = NULL; 631 PetscInt i; 632 PetscErrorCode ierr; 633 634 PetscFunctionBegin; 635 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 636 if (zerodiag) { 637 Mat A; 638 Vec vec3_N; 639 PetscScalar *vals; 640 const PetscInt *idxs; 641 PetscInt nz,*count; 642 643 /* p0 */ 644 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 645 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 646 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 647 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 648 for (i=0;i<nz;i++) vals[i] = 1.; 649 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 650 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 651 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 652 /* v_I */ 653 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 654 for (i=0;i<nz;i++) vals[i] = 0.; 655 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 657 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 658 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 659 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 661 if (dirIS) { 662 PetscInt n; 663 664 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 665 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 666 for (i=0;i<n;i++) vals[i] = 0.; 667 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 668 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 669 } 670 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 671 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 672 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 673 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 674 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 675 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 676 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 677 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 678 ierr = PetscFree(vals);CHKERRQ(ierr); 679 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 680 681 /* there should not be any pressure dofs lying on the interface */ 682 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 683 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 684 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 685 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 686 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 687 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 688 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 689 ierr = PetscFree(count);CHKERRQ(ierr); 690 } 691 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 692 693 /* check PCBDDCBenignGetOrSetP0 */ 694 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 695 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 696 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 697 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 698 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 699 for (i=0;i<pcbddc->benign_n;i++) { 700 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 701 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 702 } 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 708 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 709 { 710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 711 IS pressures,zerodiag,*zerodiag_subs; 712 PetscInt nz,n; 713 PetscInt *interior_dofs,n_interior_dofs; 714 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 720 for (n=0;n<pcbddc->benign_n;n++) { 721 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 722 } 723 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 724 pcbddc->benign_n = 0; 725 /* if a local info on dofs is present, assumes that the last field represents "pressures" 726 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 727 Checks if all the pressure dofs in each subdomain have a zero diagonal 728 If not, a change of basis on pressures is not needed 729 since the local Schur complements are already SPD 730 */ 731 has_null_pressures = PETSC_TRUE; 732 have_null = PETSC_TRUE; 733 if (pcbddc->n_ISForDofsLocal) { 734 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 735 736 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 737 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 738 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 739 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 740 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 741 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 742 if (!sorted) { 743 ierr = ISSort(pressures);CHKERRQ(ierr); 744 } 745 } else { 746 pressures = NULL; 747 } 748 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 749 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 750 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 751 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 752 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 753 if (!sorted) { 754 ierr = ISSort(zerodiag);CHKERRQ(ierr); 755 } 756 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 757 if (!nz) { 758 if (n) have_null = PETSC_FALSE; 759 has_null_pressures = PETSC_FALSE; 760 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 761 } 762 recompute_zerodiag = PETSC_FALSE; 763 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 764 zerodiag_subs = NULL; 765 pcbddc->benign_n = 0; 766 n_interior_dofs = 0; 767 interior_dofs = NULL; 768 if (pcbddc->current_level) { /* need to compute interior nodes */ 769 PetscInt n,i,j; 770 PetscInt n_neigh,*neigh,*n_shared,**shared; 771 PetscInt *iwork; 772 773 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 774 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 775 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 776 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 777 for (i=0;i<n_neigh;i++) 778 for (j=0;j<n_shared[i];j++) 779 iwork[shared[i][j]] += 1; 780 for (i=0;i<n;i++) 781 if (!iwork[i]) 782 interior_dofs[n_interior_dofs++] = i; 783 ierr = PetscFree(iwork);CHKERRQ(ierr); 784 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 785 } 786 if (has_null_pressures) { 787 IS *subs; 788 PetscInt nsubs,i,j,nl; 789 const PetscInt *idxs; 790 PetscScalar *array; 791 Vec *work; 792 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 793 794 subs = pcbddc->local_subs; 795 nsubs = pcbddc->n_local_subs; 796 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 797 if (pcbddc->current_level) { 798 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 799 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 800 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 801 /* work[0] = 1_p */ 802 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 803 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 804 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 805 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 806 /* work[0] = 1_v */ 807 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 808 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 809 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 810 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 811 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 812 } 813 if (nsubs > 1) { 814 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 815 for (i=0;i<nsubs;i++) { 816 ISLocalToGlobalMapping l2g; 817 IS t_zerodiag_subs; 818 PetscInt nl; 819 820 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 821 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 822 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 823 if (nl) { 824 PetscBool valid = PETSC_TRUE; 825 826 if (pcbddc->current_level) { 827 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 828 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 829 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 830 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 831 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 832 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 833 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 834 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 835 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 836 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 837 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 838 for (j=0;j<n_interior_dofs;j++) { 839 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 840 valid = PETSC_FALSE; 841 break; 842 } 843 } 844 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 845 } 846 if (valid && pcbddc->NeumannBoundariesLocal) { 847 IS t_bc; 848 PetscInt nzb; 849 850 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 851 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 852 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 853 if (nzb) valid = PETSC_FALSE; 854 } 855 if (valid && pressures) { 856 IS t_pressure_subs; 857 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 858 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 859 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 860 } 861 if (valid) { 862 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 863 pcbddc->benign_n++; 864 } else { 865 recompute_zerodiag = PETSC_TRUE; 866 } 867 } 868 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 869 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 870 } 871 } else { /* there's just one subdomain (or zero if they have not been detected */ 872 PetscBool valid = PETSC_TRUE; 873 874 if (pcbddc->NeumannBoundariesLocal) { 875 PetscInt nzb; 876 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 877 if (nzb) valid = PETSC_FALSE; 878 } 879 if (valid && pressures) { 880 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 881 } 882 if (valid && pcbddc->current_level) { 883 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 884 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 885 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 886 for (j=0;j<n_interior_dofs;j++) { 887 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 888 valid = PETSC_FALSE; 889 break; 890 } 891 } 892 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 893 } 894 if (valid) { 895 pcbddc->benign_n = 1; 896 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 897 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 898 zerodiag_subs[0] = zerodiag; 899 } 900 } 901 if (pcbddc->current_level) { 902 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 903 } 904 } 905 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 906 907 if (!pcbddc->benign_n) { 908 PetscInt n; 909 910 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 911 recompute_zerodiag = PETSC_FALSE; 912 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 913 if (n) { 914 has_null_pressures = PETSC_FALSE; 915 have_null = PETSC_FALSE; 916 } 917 } 918 919 /* final check for null pressures */ 920 if (zerodiag && pressures) { 921 PetscInt nz,np; 922 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 923 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 924 if (nz != np) have_null = PETSC_FALSE; 925 } 926 927 if (recompute_zerodiag) { 928 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 929 if (pcbddc->benign_n == 1) { 930 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 931 zerodiag = zerodiag_subs[0]; 932 } else { 933 PetscInt i,nzn,*new_idxs; 934 935 nzn = 0; 936 for (i=0;i<pcbddc->benign_n;i++) { 937 PetscInt ns; 938 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 939 nzn += ns; 940 } 941 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 942 nzn = 0; 943 for (i=0;i<pcbddc->benign_n;i++) { 944 PetscInt ns,*idxs; 945 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 946 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 947 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 948 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 949 nzn += ns; 950 } 951 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 952 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 953 } 954 have_null = PETSC_FALSE; 955 } 956 957 /* Prepare matrix to compute no-net-flux */ 958 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 959 Mat A,loc_divudotp; 960 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 961 IS row,col,isused = NULL; 962 PetscInt M,N,n,st,n_isused; 963 964 if (pressures) { 965 isused = pressures; 966 } else { 967 isused = zerodiag; 968 } 969 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 970 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 971 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 972 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 973 n_isused = 0; 974 if (isused) { 975 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 976 } 977 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 978 st = st-n_isused; 979 if (n) { 980 const PetscInt *gidxs; 981 982 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 983 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 984 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 985 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 986 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 987 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 988 } else { 989 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 990 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 991 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 992 } 993 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 994 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 995 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 996 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 997 ierr = ISDestroy(&row);CHKERRQ(ierr); 998 ierr = ISDestroy(&col);CHKERRQ(ierr); 999 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1000 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1001 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1002 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1003 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1004 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1005 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1006 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1007 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1008 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 } 1010 1011 /* change of basis and p0 dofs */ 1012 if (has_null_pressures) { 1013 IS zerodiagc; 1014 const PetscInt *idxs,*idxsc; 1015 PetscInt i,s,*nnz; 1016 1017 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1018 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1019 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1020 /* local change of basis for pressures */ 1021 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1022 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1023 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1024 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1026 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1027 for (i=0;i<pcbddc->benign_n;i++) { 1028 PetscInt nzs,j; 1029 1030 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1031 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1032 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1033 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1034 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1035 } 1036 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1037 ierr = PetscFree(nnz);CHKERRQ(ierr); 1038 /* set identity on velocities */ 1039 for (i=0;i<n-nz;i++) { 1040 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1041 } 1042 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1043 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1044 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1045 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1046 /* set change on pressures */ 1047 for (s=0;s<pcbddc->benign_n;s++) { 1048 PetscScalar *array; 1049 PetscInt nzs; 1050 1051 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1052 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1053 for (i=0;i<nzs-1;i++) { 1054 PetscScalar vals[2]; 1055 PetscInt cols[2]; 1056 1057 cols[0] = idxs[i]; 1058 cols[1] = idxs[nzs-1]; 1059 vals[0] = 1.; 1060 vals[1] = 1.; 1061 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1062 } 1063 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1064 for (i=0;i<nzs-1;i++) array[i] = -1.; 1065 array[nzs-1] = 1.; 1066 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1067 /* store local idxs for p0 */ 1068 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1069 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1070 ierr = PetscFree(array);CHKERRQ(ierr); 1071 } 1072 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1073 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 /* project if needed */ 1075 if (pcbddc->benign_change_explicit) { 1076 Mat M; 1077 1078 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1079 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1080 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1081 ierr = MatDestroy(&M);CHKERRQ(ierr); 1082 } 1083 /* store global idxs for p0 */ 1084 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1085 } 1086 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1087 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1088 1089 /* determines if the coarse solver will be singular or not */ 1090 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1091 /* determines if the problem has subdomains with 0 pressure block */ 1092 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1093 *zerodiaglocal = zerodiag; 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1099 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1100 { 1101 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1102 PetscScalar *array; 1103 PetscErrorCode ierr; 1104 1105 PetscFunctionBegin; 1106 if (!pcbddc->benign_sf) { 1107 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1108 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1109 } 1110 if (get) { 1111 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1112 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1113 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1115 } else { 1116 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1117 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1118 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1119 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1120 } 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1126 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1127 { 1128 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 /* TODO: add error checking 1133 - avoid nested pop (or push) calls. 1134 - cannot push before pop. 1135 - cannot call this if pcbddc->local_mat is NULL 1136 */ 1137 if (!pcbddc->benign_n) { 1138 PetscFunctionReturn(0); 1139 } 1140 if (pop) { 1141 if (pcbddc->benign_change_explicit) { 1142 IS is_p0; 1143 MatReuse reuse; 1144 1145 /* extract B_0 */ 1146 reuse = MAT_INITIAL_MATRIX; 1147 if (pcbddc->benign_B0) { 1148 reuse = MAT_REUSE_MATRIX; 1149 } 1150 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1151 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1152 /* remove rows and cols from local problem */ 1153 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1154 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1155 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1156 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1157 } else { 1158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1159 PetscScalar *vals; 1160 PetscInt i,n,*idxs_ins; 1161 1162 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1163 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1164 if (!pcbddc->benign_B0) { 1165 PetscInt *nnz; 1166 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1167 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1168 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1169 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1170 for (i=0;i<pcbddc->benign_n;i++) { 1171 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1172 nnz[i] = n - nnz[i]; 1173 } 1174 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1175 ierr = PetscFree(nnz);CHKERRQ(ierr); 1176 } 1177 1178 for (i=0;i<pcbddc->benign_n;i++) { 1179 PetscScalar *array; 1180 PetscInt *idxs,j,nz,cum; 1181 1182 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1183 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1184 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1185 for (j=0;j<nz;j++) vals[j] = 1.; 1186 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1187 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1188 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1189 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1190 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1191 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1192 cum = 0; 1193 for (j=0;j<n;j++) { 1194 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1195 vals[cum] = array[j]; 1196 idxs_ins[cum] = j; 1197 cum++; 1198 } 1199 } 1200 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1201 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1202 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1203 } 1204 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1205 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1206 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1207 } 1208 } else { /* push */ 1209 if (pcbddc->benign_change_explicit) { 1210 PetscInt i; 1211 1212 for (i=0;i<pcbddc->benign_n;i++) { 1213 PetscScalar *B0_vals; 1214 PetscInt *B0_cols,B0_ncol; 1215 1216 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1217 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1218 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1219 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1220 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1221 } 1222 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1223 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1224 } else { 1225 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1226 } 1227 } 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1234 { 1235 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1236 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1237 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1238 PetscBLASInt *B_iwork,*B_ifail; 1239 PetscScalar *work,lwork; 1240 PetscScalar *St,*S,*eigv; 1241 PetscScalar *Sarray,*Starray; 1242 PetscReal *eigs,thresh; 1243 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1244 PetscBool allocated_S_St; 1245 #if defined(PETSC_USE_COMPLEX) 1246 PetscReal *rwork; 1247 #endif 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1252 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1253 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1254 1255 if (pcbddc->dbg_flag) { 1256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1259 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1260 } 1261 1262 if (pcbddc->dbg_flag) { 1263 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1264 } 1265 1266 /* max size of subsets */ 1267 mss = 0; 1268 for (i=0;i<sub_schurs->n_subs;i++) { 1269 PetscInt subset_size; 1270 1271 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1272 mss = PetscMax(mss,subset_size); 1273 } 1274 1275 /* min/max and threshold */ 1276 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1277 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1278 nmax = PetscMax(nmin,nmax); 1279 allocated_S_St = PETSC_FALSE; 1280 if (nmin) { 1281 allocated_S_St = PETSC_TRUE; 1282 } 1283 1284 /* allocate lapack workspace */ 1285 cum = cum2 = 0; 1286 maxneigs = 0; 1287 for (i=0;i<sub_schurs->n_subs;i++) { 1288 PetscInt n,subset_size; 1289 1290 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1291 n = PetscMin(subset_size,nmax); 1292 cum += subset_size; 1293 cum2 += subset_size*n; 1294 maxneigs = PetscMax(maxneigs,n); 1295 } 1296 if (mss) { 1297 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1298 PetscBLASInt B_itype = 1; 1299 PetscBLASInt B_N = mss; 1300 PetscReal zero = 0.0; 1301 PetscReal eps = 0.0; /* dlamch? */ 1302 1303 B_lwork = -1; 1304 S = NULL; 1305 St = NULL; 1306 eigs = NULL; 1307 eigv = NULL; 1308 B_iwork = NULL; 1309 B_ifail = NULL; 1310 #if defined(PETSC_USE_COMPLEX) 1311 rwork = NULL; 1312 #endif 1313 thresh = 1.0; 1314 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1315 #if defined(PETSC_USE_COMPLEX) 1316 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1317 #else 1318 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1319 #endif 1320 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1321 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1322 } else { 1323 /* TODO */ 1324 } 1325 } else { 1326 lwork = 0; 1327 } 1328 1329 nv = 0; 1330 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1331 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1332 } 1333 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1334 if (allocated_S_St) { 1335 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1336 } 1337 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1338 #if defined(PETSC_USE_COMPLEX) 1339 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1340 #endif 1341 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1342 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1343 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1344 nv+cum,&pcbddc->adaptive_constraints_idxs, 1345 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1346 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1347 1348 maxneigs = 0; 1349 cum = cumarray = 0; 1350 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1351 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1352 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1353 const PetscInt *idxs; 1354 1355 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1356 for (cum=0;cum<nv;cum++) { 1357 pcbddc->adaptive_constraints_n[cum] = 1; 1358 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1359 pcbddc->adaptive_constraints_data[cum] = 1.0; 1360 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1361 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1362 } 1363 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1364 } 1365 1366 if (mss) { /* multilevel */ 1367 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1368 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1369 } 1370 1371 thresh = pcbddc->adaptive_threshold; 1372 for (i=0;i<sub_schurs->n_subs;i++) { 1373 const PetscInt *idxs; 1374 PetscReal upper,lower; 1375 PetscInt j,subset_size,eigs_start = 0; 1376 PetscBLASInt B_N; 1377 PetscBool same_data = PETSC_FALSE; 1378 1379 if (pcbddc->use_deluxe_scaling) { 1380 upper = PETSC_MAX_REAL; 1381 lower = thresh; 1382 } else { 1383 upper = 1./thresh; 1384 lower = 0.; 1385 } 1386 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1387 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1388 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1389 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1390 if (sub_schurs->is_hermitian) { 1391 PetscInt j,k; 1392 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1393 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1395 } 1396 for (j=0;j<subset_size;j++) { 1397 for (k=j;k<subset_size;k++) { 1398 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1399 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1400 } 1401 } 1402 } else { 1403 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1404 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1405 } 1406 } else { 1407 S = Sarray + cumarray; 1408 St = Starray + cumarray; 1409 } 1410 /* see if we can save some work */ 1411 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1412 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1413 } 1414 1415 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1416 B_neigs = 0; 1417 } else { 1418 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1419 PetscBLASInt B_itype = 1; 1420 PetscBLASInt B_IL, B_IU; 1421 PetscReal eps = -1.0; /* dlamch? */ 1422 PetscInt nmin_s; 1423 PetscBool compute_range = PETSC_FALSE; 1424 1425 if (pcbddc->dbg_flag) { 1426 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1427 } 1428 1429 compute_range = PETSC_FALSE; 1430 if (thresh > 1.+PETSC_SMALL && !same_data) { 1431 compute_range = PETSC_TRUE; 1432 } 1433 1434 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1435 if (compute_range) { 1436 1437 /* ask for eigenvalues larger than thresh */ 1438 #if defined(PETSC_USE_COMPLEX) 1439 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1440 #else 1441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1442 #endif 1443 } else if (!same_data) { 1444 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1445 B_IL = 1; 1446 #if defined(PETSC_USE_COMPLEX) 1447 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1448 #else 1449 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1450 #endif 1451 } else { /* same_data is true, so get the adaptive function requested by the user */ 1452 PetscInt k; 1453 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1454 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1455 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1456 nmin = nmax; 1457 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1458 for (k=0;k<nmax;k++) { 1459 eigs[k] = 1./PETSC_SMALL; 1460 eigv[k*(subset_size+1)] = 1.0; 1461 } 1462 } 1463 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1464 if (B_ierr) { 1465 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1466 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1467 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1468 } 1469 1470 if (B_neigs > nmax) { 1471 if (pcbddc->dbg_flag) { 1472 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1473 } 1474 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1475 B_neigs = nmax; 1476 } 1477 1478 nmin_s = PetscMin(nmin,B_N); 1479 if (B_neigs < nmin_s) { 1480 PetscBLASInt B_neigs2; 1481 1482 if (pcbddc->use_deluxe_scaling) { 1483 B_IL = B_N - nmin_s + 1; 1484 B_IU = B_N - B_neigs; 1485 } else { 1486 B_IL = B_neigs + 1; 1487 B_IU = nmin_s; 1488 } 1489 if (pcbddc->dbg_flag) { 1490 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1491 } 1492 if (sub_schurs->is_hermitian) { 1493 PetscInt j,k; 1494 for (j=0;j<subset_size;j++) { 1495 for (k=j;k<subset_size;k++) { 1496 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1497 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1498 } 1499 } 1500 } else { 1501 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1502 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1503 } 1504 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1505 #if defined(PETSC_USE_COMPLEX) 1506 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1507 #else 1508 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1509 #endif 1510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1511 B_neigs += B_neigs2; 1512 } 1513 if (B_ierr) { 1514 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1515 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1516 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1517 } 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1520 for (j=0;j<B_neigs;j++) { 1521 if (eigs[j] == 0.0) { 1522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1523 } else { 1524 if (pcbddc->use_deluxe_scaling) { 1525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1526 } else { 1527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1528 } 1529 } 1530 } 1531 } 1532 } else { 1533 /* TODO */ 1534 } 1535 } 1536 /* change the basis back to the original one */ 1537 if (sub_schurs->change) { 1538 Mat change,phi,phit; 1539 1540 if (pcbddc->dbg_flag > 1) { 1541 PetscInt ii; 1542 for (ii=0;ii<B_neigs;ii++) { 1543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1544 for (j=0;j<B_N;j++) { 1545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1546 } 1547 } 1548 } 1549 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1550 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1551 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1552 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1553 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1554 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1555 } 1556 maxneigs = PetscMax(B_neigs,maxneigs); 1557 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1558 if (B_neigs) { 1559 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1560 1561 if (pcbddc->dbg_flag > 1) { 1562 PetscInt ii; 1563 for (ii=0;ii<B_neigs;ii++) { 1564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1565 for (j=0;j<B_N;j++) { 1566 #if defined(PETSC_USE_COMPLEX) 1567 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1568 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1570 #else 1571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1572 #endif 1573 } 1574 } 1575 } 1576 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1577 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1578 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1579 cum++; 1580 } 1581 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1582 /* shift for next computation */ 1583 cumarray += subset_size*subset_size; 1584 } 1585 if (pcbddc->dbg_flag) { 1586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1587 } 1588 1589 if (mss) { 1590 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1591 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1592 /* destroy matrices (junk) */ 1593 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1594 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1595 } 1596 if (allocated_S_St) { 1597 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1598 } 1599 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1600 #if defined(PETSC_USE_COMPLEX) 1601 ierr = PetscFree(rwork);CHKERRQ(ierr); 1602 #endif 1603 if (pcbddc->dbg_flag) { 1604 PetscInt maxneigs_r; 1605 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1606 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 #undef __FUNCT__ 1612 #define __FUNCT__ "PCBDDCSetUpSolvers" 1613 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1614 { 1615 PetscScalar *coarse_submat_vals; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1620 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1621 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1622 1623 /* Setup local neumann solver ksp_R */ 1624 /* PCBDDCSetUpLocalScatters should be called first! */ 1625 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1626 1627 /* 1628 Setup local correction and local part of coarse basis. 1629 Gives back the dense local part of the coarse matrix in column major ordering 1630 */ 1631 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1632 1633 /* Compute total number of coarse nodes and setup coarse solver */ 1634 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1635 1636 /* free */ 1637 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "PCBDDCResetCustomization" 1643 PetscErrorCode PCBDDCResetCustomization(PC pc) 1644 { 1645 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1646 PetscErrorCode ierr; 1647 1648 PetscFunctionBegin; 1649 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1650 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1651 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1652 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1653 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1654 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1655 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1656 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1657 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1658 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1659 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "PCBDDCResetTopography" 1665 PetscErrorCode PCBDDCResetTopography(PC pc) 1666 { 1667 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1668 PetscInt i; 1669 PetscErrorCode ierr; 1670 1671 PetscFunctionBegin; 1672 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1673 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1674 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1675 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1676 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1677 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1678 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1679 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1680 pcbddc->graphanalyzed = PETSC_FALSE; 1681 for (i=0;i<pcbddc->n_local_subs;i++) { 1682 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1683 } 1684 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1685 if (pcbddc->sub_schurs) { 1686 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1687 } 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "PCBDDCResetSolvers" 1693 PetscErrorCode PCBDDCResetSolvers(PC pc) 1694 { 1695 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1700 if (pcbddc->coarse_phi_B) { 1701 PetscScalar *array; 1702 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1703 ierr = PetscFree(array);CHKERRQ(ierr); 1704 } 1705 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1706 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1707 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1708 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1709 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1710 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1711 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1712 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1714 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1715 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1716 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1717 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1718 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1719 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1720 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1721 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1722 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1723 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1724 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1725 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1726 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1727 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1728 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1729 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1730 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1731 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1732 if (pcbddc->benign_zerodiag_subs) { 1733 PetscInt i; 1734 for (i=0;i<pcbddc->benign_n;i++) { 1735 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1736 } 1737 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1738 } 1739 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1745 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1746 { 1747 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1748 PC_IS *pcis = (PC_IS*)pc->data; 1749 VecType impVecType; 1750 PetscInt n_constraints,n_R,old_size; 1751 PetscErrorCode ierr; 1752 1753 PetscFunctionBegin; 1754 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1755 /* get sizes */ 1756 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1757 n_R = pcis->n - pcbddc->n_vertices; 1758 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1759 /* local work vectors (try to avoid unneeded work)*/ 1760 /* R nodes */ 1761 old_size = -1; 1762 if (pcbddc->vec1_R) { 1763 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1764 } 1765 if (n_R != old_size) { 1766 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1767 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1768 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1769 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1770 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1771 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1772 } 1773 /* local primal dofs */ 1774 old_size = -1; 1775 if (pcbddc->vec1_P) { 1776 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1777 } 1778 if (pcbddc->local_primal_size != old_size) { 1779 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1780 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1781 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1782 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1783 } 1784 /* local explicit constraints */ 1785 old_size = -1; 1786 if (pcbddc->vec1_C) { 1787 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1788 } 1789 if (n_constraints && n_constraints != old_size) { 1790 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1791 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1792 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1793 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "PCBDDCSetUpCorrection" 1800 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1801 { 1802 PetscErrorCode ierr; 1803 /* pointers to pcis and pcbddc */ 1804 PC_IS* pcis = (PC_IS*)pc->data; 1805 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1806 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1807 /* submatrices of local problem */ 1808 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1809 /* submatrices of local coarse problem */ 1810 Mat S_VV,S_CV,S_VC,S_CC; 1811 /* working matrices */ 1812 Mat C_CR; 1813 /* additional working stuff */ 1814 PC pc_R; 1815 Mat F; 1816 Vec dummy_vec; 1817 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1818 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1819 PetscScalar *work; 1820 PetscInt *idx_V_B; 1821 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1822 PetscInt i,n_R,n_D,n_B; 1823 1824 /* some shortcuts to scalars */ 1825 PetscScalar one=1.0,m_one=-1.0; 1826 1827 PetscFunctionBegin; 1828 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1829 1830 /* Set Non-overlapping dimensions */ 1831 n_vertices = pcbddc->n_vertices; 1832 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1833 n_B = pcis->n_B; 1834 n_D = pcis->n - n_B; 1835 n_R = pcis->n - n_vertices; 1836 1837 /* vertices in boundary numbering */ 1838 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1839 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1840 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1841 1842 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1843 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1845 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1847 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1849 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1851 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1852 1853 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1854 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1855 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1856 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1857 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1858 lda_rhs = n_R; 1859 need_benign_correction = PETSC_FALSE; 1860 if (isLU || isILU || isCHOL) { 1861 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1862 } else if (sub_schurs && sub_schurs->reuse_solver) { 1863 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1864 MatFactorType type; 1865 1866 F = reuse_solver->F; 1867 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1868 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1869 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1870 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1871 } else { 1872 F = NULL; 1873 } 1874 1875 /* allocate workspace */ 1876 n = 0; 1877 if (n_constraints) { 1878 n += lda_rhs*n_constraints; 1879 } 1880 if (n_vertices) { 1881 n = PetscMax(2*lda_rhs*n_vertices,n); 1882 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1883 } 1884 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1885 1886 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1887 dummy_vec = NULL; 1888 if (need_benign_correction && lda_rhs != n_R && F) { 1889 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1890 } 1891 1892 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1893 if (n_constraints) { 1894 Mat M1,M2,M3,C_B; 1895 IS is_aux; 1896 PetscScalar *array,*array2; 1897 1898 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1900 1901 /* Extract constraints on R nodes: C_{CR} */ 1902 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1903 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1904 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1905 1906 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1907 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1908 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1909 for (i=0;i<n_constraints;i++) { 1910 const PetscScalar *row_cmat_values; 1911 const PetscInt *row_cmat_indices; 1912 PetscInt size_of_constraint,j; 1913 1914 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1915 for (j=0;j<size_of_constraint;j++) { 1916 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1917 } 1918 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1919 } 1920 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1921 if (F) { 1922 Mat B; 1923 1924 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1925 if (need_benign_correction) { 1926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1927 1928 /* rhs is already zero on interior dofs, no need to change the rhs */ 1929 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1930 } 1931 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1932 if (need_benign_correction) { 1933 PetscScalar *marr; 1934 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1935 1936 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1937 if (lda_rhs != n_R) { 1938 for (i=0;i<n_constraints;i++) { 1939 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1940 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1941 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1942 } 1943 } else { 1944 for (i=0;i<n_constraints;i++) { 1945 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1947 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1948 } 1949 } 1950 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1951 } 1952 ierr = MatDestroy(&B);CHKERRQ(ierr); 1953 } else { 1954 PetscScalar *marr; 1955 1956 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1957 for (i=0;i<n_constraints;i++) { 1958 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1959 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1960 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1962 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1963 } 1964 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1965 } 1966 if (!pcbddc->switch_static) { 1967 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1968 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1969 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1970 for (i=0;i<n_constraints;i++) { 1971 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1972 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1973 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1976 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1977 } 1978 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1979 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1980 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1981 } else { 1982 if (lda_rhs != n_R) { 1983 IS dummy; 1984 1985 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1986 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1987 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1988 } else { 1989 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1990 pcbddc->local_auxmat2 = local_auxmat2_R; 1991 } 1992 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1993 } 1994 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1995 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1996 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1997 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1998 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1999 if (isCHOL) { 2000 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2003 } 2004 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2005 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2006 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2007 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2008 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2009 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2010 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2011 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2012 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2013 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2014 } 2015 2016 /* Get submatrices from subdomain matrix */ 2017 if (n_vertices) { 2018 IS is_aux; 2019 2020 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2021 IS tis; 2022 2023 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2024 ierr = ISSort(tis);CHKERRQ(ierr); 2025 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2026 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2027 } else { 2028 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2029 } 2030 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2031 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2032 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2033 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2034 } 2035 2036 /* Matrix of coarse basis functions (local) */ 2037 if (pcbddc->coarse_phi_B) { 2038 PetscInt on_B,on_primal,on_D=n_D; 2039 if (pcbddc->coarse_phi_D) { 2040 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2041 } 2042 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2043 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2044 PetscScalar *marray; 2045 2046 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2047 ierr = PetscFree(marray);CHKERRQ(ierr); 2048 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2049 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2050 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2052 } 2053 } 2054 2055 if (!pcbddc->coarse_phi_B) { 2056 PetscScalar *marray; 2057 2058 n = n_B*pcbddc->local_primal_size; 2059 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2060 n += n_D*pcbddc->local_primal_size; 2061 } 2062 if (!pcbddc->symmetric_primal) { 2063 n *= 2; 2064 } 2065 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2067 n = n_B*pcbddc->local_primal_size; 2068 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2070 n += n_D*pcbddc->local_primal_size; 2071 } 2072 if (!pcbddc->symmetric_primal) { 2073 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2075 n = n_B*pcbddc->local_primal_size; 2076 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2077 } 2078 } else { 2079 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2080 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2081 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2082 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2083 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2084 } 2085 } 2086 } 2087 2088 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2089 p0_lidx_I = NULL; 2090 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2091 const PetscInt *idxs; 2092 2093 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2094 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2095 for (i=0;i<pcbddc->benign_n;i++) { 2096 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2097 } 2098 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2099 } 2100 2101 /* vertices */ 2102 if (n_vertices) { 2103 2104 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2105 2106 if (n_R) { 2107 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2108 PetscBLASInt B_N,B_one = 1; 2109 PetscScalar *x,*y; 2110 PetscBool isseqaij; 2111 2112 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2113 if (need_benign_correction) { 2114 ISLocalToGlobalMapping RtoN; 2115 IS is_p0; 2116 PetscInt *idxs_p0,n; 2117 2118 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2119 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2120 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2121 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2122 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2123 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2124 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2125 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2126 } 2127 2128 if (lda_rhs == n_R) { 2129 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2130 } else { 2131 PetscScalar *av,*array; 2132 const PetscInt *xadj,*adjncy; 2133 PetscInt n; 2134 PetscBool flg_row; 2135 2136 array = work+lda_rhs*n_vertices; 2137 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2138 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2139 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2140 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2144 } 2145 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2146 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2147 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2148 } 2149 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2150 if (need_benign_correction) { 2151 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2152 PetscScalar *marr; 2153 2154 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2155 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2156 2157 | 0 0 0 | (V) 2158 L = | 0 0 -1 | (P-p0) 2159 | 0 0 -1 | (p0) 2160 2161 */ 2162 for (i=0;i<reuse_solver->benign_n;i++) { 2163 const PetscScalar *vals; 2164 const PetscInt *idxs,*idxs_zero; 2165 PetscInt n,j,nz; 2166 2167 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2168 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2169 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2170 for (j=0;j<n;j++) { 2171 PetscScalar val = vals[j]; 2172 PetscInt k,col = idxs[j]; 2173 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2174 } 2175 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2176 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2177 } 2178 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2179 } 2180 if (F) { 2181 /* need to correct the rhs */ 2182 if (need_benign_correction) { 2183 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2184 PetscScalar *marr; 2185 2186 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2187 if (lda_rhs != n_R) { 2188 for (i=0;i<n_vertices;i++) { 2189 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2190 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2191 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2192 } 2193 } else { 2194 for (i=0;i<n_vertices;i++) { 2195 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2196 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2198 } 2199 } 2200 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2201 } 2202 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2203 /* need to correct the solution */ 2204 if (need_benign_correction) { 2205 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2206 PetscScalar *marr; 2207 2208 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2209 if (lda_rhs != n_R) { 2210 for (i=0;i<n_vertices;i++) { 2211 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2212 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2213 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2214 } 2215 } else { 2216 for (i=0;i<n_vertices;i++) { 2217 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2218 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2219 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2220 } 2221 } 2222 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2223 } 2224 } else { 2225 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2226 for (i=0;i<n_vertices;i++) { 2227 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2228 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2229 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2230 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2231 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2232 } 2233 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2234 } 2235 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2236 /* S_VV and S_CV */ 2237 if (n_constraints) { 2238 Mat B; 2239 2240 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2241 for (i=0;i<n_vertices;i++) { 2242 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2243 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2244 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2245 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2246 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2247 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2248 } 2249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2250 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2251 ierr = MatDestroy(&B);CHKERRQ(ierr); 2252 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2253 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2254 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2255 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2256 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2257 ierr = MatDestroy(&B);CHKERRQ(ierr); 2258 } 2259 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2260 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2261 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2262 } 2263 if (lda_rhs != n_R) { 2264 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2265 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2266 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2267 } 2268 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2269 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2270 if (need_benign_correction) { 2271 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2272 PetscScalar *marr,*sums; 2273 2274 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2275 ierr = MatDenseGetArray(S_VVt,&marr); 2276 for (i=0;i<reuse_solver->benign_n;i++) { 2277 const PetscScalar *vals; 2278 const PetscInt *idxs,*idxs_zero; 2279 PetscInt n,j,nz; 2280 2281 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2282 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2283 for (j=0;j<n_vertices;j++) { 2284 PetscInt k; 2285 sums[j] = 0.; 2286 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2287 } 2288 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2289 for (j=0;j<n;j++) { 2290 PetscScalar val = vals[j]; 2291 PetscInt k; 2292 for (k=0;k<n_vertices;k++) { 2293 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2294 } 2295 } 2296 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2297 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2298 } 2299 ierr = PetscFree(sums);CHKERRQ(ierr); 2300 ierr = MatDenseRestoreArray(S_VVt,&marr); 2301 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2302 } 2303 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2304 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2306 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2307 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2308 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2309 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2310 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2311 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2312 } else { 2313 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2314 } 2315 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2316 2317 /* coarse basis functions */ 2318 for (i=0;i<n_vertices;i++) { 2319 PetscScalar *y; 2320 2321 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2322 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2323 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2324 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2325 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2326 y[n_B*i+idx_V_B[i]] = 1.0; 2327 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2328 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2329 2330 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2331 PetscInt j; 2332 2333 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2334 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2335 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2336 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2338 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2339 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2340 } 2341 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2342 } 2343 /* if n_R == 0 the object is not destroyed */ 2344 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2345 } 2346 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2347 2348 if (n_constraints) { 2349 Mat B; 2350 2351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2352 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2353 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2354 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2355 if (n_vertices) { 2356 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2357 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2358 } else { 2359 Mat S_VCt; 2360 2361 if (lda_rhs != n_R) { 2362 ierr = MatDestroy(&B);CHKERRQ(ierr); 2363 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2364 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2365 } 2366 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2367 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2368 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2369 } 2370 } 2371 ierr = MatDestroy(&B);CHKERRQ(ierr); 2372 /* coarse basis functions */ 2373 for (i=0;i<n_constraints;i++) { 2374 PetscScalar *y; 2375 2376 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2377 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2378 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2379 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2380 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2381 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2382 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2383 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2384 PetscInt j; 2385 2386 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2387 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2388 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2389 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2390 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2391 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2392 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2393 } 2394 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2395 } 2396 } 2397 if (n_constraints) { 2398 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2401 2402 /* coarse matrix entries relative to B_0 */ 2403 if (pcbddc->benign_n) { 2404 Mat B0_B,B0_BPHI; 2405 IS is_dummy; 2406 PetscScalar *data; 2407 PetscInt j; 2408 2409 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2410 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2411 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2412 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2413 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2414 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2415 for (j=0;j<pcbddc->benign_n;j++) { 2416 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2417 for (i=0;i<pcbddc->local_primal_size;i++) { 2418 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2419 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2420 } 2421 } 2422 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2423 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2424 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2425 } 2426 2427 /* compute other basis functions for non-symmetric problems */ 2428 if (!pcbddc->symmetric_primal) { 2429 Mat B_V=NULL,B_C=NULL; 2430 PetscScalar *marray; 2431 2432 if (n_constraints) { 2433 Mat S_CCT,C_CRT; 2434 2435 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2436 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2437 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2438 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2439 if (n_vertices) { 2440 Mat S_VCT; 2441 2442 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2443 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2444 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2445 } 2446 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2447 } else { 2448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2449 } 2450 if (n_vertices && n_R) { 2451 PetscScalar *av,*marray; 2452 const PetscInt *xadj,*adjncy; 2453 PetscInt n; 2454 PetscBool flg_row; 2455 2456 /* B_V = B_V - A_VR^T */ 2457 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2458 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2459 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2460 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) { 2462 PetscInt j; 2463 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2464 } 2465 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2466 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2467 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2468 } 2469 2470 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2471 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2472 for (i=0;i<n_vertices;i++) { 2473 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2474 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2475 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2476 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2477 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2478 } 2479 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2480 if (B_C) { 2481 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2482 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2483 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2484 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2485 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2486 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2487 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2488 } 2489 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2490 } 2491 /* coarse basis functions */ 2492 for (i=0;i<pcbddc->local_primal_size;i++) { 2493 PetscScalar *y; 2494 2495 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2496 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2497 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2498 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2500 if (i<n_vertices) { 2501 y[n_B*i+idx_V_B[i]] = 1.0; 2502 } 2503 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2504 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2505 2506 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2507 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2508 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2509 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2512 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2513 } 2514 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2515 } 2516 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2517 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2518 } 2519 /* free memory */ 2520 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2521 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2522 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2523 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2524 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2525 ierr = PetscFree(work);CHKERRQ(ierr); 2526 if (n_vertices) { 2527 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2528 } 2529 if (n_constraints) { 2530 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2531 } 2532 /* Checking coarse_sub_mat and coarse basis functios */ 2533 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2534 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2535 if (pcbddc->dbg_flag) { 2536 Mat coarse_sub_mat; 2537 Mat AUXMAT,TM1,TM2,TM3,TM4; 2538 Mat coarse_phi_D,coarse_phi_B; 2539 Mat coarse_psi_D,coarse_psi_B; 2540 Mat A_II,A_BB,A_IB,A_BI; 2541 Mat C_B,CPHI; 2542 IS is_dummy; 2543 Vec mones; 2544 MatType checkmattype=MATSEQAIJ; 2545 PetscReal real_value; 2546 2547 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2548 Mat A; 2549 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2550 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2551 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2552 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2553 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2554 ierr = MatDestroy(&A);CHKERRQ(ierr); 2555 } else { 2556 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2557 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2558 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2559 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2560 } 2561 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2562 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2563 if (!pcbddc->symmetric_primal) { 2564 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2565 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2566 } 2567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2568 2569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2572 if (!pcbddc->symmetric_primal) { 2573 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2574 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2575 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2576 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2577 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2579 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2580 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2582 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2583 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2585 } else { 2586 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2587 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2588 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2589 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2591 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2592 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2594 } 2595 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2596 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2597 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2598 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2599 if (pcbddc->benign_n) { 2600 Mat B0_B,B0_BPHI; 2601 PetscScalar *data,*data2; 2602 PetscInt j; 2603 2604 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2605 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2606 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2607 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2608 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2609 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2610 for (j=0;j<pcbddc->benign_n;j++) { 2611 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2612 for (i=0;i<pcbddc->local_primal_size;i++) { 2613 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2614 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2615 } 2616 } 2617 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2618 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2619 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2620 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2621 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2622 } 2623 #if 0 2624 { 2625 PetscViewer viewer; 2626 char filename[256]; 2627 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2628 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2629 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2630 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2631 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2632 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2633 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2634 if (save_change) { 2635 Mat phi_B; 2636 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2637 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2638 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2639 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2640 } else { 2641 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2642 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2643 } 2644 if (pcbddc->coarse_phi_D) { 2645 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2646 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2647 } 2648 if (pcbddc->coarse_psi_B) { 2649 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2650 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2651 } 2652 if (pcbddc->coarse_psi_D) { 2653 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2654 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2655 } 2656 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2657 } 2658 #endif 2659 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2660 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2661 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2662 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2663 2664 /* check constraints */ 2665 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2666 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2667 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2668 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2669 } else { 2670 PetscScalar *data; 2671 Mat tmat; 2672 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2674 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2675 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2676 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2677 } 2678 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2679 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2680 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2681 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2683 if (!pcbddc->symmetric_primal) { 2684 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2685 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2686 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2687 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2689 } 2690 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2691 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2692 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2693 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2695 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2696 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2697 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2698 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2699 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2700 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2701 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2702 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2703 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2704 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2705 if (!pcbddc->symmetric_primal) { 2706 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2707 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2708 } 2709 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2710 } 2711 /* get back data */ 2712 *coarse_submat_vals_n = coarse_submat_vals; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2718 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2719 { 2720 Mat *work_mat; 2721 IS isrow_s,iscol_s; 2722 PetscBool rsorted,csorted; 2723 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2728 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2729 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2730 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2731 2732 if (!rsorted) { 2733 const PetscInt *idxs; 2734 PetscInt *idxs_sorted,i; 2735 2736 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2737 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2738 for (i=0;i<rsize;i++) { 2739 idxs_perm_r[i] = i; 2740 } 2741 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2742 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2743 for (i=0;i<rsize;i++) { 2744 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2745 } 2746 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2747 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2748 } else { 2749 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2750 isrow_s = isrow; 2751 } 2752 2753 if (!csorted) { 2754 if (isrow == iscol) { 2755 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2756 iscol_s = isrow_s; 2757 } else { 2758 const PetscInt *idxs; 2759 PetscInt *idxs_sorted,i; 2760 2761 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2762 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2763 for (i=0;i<csize;i++) { 2764 idxs_perm_c[i] = i; 2765 } 2766 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2767 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2768 for (i=0;i<csize;i++) { 2769 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2770 } 2771 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2772 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2773 } 2774 } else { 2775 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2776 iscol_s = iscol; 2777 } 2778 2779 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2780 2781 if (!rsorted || !csorted) { 2782 Mat new_mat; 2783 IS is_perm_r,is_perm_c; 2784 2785 if (!rsorted) { 2786 PetscInt *idxs_r,i; 2787 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2788 for (i=0;i<rsize;i++) { 2789 idxs_r[idxs_perm_r[i]] = i; 2790 } 2791 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2792 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2793 } else { 2794 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2795 } 2796 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2797 2798 if (!csorted) { 2799 if (isrow_s == iscol_s) { 2800 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2801 is_perm_c = is_perm_r; 2802 } else { 2803 PetscInt *idxs_c,i; 2804 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2805 for (i=0;i<csize;i++) { 2806 idxs_c[idxs_perm_c[i]] = i; 2807 } 2808 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2809 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2810 } 2811 } else { 2812 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2813 } 2814 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2815 2816 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2817 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2818 work_mat[0] = new_mat; 2819 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2820 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2821 } 2822 2823 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2824 *B = work_mat[0]; 2825 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2826 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2827 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2833 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2834 { 2835 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2836 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2837 Mat new_mat; 2838 IS is_local,is_global; 2839 PetscInt local_size; 2840 PetscBool isseqaij; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2845 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2846 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2847 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2848 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2849 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2850 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2851 2852 /* check */ 2853 if (pcbddc->dbg_flag) { 2854 Vec x,x_change; 2855 PetscReal error; 2856 2857 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2859 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2862 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2865 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2866 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2869 ierr = VecDestroy(&x);CHKERRQ(ierr); 2870 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2871 } 2872 2873 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2874 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2875 if (isseqaij) { 2876 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2877 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2878 } else { 2879 Mat work_mat; 2880 2881 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2882 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2883 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2884 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2885 } 2886 if (matis->A->symmetric_set) { 2887 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2888 #if !defined(PETSC_USE_COMPLEX) 2889 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2890 #endif 2891 } 2892 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2893 PetscFunctionReturn(0); 2894 } 2895 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2898 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2899 { 2900 PC_IS* pcis = (PC_IS*)(pc->data); 2901 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2902 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2903 PetscInt *idx_R_local=NULL; 2904 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2905 PetscInt vbs,bs; 2906 PetscBT bitmask=NULL; 2907 PetscErrorCode ierr; 2908 2909 PetscFunctionBegin; 2910 /* 2911 No need to setup local scatters if 2912 - primal space is unchanged 2913 AND 2914 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2915 AND 2916 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2917 */ 2918 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2919 PetscFunctionReturn(0); 2920 } 2921 /* destroy old objects */ 2922 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2923 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2924 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2925 /* Set Non-overlapping dimensions */ 2926 n_B = pcis->n_B; 2927 n_D = pcis->n - n_B; 2928 n_vertices = pcbddc->n_vertices; 2929 2930 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2931 2932 /* create auxiliary bitmask and allocate workspace */ 2933 if (!sub_schurs || !sub_schurs->reuse_solver) { 2934 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2935 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2936 for (i=0;i<n_vertices;i++) { 2937 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2938 } 2939 2940 for (i=0, n_R=0; i<pcis->n; i++) { 2941 if (!PetscBTLookup(bitmask,i)) { 2942 idx_R_local[n_R++] = i; 2943 } 2944 } 2945 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2946 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2947 2948 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2949 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2950 } 2951 2952 /* Block code */ 2953 vbs = 1; 2954 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2955 if (bs>1 && !(n_vertices%bs)) { 2956 PetscBool is_blocked = PETSC_TRUE; 2957 PetscInt *vary; 2958 if (!sub_schurs || !sub_schurs->reuse_solver) { 2959 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2960 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2961 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2962 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2963 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2964 for (i=0; i<pcis->n/bs; i++) { 2965 if (vary[i]!=0 && vary[i]!=bs) { 2966 is_blocked = PETSC_FALSE; 2967 break; 2968 } 2969 } 2970 ierr = PetscFree(vary);CHKERRQ(ierr); 2971 } else { 2972 /* Verify directly the R set */ 2973 for (i=0; i<n_R/bs; i++) { 2974 PetscInt j,node=idx_R_local[bs*i]; 2975 for (j=1; j<bs; j++) { 2976 if (node != idx_R_local[bs*i+j]-j) { 2977 is_blocked = PETSC_FALSE; 2978 break; 2979 } 2980 } 2981 } 2982 } 2983 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2984 vbs = bs; 2985 for (i=0;i<n_R/vbs;i++) { 2986 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2987 } 2988 } 2989 } 2990 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2991 if (sub_schurs && sub_schurs->reuse_solver) { 2992 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2993 2994 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2995 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2996 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2997 reuse_solver->is_R = pcbddc->is_R_local; 2998 } else { 2999 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3000 } 3001 3002 /* print some info if requested */ 3003 if (pcbddc->dbg_flag) { 3004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3006 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3009 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3010 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3011 } 3012 3013 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3014 if (!sub_schurs || !sub_schurs->reuse_solver) { 3015 IS is_aux1,is_aux2; 3016 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3017 3018 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3019 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3020 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3021 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3022 for (i=0; i<n_D; i++) { 3023 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3024 } 3025 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3026 for (i=0, j=0; i<n_R; i++) { 3027 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3028 aux_array1[j++] = i; 3029 } 3030 } 3031 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3032 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3033 for (i=0, j=0; i<n_B; i++) { 3034 if (!PetscBTLookup(bitmask,is_indices[i])) { 3035 aux_array2[j++] = i; 3036 } 3037 } 3038 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3039 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3040 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3041 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3042 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3043 3044 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3045 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3046 for (i=0, j=0; i<n_R; i++) { 3047 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3048 aux_array1[j++] = i; 3049 } 3050 } 3051 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3052 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3053 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3054 } 3055 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3056 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3057 } else { 3058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3059 IS tis; 3060 PetscInt schur_size; 3061 3062 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3063 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3064 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3065 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3066 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3067 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3068 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3069 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3070 } 3071 } 3072 PetscFunctionReturn(0); 3073 } 3074 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3078 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3079 { 3080 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3081 PC_IS *pcis = (PC_IS*)pc->data; 3082 PC pc_temp; 3083 Mat A_RR; 3084 MatReuse reuse; 3085 PetscScalar m_one = -1.0; 3086 PetscReal value; 3087 PetscInt n_D,n_R; 3088 PetscBool check_corr[2],issbaij; 3089 PetscErrorCode ierr; 3090 /* prefixes stuff */ 3091 char dir_prefix[256],neu_prefix[256],str_level[16]; 3092 size_t len; 3093 3094 PetscFunctionBegin; 3095 3096 /* compute prefixes */ 3097 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3098 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3099 if (!pcbddc->current_level) { 3100 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3101 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3102 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3103 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3106 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3107 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3108 len -= 15; /* remove "pc_bddc_coarse_" */ 3109 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3110 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3111 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3112 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3113 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3114 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3115 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3116 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3117 } 3118 3119 /* DIRICHLET PROBLEM */ 3120 if (dirichlet) { 3121 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3122 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3123 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3124 if (pcbddc->dbg_flag) { 3125 Mat A_IIn; 3126 3127 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3128 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3129 pcis->A_II = A_IIn; 3130 } 3131 } 3132 if (pcbddc->local_mat->symmetric_set) { 3133 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3134 } 3135 /* Matrix for Dirichlet problem is pcis->A_II */ 3136 n_D = pcis->n - pcis->n_B; 3137 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3138 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3139 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3140 /* default */ 3141 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3142 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3143 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3144 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3145 if (issbaij) { 3146 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3147 } else { 3148 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3149 } 3150 /* Allow user's customization */ 3151 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3152 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3153 } 3154 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3155 if (sub_schurs && sub_schurs->reuse_solver) { 3156 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3157 3158 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3159 } 3160 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3161 if (!n_D) { 3162 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3163 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3164 } 3165 /* Set Up KSP for Dirichlet problem of BDDC */ 3166 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3167 /* set ksp_D into pcis data */ 3168 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3169 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3170 pcis->ksp_D = pcbddc->ksp_D; 3171 } 3172 3173 /* NEUMANN PROBLEM */ 3174 A_RR = 0; 3175 if (neumann) { 3176 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3177 PetscInt ibs,mbs; 3178 PetscBool issbaij; 3179 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3180 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3181 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3182 if (pcbddc->ksp_R) { /* already created ksp */ 3183 PetscInt nn_R; 3184 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3185 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3186 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3187 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3188 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3189 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3190 reuse = MAT_INITIAL_MATRIX; 3191 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3192 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3193 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3194 reuse = MAT_INITIAL_MATRIX; 3195 } else { /* safe to reuse the matrix */ 3196 reuse = MAT_REUSE_MATRIX; 3197 } 3198 } 3199 /* last check */ 3200 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3201 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3202 reuse = MAT_INITIAL_MATRIX; 3203 } 3204 } else { /* first time, so we need to create the matrix */ 3205 reuse = MAT_INITIAL_MATRIX; 3206 } 3207 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3208 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3209 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3210 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3211 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3212 if (matis->A == pcbddc->local_mat) { 3213 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3214 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3215 } else { 3216 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3217 } 3218 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3219 if (matis->A == pcbddc->local_mat) { 3220 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3221 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3222 } else { 3223 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3224 } 3225 } 3226 /* extract A_RR */ 3227 if (sub_schurs && sub_schurs->reuse_solver) { 3228 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3229 3230 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3231 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3232 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3233 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3234 } else { 3235 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3236 } 3237 } else { 3238 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3239 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3240 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3241 } 3242 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3243 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3244 } 3245 if (pcbddc->local_mat->symmetric_set) { 3246 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3247 } 3248 if (!pcbddc->ksp_R) { /* create object if not present */ 3249 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3250 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3251 /* default */ 3252 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3253 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3254 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3255 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3256 if (issbaij) { 3257 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3258 } else { 3259 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3260 } 3261 /* Allow user's customization */ 3262 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3263 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3264 } 3265 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3266 if (!n_R) { 3267 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3268 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3269 } 3270 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3271 /* Reuse solver if it is present */ 3272 if (sub_schurs && sub_schurs->reuse_solver) { 3273 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3274 3275 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3276 } 3277 /* Set Up KSP for Neumann problem of BDDC */ 3278 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3279 } 3280 3281 if (pcbddc->dbg_flag) { 3282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3283 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3284 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3285 } 3286 3287 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3288 check_corr[0] = check_corr[1] = PETSC_FALSE; 3289 if (pcbddc->NullSpace_corr[0]) { 3290 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3291 } 3292 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3293 check_corr[0] = PETSC_TRUE; 3294 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3295 } 3296 if (neumann && pcbddc->NullSpace_corr[2]) { 3297 check_corr[1] = PETSC_TRUE; 3298 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3299 } 3300 3301 /* check Dirichlet and Neumann solvers */ 3302 if (pcbddc->dbg_flag) { 3303 if (dirichlet) { /* Dirichlet */ 3304 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3305 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3306 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3307 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3308 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3309 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3310 if (check_corr[0]) { 3311 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3312 } 3313 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3314 } 3315 if (neumann) { /* Neumann */ 3316 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3317 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3318 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3319 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3320 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3322 if (check_corr[1]) { 3323 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3324 } 3325 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3326 } 3327 } 3328 /* free Neumann problem's matrix */ 3329 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 3333 #undef __FUNCT__ 3334 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3335 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3336 { 3337 PetscErrorCode ierr; 3338 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3339 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3340 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3341 3342 PetscFunctionBegin; 3343 if (!reuse_solver) { 3344 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3345 } 3346 if (!pcbddc->switch_static) { 3347 if (applytranspose && pcbddc->local_auxmat1) { 3348 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3349 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3350 } 3351 if (!reuse_solver) { 3352 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3353 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3354 } else { 3355 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3356 3357 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3358 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3359 } 3360 } else { 3361 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3362 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3363 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3365 if (applytranspose && pcbddc->local_auxmat1) { 3366 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3367 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3368 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3369 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3370 } 3371 } 3372 if (!reuse_solver || pcbddc->switch_static) { 3373 if (applytranspose) { 3374 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3375 } else { 3376 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3377 } 3378 } else { 3379 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3380 3381 if (applytranspose) { 3382 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3383 } else { 3384 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3385 } 3386 } 3387 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3388 if (!pcbddc->switch_static) { 3389 if (!reuse_solver) { 3390 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3391 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3392 } else { 3393 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3394 3395 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3396 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3397 } 3398 if (!applytranspose && pcbddc->local_auxmat1) { 3399 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3400 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3401 } 3402 } else { 3403 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3404 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3405 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3406 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3407 if (!applytranspose && pcbddc->local_auxmat1) { 3408 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3409 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3410 } 3411 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3412 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3413 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 } 3416 PetscFunctionReturn(0); 3417 } 3418 3419 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3420 #undef __FUNCT__ 3421 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3422 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3423 { 3424 PetscErrorCode ierr; 3425 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3426 PC_IS* pcis = (PC_IS*) (pc->data); 3427 const PetscScalar zero = 0.0; 3428 3429 PetscFunctionBegin; 3430 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3431 if (!pcbddc->benign_apply_coarse_only) { 3432 if (applytranspose) { 3433 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3434 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3435 } else { 3436 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3437 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3438 } 3439 } else { 3440 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3441 } 3442 3443 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3444 if (pcbddc->benign_n) { 3445 PetscScalar *array; 3446 PetscInt j; 3447 3448 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3449 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3450 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3451 } 3452 3453 /* start communications from local primal nodes to rhs of coarse solver */ 3454 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3455 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3456 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3457 3458 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3459 if (pcbddc->coarse_ksp) { 3460 Mat coarse_mat; 3461 Vec rhs,sol; 3462 MatNullSpace nullsp; 3463 PetscBool isbddc = PETSC_FALSE; 3464 3465 if (pcbddc->benign_have_null) { 3466 PC coarse_pc; 3467 3468 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3469 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3470 /* we need to propagate to coarser levels the need for a possible benign correction */ 3471 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3472 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3473 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3474 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3475 } 3476 } 3477 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3478 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3479 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3480 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3481 if (nullsp) { 3482 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3483 } 3484 if (applytranspose) { 3485 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3486 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3487 } else { 3488 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3489 PC coarse_pc; 3490 3491 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3492 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3493 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3494 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3495 } else { 3496 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3497 } 3498 } 3499 /* we don't need the benign correction at coarser levels anymore */ 3500 if (pcbddc->benign_have_null && isbddc) { 3501 PC coarse_pc; 3502 PC_BDDC* coarsepcbddc; 3503 3504 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3505 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3506 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3507 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3508 } 3509 if (nullsp) { 3510 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3511 } 3512 } 3513 3514 /* Local solution on R nodes */ 3515 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3516 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3517 } 3518 /* communications from coarse sol to local primal nodes */ 3519 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3520 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3521 3522 /* Sum contributions from the two levels */ 3523 if (!pcbddc->benign_apply_coarse_only) { 3524 if (applytranspose) { 3525 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3526 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3527 } else { 3528 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3529 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3530 } 3531 /* store p0 */ 3532 if (pcbddc->benign_n) { 3533 PetscScalar *array; 3534 PetscInt j; 3535 3536 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3537 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3538 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3539 } 3540 } else { /* expand the coarse solution */ 3541 if (applytranspose) { 3542 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3543 } else { 3544 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3545 } 3546 } 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3552 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3553 { 3554 PetscErrorCode ierr; 3555 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3556 PetscScalar *array; 3557 Vec from,to; 3558 3559 PetscFunctionBegin; 3560 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3561 from = pcbddc->coarse_vec; 3562 to = pcbddc->vec1_P; 3563 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3564 Vec tvec; 3565 3566 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3567 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3568 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3569 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3570 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3571 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3572 } 3573 } else { /* from local to global -> put data in coarse right hand side */ 3574 from = pcbddc->vec1_P; 3575 to = pcbddc->coarse_vec; 3576 } 3577 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3578 PetscFunctionReturn(0); 3579 } 3580 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3583 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3584 { 3585 PetscErrorCode ierr; 3586 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3587 PetscScalar *array; 3588 Vec from,to; 3589 3590 PetscFunctionBegin; 3591 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3592 from = pcbddc->coarse_vec; 3593 to = pcbddc->vec1_P; 3594 } else { /* from local to global -> put data in coarse right hand side */ 3595 from = pcbddc->vec1_P; 3596 to = pcbddc->coarse_vec; 3597 } 3598 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3599 if (smode == SCATTER_FORWARD) { 3600 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3601 Vec tvec; 3602 3603 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3604 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3605 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3606 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3607 } 3608 } else { 3609 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3610 ierr = VecResetArray(from);CHKERRQ(ierr); 3611 } 3612 } 3613 PetscFunctionReturn(0); 3614 } 3615 3616 /* uncomment for testing purposes */ 3617 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3618 #undef __FUNCT__ 3619 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3620 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3621 { 3622 PetscErrorCode ierr; 3623 PC_IS* pcis = (PC_IS*)(pc->data); 3624 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3625 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3626 /* one and zero */ 3627 PetscScalar one=1.0,zero=0.0; 3628 /* space to store constraints and their local indices */ 3629 PetscScalar *constraints_data; 3630 PetscInt *constraints_idxs,*constraints_idxs_B; 3631 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3632 PetscInt *constraints_n; 3633 /* iterators */ 3634 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3635 /* BLAS integers */ 3636 PetscBLASInt lwork,lierr; 3637 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3638 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3639 /* reuse */ 3640 PetscInt olocal_primal_size,olocal_primal_size_cc; 3641 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3642 /* change of basis */ 3643 PetscBool qr_needed; 3644 PetscBT change_basis,qr_needed_idx; 3645 /* auxiliary stuff */ 3646 PetscInt *nnz,*is_indices; 3647 PetscInt ncc; 3648 /* some quantities */ 3649 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3650 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3651 3652 PetscFunctionBegin; 3653 /* Destroy Mat objects computed previously */ 3654 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3655 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3656 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3657 /* save info on constraints from previous setup (if any) */ 3658 olocal_primal_size = pcbddc->local_primal_size; 3659 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3660 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3661 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3662 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3663 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3664 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3665 3666 if (!pcbddc->adaptive_selection) { 3667 IS ISForVertices,*ISForFaces,*ISForEdges; 3668 MatNullSpace nearnullsp; 3669 const Vec *nearnullvecs; 3670 Vec *localnearnullsp; 3671 PetscScalar *array; 3672 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3673 PetscBool nnsp_has_cnst; 3674 /* LAPACK working arrays for SVD or POD */ 3675 PetscBool skip_lapack,boolforchange; 3676 PetscScalar *work; 3677 PetscReal *singular_vals; 3678 #if defined(PETSC_USE_COMPLEX) 3679 PetscReal *rwork; 3680 #endif 3681 #if defined(PETSC_MISSING_LAPACK_GESVD) 3682 PetscScalar *temp_basis,*correlation_mat; 3683 #else 3684 PetscBLASInt dummy_int=1; 3685 PetscScalar dummy_scalar=1.; 3686 #endif 3687 3688 /* Get index sets for faces, edges and vertices from graph */ 3689 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3690 /* print some info */ 3691 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3692 PetscInt nv; 3693 3694 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 3695 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3696 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3702 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3703 } 3704 3705 /* free unneeded index sets */ 3706 if (!pcbddc->use_vertices) { 3707 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3708 } 3709 if (!pcbddc->use_edges) { 3710 for (i=0;i<n_ISForEdges;i++) { 3711 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3712 } 3713 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3714 n_ISForEdges = 0; 3715 } 3716 if (!pcbddc->use_faces) { 3717 for (i=0;i<n_ISForFaces;i++) { 3718 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3719 } 3720 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3721 n_ISForFaces = 0; 3722 } 3723 3724 /* check if near null space is attached to global mat */ 3725 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3726 if (nearnullsp) { 3727 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3728 /* remove any stored info */ 3729 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3730 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3731 /* store information for BDDC solver reuse */ 3732 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3733 pcbddc->onearnullspace = nearnullsp; 3734 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3735 for (i=0;i<nnsp_size;i++) { 3736 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3737 } 3738 } else { /* if near null space is not provided BDDC uses constants by default */ 3739 nnsp_size = 0; 3740 nnsp_has_cnst = PETSC_TRUE; 3741 } 3742 /* get max number of constraints on a single cc */ 3743 max_constraints = nnsp_size; 3744 if (nnsp_has_cnst) max_constraints++; 3745 3746 /* 3747 Evaluate maximum storage size needed by the procedure 3748 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3749 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3750 There can be multiple constraints per connected component 3751 */ 3752 n_vertices = 0; 3753 if (ISForVertices) { 3754 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3755 } 3756 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3757 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3758 3759 total_counts = n_ISForFaces+n_ISForEdges; 3760 total_counts *= max_constraints; 3761 total_counts += n_vertices; 3762 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3763 3764 total_counts = 0; 3765 max_size_of_constraint = 0; 3766 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3767 IS used_is; 3768 if (i<n_ISForEdges) { 3769 used_is = ISForEdges[i]; 3770 } else { 3771 used_is = ISForFaces[i-n_ISForEdges]; 3772 } 3773 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3774 total_counts += j; 3775 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3776 } 3777 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3778 3779 /* get local part of global near null space vectors */ 3780 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3781 for (k=0;k<nnsp_size;k++) { 3782 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3783 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3784 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3785 } 3786 3787 /* whether or not to skip lapack calls */ 3788 skip_lapack = PETSC_TRUE; 3789 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3790 3791 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3792 if (!skip_lapack) { 3793 PetscScalar temp_work; 3794 3795 #if defined(PETSC_MISSING_LAPACK_GESVD) 3796 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3797 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3798 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3799 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3800 #if defined(PETSC_USE_COMPLEX) 3801 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3802 #endif 3803 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3804 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3805 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3806 lwork = -1; 3807 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3808 #if !defined(PETSC_USE_COMPLEX) 3809 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3810 #else 3811 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3812 #endif 3813 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3814 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3815 #else /* on missing GESVD */ 3816 /* SVD */ 3817 PetscInt max_n,min_n; 3818 max_n = max_size_of_constraint; 3819 min_n = max_constraints; 3820 if (max_size_of_constraint < max_constraints) { 3821 min_n = max_size_of_constraint; 3822 max_n = max_constraints; 3823 } 3824 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3825 #if defined(PETSC_USE_COMPLEX) 3826 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3827 #endif 3828 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3829 lwork = -1; 3830 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3831 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3832 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3833 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3834 #if !defined(PETSC_USE_COMPLEX) 3835 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3836 #else 3837 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3838 #endif 3839 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3840 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3841 #endif /* on missing GESVD */ 3842 /* Allocate optimal workspace */ 3843 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3844 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3845 } 3846 /* Now we can loop on constraining sets */ 3847 total_counts = 0; 3848 constraints_idxs_ptr[0] = 0; 3849 constraints_data_ptr[0] = 0; 3850 /* vertices */ 3851 if (n_vertices) { 3852 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3853 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3854 for (i=0;i<n_vertices;i++) { 3855 constraints_n[total_counts] = 1; 3856 constraints_data[total_counts] = 1.0; 3857 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3858 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3859 total_counts++; 3860 } 3861 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3862 n_vertices = total_counts; 3863 } 3864 3865 /* edges and faces */ 3866 total_counts_cc = total_counts; 3867 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3868 IS used_is; 3869 PetscBool idxs_copied = PETSC_FALSE; 3870 3871 if (ncc<n_ISForEdges) { 3872 used_is = ISForEdges[ncc]; 3873 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3874 } else { 3875 used_is = ISForFaces[ncc-n_ISForEdges]; 3876 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3877 } 3878 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3879 3880 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3881 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3882 /* change of basis should not be performed on local periodic nodes */ 3883 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3884 if (nnsp_has_cnst) { 3885 PetscScalar quad_value; 3886 3887 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3888 idxs_copied = PETSC_TRUE; 3889 3890 if (!pcbddc->use_nnsp_true) { 3891 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3892 } else { 3893 quad_value = 1.0; 3894 } 3895 for (j=0;j<size_of_constraint;j++) { 3896 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3897 } 3898 temp_constraints++; 3899 total_counts++; 3900 } 3901 for (k=0;k<nnsp_size;k++) { 3902 PetscReal real_value; 3903 PetscScalar *ptr_to_data; 3904 3905 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3906 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3907 for (j=0;j<size_of_constraint;j++) { 3908 ptr_to_data[j] = array[is_indices[j]]; 3909 } 3910 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3911 /* check if array is null on the connected component */ 3912 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3913 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3914 if (real_value > 0.0) { /* keep indices and values */ 3915 temp_constraints++; 3916 total_counts++; 3917 if (!idxs_copied) { 3918 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3919 idxs_copied = PETSC_TRUE; 3920 } 3921 } 3922 } 3923 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3924 valid_constraints = temp_constraints; 3925 if (!pcbddc->use_nnsp_true && temp_constraints) { 3926 if (temp_constraints == 1) { /* just normalize the constraint */ 3927 PetscScalar norm,*ptr_to_data; 3928 3929 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3930 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3931 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3932 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3933 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3934 } else { /* perform SVD */ 3935 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3936 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3937 3938 #if defined(PETSC_MISSING_LAPACK_GESVD) 3939 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3940 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3941 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3942 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3943 from that computed using LAPACKgesvd 3944 -> This is due to a different computation of eigenvectors in LAPACKheev 3945 -> The quality of the POD-computed basis will be the same */ 3946 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3947 /* Store upper triangular part of correlation matrix */ 3948 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3949 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3950 for (j=0;j<temp_constraints;j++) { 3951 for (k=0;k<j+1;k++) { 3952 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3953 } 3954 } 3955 /* compute eigenvalues and eigenvectors of correlation matrix */ 3956 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3957 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3958 #if !defined(PETSC_USE_COMPLEX) 3959 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3960 #else 3961 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3962 #endif 3963 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3964 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3965 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3966 j = 0; 3967 while (j < temp_constraints && singular_vals[j] < tol) j++; 3968 total_counts = total_counts-j; 3969 valid_constraints = temp_constraints-j; 3970 /* scale and copy POD basis into used quadrature memory */ 3971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3972 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3973 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3975 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3977 if (j<temp_constraints) { 3978 PetscInt ii; 3979 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3980 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3981 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3982 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3983 for (k=0;k<temp_constraints-j;k++) { 3984 for (ii=0;ii<size_of_constraint;ii++) { 3985 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3986 } 3987 } 3988 } 3989 #else /* on missing GESVD */ 3990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3991 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3993 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3994 #if !defined(PETSC_USE_COMPLEX) 3995 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3996 #else 3997 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3998 #endif 3999 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4000 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4001 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4002 k = temp_constraints; 4003 if (k > size_of_constraint) k = size_of_constraint; 4004 j = 0; 4005 while (j < k && singular_vals[k-j-1] < tol) j++; 4006 valid_constraints = k-j; 4007 total_counts = total_counts-temp_constraints+valid_constraints; 4008 #endif /* on missing GESVD */ 4009 } 4010 } 4011 /* update pointers information */ 4012 if (valid_constraints) { 4013 constraints_n[total_counts_cc] = valid_constraints; 4014 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4015 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4016 /* set change_of_basis flag */ 4017 if (boolforchange) { 4018 PetscBTSet(change_basis,total_counts_cc); 4019 } 4020 total_counts_cc++; 4021 } 4022 } 4023 /* free workspace */ 4024 if (!skip_lapack) { 4025 ierr = PetscFree(work);CHKERRQ(ierr); 4026 #if defined(PETSC_USE_COMPLEX) 4027 ierr = PetscFree(rwork);CHKERRQ(ierr); 4028 #endif 4029 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4030 #if defined(PETSC_MISSING_LAPACK_GESVD) 4031 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4032 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4033 #endif 4034 } 4035 for (k=0;k<nnsp_size;k++) { 4036 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4037 } 4038 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4039 /* free index sets of faces, edges and vertices */ 4040 for (i=0;i<n_ISForFaces;i++) { 4041 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4042 } 4043 if (n_ISForFaces) { 4044 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4045 } 4046 for (i=0;i<n_ISForEdges;i++) { 4047 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4048 } 4049 if (n_ISForEdges) { 4050 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4051 } 4052 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4053 } else { 4054 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4055 4056 total_counts = 0; 4057 n_vertices = 0; 4058 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4059 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4060 } 4061 max_constraints = 0; 4062 total_counts_cc = 0; 4063 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4064 total_counts += pcbddc->adaptive_constraints_n[i]; 4065 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4066 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4067 } 4068 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4069 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4070 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4071 constraints_data = pcbddc->adaptive_constraints_data; 4072 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4073 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4074 total_counts_cc = 0; 4075 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4076 if (pcbddc->adaptive_constraints_n[i]) { 4077 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4078 } 4079 } 4080 #if 0 4081 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4082 for (i=0;i<total_counts_cc;i++) { 4083 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4084 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4085 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4086 printf(" %d",constraints_idxs[j]); 4087 } 4088 printf("\n"); 4089 printf("number of cc: %d\n",constraints_n[i]); 4090 } 4091 for (i=0;i<n_vertices;i++) { 4092 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4093 } 4094 for (i=0;i<sub_schurs->n_subs;i++) { 4095 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4096 } 4097 #endif 4098 4099 max_size_of_constraint = 0; 4100 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4101 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4102 /* Change of basis */ 4103 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4104 if (pcbddc->use_change_of_basis) { 4105 for (i=0;i<sub_schurs->n_subs;i++) { 4106 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4107 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4108 } 4109 } 4110 } 4111 } 4112 pcbddc->local_primal_size = total_counts; 4113 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4114 4115 /* map constraints_idxs in boundary numbering */ 4116 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4117 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4118 4119 /* Create constraint matrix */ 4120 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4121 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4122 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4123 4124 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4125 /* determine if a QR strategy is needed for change of basis */ 4126 qr_needed = PETSC_FALSE; 4127 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4128 total_primal_vertices=0; 4129 pcbddc->local_primal_size_cc = 0; 4130 for (i=0;i<total_counts_cc;i++) { 4131 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4132 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4133 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4134 pcbddc->local_primal_size_cc += 1; 4135 } else if (PetscBTLookup(change_basis,i)) { 4136 for (k=0;k<constraints_n[i];k++) { 4137 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4138 } 4139 pcbddc->local_primal_size_cc += constraints_n[i]; 4140 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4141 PetscBTSet(qr_needed_idx,i); 4142 qr_needed = PETSC_TRUE; 4143 } 4144 } else { 4145 pcbddc->local_primal_size_cc += 1; 4146 } 4147 } 4148 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4149 pcbddc->n_vertices = total_primal_vertices; 4150 /* permute indices in order to have a sorted set of vertices */ 4151 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4152 4153 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4154 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4155 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4156 4157 /* nonzero structure of constraint matrix */ 4158 /* and get reference dof for local constraints */ 4159 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4160 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4161 4162 j = total_primal_vertices; 4163 total_counts = total_primal_vertices; 4164 cum = total_primal_vertices; 4165 for (i=n_vertices;i<total_counts_cc;i++) { 4166 if (!PetscBTLookup(change_basis,i)) { 4167 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4168 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4169 cum++; 4170 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4171 for (k=0;k<constraints_n[i];k++) { 4172 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4173 nnz[j+k] = size_of_constraint; 4174 } 4175 j += constraints_n[i]; 4176 } 4177 } 4178 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4179 ierr = PetscFree(nnz);CHKERRQ(ierr); 4180 4181 /* set values in constraint matrix */ 4182 for (i=0;i<total_primal_vertices;i++) { 4183 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4184 } 4185 total_counts = total_primal_vertices; 4186 for (i=n_vertices;i<total_counts_cc;i++) { 4187 if (!PetscBTLookup(change_basis,i)) { 4188 PetscInt *cols; 4189 4190 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4191 cols = constraints_idxs+constraints_idxs_ptr[i]; 4192 for (k=0;k<constraints_n[i];k++) { 4193 PetscInt row = total_counts+k; 4194 PetscScalar *vals; 4195 4196 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4197 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4198 } 4199 total_counts += constraints_n[i]; 4200 } 4201 } 4202 /* assembling */ 4203 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4204 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4205 4206 /* 4207 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4208 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4209 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4210 */ 4211 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4212 if (pcbddc->use_change_of_basis) { 4213 /* dual and primal dofs on a single cc */ 4214 PetscInt dual_dofs,primal_dofs; 4215 /* working stuff for GEQRF */ 4216 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4217 PetscBLASInt lqr_work; 4218 /* working stuff for UNGQR */ 4219 PetscScalar *gqr_work,lgqr_work_t; 4220 PetscBLASInt lgqr_work; 4221 /* working stuff for TRTRS */ 4222 PetscScalar *trs_rhs; 4223 PetscBLASInt Blas_NRHS; 4224 /* pointers for values insertion into change of basis matrix */ 4225 PetscInt *start_rows,*start_cols; 4226 PetscScalar *start_vals; 4227 /* working stuff for values insertion */ 4228 PetscBT is_primal; 4229 PetscInt *aux_primal_numbering_B; 4230 /* matrix sizes */ 4231 PetscInt global_size,local_size; 4232 /* temporary change of basis */ 4233 Mat localChangeOfBasisMatrix; 4234 /* extra space for debugging */ 4235 PetscScalar *dbg_work; 4236 4237 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4238 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4239 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4240 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4241 /* nonzeros for local mat */ 4242 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4243 if (!pcbddc->benign_change || pcbddc->fake_change) { 4244 for (i=0;i<pcis->n;i++) nnz[i]=1; 4245 } else { 4246 const PetscInt *ii; 4247 PetscInt n; 4248 PetscBool flg_row; 4249 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4250 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4251 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4252 } 4253 for (i=n_vertices;i<total_counts_cc;i++) { 4254 if (PetscBTLookup(change_basis,i)) { 4255 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4256 if (PetscBTLookup(qr_needed_idx,i)) { 4257 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4258 } else { 4259 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4260 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4261 } 4262 } 4263 } 4264 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4265 ierr = PetscFree(nnz);CHKERRQ(ierr); 4266 /* Set interior change in the matrix */ 4267 if (!pcbddc->benign_change || pcbddc->fake_change) { 4268 for (i=0;i<pcis->n;i++) { 4269 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4270 } 4271 } else { 4272 const PetscInt *ii,*jj; 4273 PetscScalar *aa; 4274 PetscInt n; 4275 PetscBool flg_row; 4276 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4277 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4278 for (i=0;i<n;i++) { 4279 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4280 } 4281 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4282 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4283 } 4284 4285 if (pcbddc->dbg_flag) { 4286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4288 } 4289 4290 4291 /* Now we loop on the constraints which need a change of basis */ 4292 /* 4293 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4294 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4295 4296 Basic blocks of change of basis matrix T computed by 4297 4298 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4299 4300 | 1 0 ... 0 s_1/S | 4301 | 0 1 ... 0 s_2/S | 4302 | ... | 4303 | 0 ... 1 s_{n-1}/S | 4304 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4305 4306 with S = \sum_{i=1}^n s_i^2 4307 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4308 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4309 4310 - QR decomposition of constraints otherwise 4311 */ 4312 if (qr_needed) { 4313 /* space to store Q */ 4314 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4315 /* first we issue queries for optimal work */ 4316 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4317 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4318 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4319 lqr_work = -1; 4320 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4321 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4322 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4323 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4324 lgqr_work = -1; 4325 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4326 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4327 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4328 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4329 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4330 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4331 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4332 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4333 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4334 /* array to store scaling factors for reflectors */ 4335 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4336 /* array to store rhs and solution of triangular solver */ 4337 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4338 /* allocating workspace for check */ 4339 if (pcbddc->dbg_flag) { 4340 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4341 } 4342 } 4343 /* array to store whether a node is primal or not */ 4344 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4345 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4346 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4347 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4348 for (i=0;i<total_primal_vertices;i++) { 4349 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4350 } 4351 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4352 4353 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4354 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4355 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4356 if (PetscBTLookup(change_basis,total_counts)) { 4357 /* get constraint info */ 4358 primal_dofs = constraints_n[total_counts]; 4359 dual_dofs = size_of_constraint-primal_dofs; 4360 4361 if (pcbddc->dbg_flag) { 4362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4363 } 4364 4365 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4366 4367 /* copy quadrature constraints for change of basis check */ 4368 if (pcbddc->dbg_flag) { 4369 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4370 } 4371 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4372 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4373 4374 /* compute QR decomposition of constraints */ 4375 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4376 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4377 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4378 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4379 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4380 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4381 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4382 4383 /* explictly compute R^-T */ 4384 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4385 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4386 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4387 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4388 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4389 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4390 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4391 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4392 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4393 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4394 4395 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4397 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4398 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4399 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4400 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4401 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4402 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4403 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4404 4405 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4406 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4407 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4408 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4409 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4410 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4411 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4412 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4414 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4415 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4416 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4417 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4418 4419 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4420 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4421 /* insert cols for primal dofs */ 4422 for (j=0;j<primal_dofs;j++) { 4423 start_vals = &qr_basis[j*size_of_constraint]; 4424 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4425 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4426 } 4427 /* insert cols for dual dofs */ 4428 for (j=0,k=0;j<dual_dofs;k++) { 4429 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4430 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4431 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4432 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4433 j++; 4434 } 4435 } 4436 4437 /* check change of basis */ 4438 if (pcbddc->dbg_flag) { 4439 PetscInt ii,jj; 4440 PetscBool valid_qr=PETSC_TRUE; 4441 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4442 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4443 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4444 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4445 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4446 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4447 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4448 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4449 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4450 for (jj=0;jj<size_of_constraint;jj++) { 4451 for (ii=0;ii<primal_dofs;ii++) { 4452 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4453 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4454 } 4455 } 4456 if (!valid_qr) { 4457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4458 for (jj=0;jj<size_of_constraint;jj++) { 4459 for (ii=0;ii<primal_dofs;ii++) { 4460 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4461 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4462 } 4463 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4464 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4465 } 4466 } 4467 } 4468 } else { 4469 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4470 } 4471 } 4472 } else { /* simple transformation block */ 4473 PetscInt row,col; 4474 PetscScalar val,norm; 4475 4476 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4477 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4478 for (j=0;j<size_of_constraint;j++) { 4479 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4480 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4481 if (!PetscBTLookup(is_primal,row_B)) { 4482 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4483 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4484 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4485 } else { 4486 for (k=0;k<size_of_constraint;k++) { 4487 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4488 if (row != col) { 4489 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4490 } else { 4491 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4492 } 4493 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4494 } 4495 } 4496 } 4497 if (pcbddc->dbg_flag) { 4498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4499 } 4500 } 4501 } else { 4502 if (pcbddc->dbg_flag) { 4503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4504 } 4505 } 4506 } 4507 4508 /* free workspace */ 4509 if (qr_needed) { 4510 if (pcbddc->dbg_flag) { 4511 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4512 } 4513 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4514 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4515 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4516 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4517 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4518 } 4519 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4520 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4522 4523 /* assembling of global change of variable */ 4524 if (!pcbddc->fake_change) { 4525 Mat tmat; 4526 PetscInt bs; 4527 4528 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4529 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4530 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4531 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4532 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4533 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4534 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4535 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4536 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4537 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4538 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4539 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4540 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4541 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4542 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4543 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4544 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4545 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4546 4547 /* check */ 4548 if (pcbddc->dbg_flag) { 4549 PetscReal error; 4550 Vec x,x_change; 4551 4552 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4553 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4554 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4555 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4556 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4557 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4558 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4559 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4560 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4561 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4562 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4563 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4564 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4566 ierr = VecDestroy(&x);CHKERRQ(ierr); 4567 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4568 } 4569 /* adapt sub_schurs computed (if any) */ 4570 if (pcbddc->use_deluxe_scaling) { 4571 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4572 4573 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4574 if (sub_schurs && sub_schurs->S_Ej_all) { 4575 Mat S_new,tmat; 4576 IS is_all_N,is_V_Sall = NULL; 4577 4578 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4579 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4580 if (pcbddc->deluxe_zerorows) { 4581 ISLocalToGlobalMapping NtoSall; 4582 IS is_V; 4583 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4584 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4585 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4586 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4587 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4588 } 4589 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4590 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4591 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4592 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4593 if (pcbddc->deluxe_zerorows) { 4594 const PetscScalar *array; 4595 const PetscInt *idxs_V,*idxs_all; 4596 PetscInt i,n_V; 4597 4598 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4599 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4600 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4601 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4602 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4603 for (i=0;i<n_V;i++) { 4604 PetscScalar val; 4605 PetscInt idx; 4606 4607 idx = idxs_V[i]; 4608 val = array[idxs_all[idxs_V[i]]]; 4609 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4610 } 4611 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4613 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4614 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4615 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4616 } 4617 sub_schurs->S_Ej_all = S_new; 4618 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4619 if (sub_schurs->sum_S_Ej_all) { 4620 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4621 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4622 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4623 if (pcbddc->deluxe_zerorows) { 4624 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4625 } 4626 sub_schurs->sum_S_Ej_all = S_new; 4627 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4628 } 4629 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4630 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4631 } 4632 /* destroy any change of basis context in sub_schurs */ 4633 if (sub_schurs && sub_schurs->change) { 4634 PetscInt i; 4635 4636 for (i=0;i<sub_schurs->n_subs;i++) { 4637 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4638 } 4639 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4640 } 4641 } 4642 if (pcbddc->switch_static) { /* need to save the local change */ 4643 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4644 } else { 4645 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4646 } 4647 /* determine if any process has changed the pressures locally */ 4648 pcbddc->change_interior = pcbddc->benign_have_null; 4649 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4650 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4651 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4652 pcbddc->use_qr_single = qr_needed; 4653 } 4654 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4655 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4656 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4657 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4658 } else { 4659 Mat benign_global = NULL; 4660 if (pcbddc->benign_have_null) { 4661 Mat tmat; 4662 4663 pcbddc->change_interior = PETSC_TRUE; 4664 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4665 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4666 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4667 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4668 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4669 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4670 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4671 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4672 if (pcbddc->benign_change) { 4673 Mat M; 4674 4675 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4676 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4677 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4678 ierr = MatDestroy(&M);CHKERRQ(ierr); 4679 } else { 4680 Mat eye; 4681 PetscScalar *array; 4682 4683 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4684 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4685 for (i=0;i<pcis->n;i++) { 4686 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4687 } 4688 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4689 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4690 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4691 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4692 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4693 } 4694 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4695 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4696 } 4697 if (pcbddc->user_ChangeOfBasisMatrix) { 4698 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4699 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4700 } else if (pcbddc->benign_have_null) { 4701 pcbddc->ChangeOfBasisMatrix = benign_global; 4702 } 4703 } 4704 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4705 IS is_global; 4706 const PetscInt *gidxs; 4707 4708 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4709 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4710 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4711 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4712 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4713 } 4714 } 4715 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4716 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4717 } 4718 4719 if (!pcbddc->fake_change) { 4720 /* add pressure dofs to set of primal nodes for numbering purposes */ 4721 for (i=0;i<pcbddc->benign_n;i++) { 4722 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4723 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4724 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4725 pcbddc->local_primal_size_cc++; 4726 pcbddc->local_primal_size++; 4727 } 4728 4729 /* check if a new primal space has been introduced (also take into account benign trick) */ 4730 pcbddc->new_primal_space_local = PETSC_TRUE; 4731 if (olocal_primal_size == pcbddc->local_primal_size) { 4732 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4733 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4734 if (!pcbddc->new_primal_space_local) { 4735 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4736 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4737 } 4738 } 4739 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4740 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4741 } 4742 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4743 4744 /* flush dbg viewer */ 4745 if (pcbddc->dbg_flag) { 4746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4747 } 4748 4749 /* free workspace */ 4750 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4751 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4752 if (!pcbddc->adaptive_selection) { 4753 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4754 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4755 } else { 4756 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4757 pcbddc->adaptive_constraints_idxs_ptr, 4758 pcbddc->adaptive_constraints_data_ptr, 4759 pcbddc->adaptive_constraints_idxs, 4760 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4761 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4762 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4763 } 4764 PetscFunctionReturn(0); 4765 } 4766 4767 #undef __FUNCT__ 4768 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4769 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4770 { 4771 ISLocalToGlobalMapping map; 4772 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4773 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4774 PetscInt ierr,i,N; 4775 4776 PetscFunctionBegin; 4777 if (pcbddc->graphanalyzed) PetscFunctionReturn(0); 4778 /* Reset previously computed graph */ 4779 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4780 /* Init local Graph struct */ 4781 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4782 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 4783 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N);CHKERRQ(ierr); 4784 4785 /* Check validity of the csr graph passed in by the user */ 4786 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4787 4788 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4789 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4790 PetscInt *xadj,*adjncy; 4791 PetscInt nvtxs; 4792 PetscBool flg_row=PETSC_FALSE; 4793 4794 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4795 if (flg_row) { 4796 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4797 pcbddc->computed_rowadj = PETSC_TRUE; 4798 } 4799 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4800 } 4801 if (pcbddc->dbg_flag) { 4802 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4803 } 4804 4805 /* Setup of Graph */ 4806 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4807 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4808 4809 /* attach info on disconnected subdomains if present */ 4810 if (pcbddc->n_local_subs) { 4811 PetscInt *local_subs; 4812 4813 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4814 for (i=0;i<pcbddc->n_local_subs;i++) { 4815 const PetscInt *idxs; 4816 PetscInt nl,j; 4817 4818 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4819 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4820 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 4821 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4822 } 4823 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4824 pcbddc->mat_graph->local_subs = local_subs; 4825 } 4826 4827 /* Graph's connected components analysis */ 4828 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4829 4830 /* set flag indicating analysis has been done */ 4831 pcbddc->graphanalyzed = PETSC_TRUE; 4832 PetscFunctionReturn(0); 4833 } 4834 4835 #undef __FUNCT__ 4836 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4837 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4838 { 4839 PetscInt i,j; 4840 PetscScalar *alphas; 4841 PetscErrorCode ierr; 4842 4843 PetscFunctionBegin; 4844 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4845 for (i=0;i<n;i++) { 4846 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4847 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4848 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4849 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4850 } 4851 ierr = PetscFree(alphas);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 #undef __FUNCT__ 4856 #define __FUNCT__ "MatISGetSubassemblingPattern" 4857 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4858 { 4859 Mat A; 4860 PetscInt n_neighs,*neighs,*n_shared,**shared; 4861 PetscMPIInt size,rank,color; 4862 PetscInt *xadj,*adjncy; 4863 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4864 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4865 PetscInt void_procs,*procs_candidates = NULL; 4866 PetscInt xadj_count, *count; 4867 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4868 PetscSubcomm psubcomm; 4869 MPI_Comm subcomm; 4870 PetscErrorCode ierr; 4871 4872 PetscFunctionBegin; 4873 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4874 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4875 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4876 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4877 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4878 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4879 4880 if (have_void) *have_void = PETSC_FALSE; 4881 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4882 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4883 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4884 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4885 im_active = !!(n); 4886 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4887 void_procs = size - active_procs; 4888 /* get ranks of of non-active processes in mat communicator */ 4889 if (void_procs) { 4890 PetscInt ncand; 4891 4892 if (have_void) *have_void = PETSC_TRUE; 4893 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4894 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4895 for (i=0,ncand=0;i<size;i++) { 4896 if (!procs_candidates[i]) { 4897 procs_candidates[ncand++] = i; 4898 } 4899 } 4900 /* force n_subdomains to be not greater that the number of non-active processes */ 4901 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4902 } 4903 4904 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4905 if (active_procs < *n_subdomains) { 4906 PetscInt issize,isidx; 4907 if (im_active) { 4908 issize = 1; 4909 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4910 isidx = procs_candidates[rank]; 4911 } else { 4912 isidx = rank; 4913 } 4914 } else { 4915 issize = 0; 4916 isidx = -1; 4917 } 4918 *n_subdomains = active_procs; 4919 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4920 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4921 PetscFunctionReturn(0); 4922 } 4923 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4924 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4925 threshold = PetscMax(threshold,2); 4926 4927 /* Get info on mapping */ 4928 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4929 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4930 4931 /* build local CSR graph of subdomains' connectivity */ 4932 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4933 xadj[0] = 0; 4934 xadj[1] = PetscMax(n_neighs-1,0); 4935 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4936 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4937 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4938 for (i=1;i<n_neighs;i++) 4939 for (j=0;j<n_shared[i];j++) 4940 count[shared[i][j]] += 1; 4941 4942 xadj_count = 0; 4943 for (i=1;i<n_neighs;i++) { 4944 for (j=0;j<n_shared[i];j++) { 4945 if (count[shared[i][j]] < threshold) { 4946 adjncy[xadj_count] = neighs[i]; 4947 adjncy_wgt[xadj_count] = n_shared[i]; 4948 xadj_count++; 4949 break; 4950 } 4951 } 4952 } 4953 xadj[1] = xadj_count; 4954 ierr = PetscFree(count);CHKERRQ(ierr); 4955 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4956 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4957 4958 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4959 4960 /* Restrict work on active processes only */ 4961 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4962 if (void_procs) { 4963 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4964 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4965 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4966 subcomm = PetscSubcommChild(psubcomm); 4967 } else { 4968 psubcomm = NULL; 4969 subcomm = PetscObjectComm((PetscObject)mat); 4970 } 4971 4972 v_wgt = NULL; 4973 if (!color) { 4974 ierr = PetscFree(xadj);CHKERRQ(ierr); 4975 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4976 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4977 } else { 4978 Mat subdomain_adj; 4979 IS new_ranks,new_ranks_contig; 4980 MatPartitioning partitioner; 4981 PetscInt rstart=0,rend=0; 4982 PetscInt *is_indices,*oldranks; 4983 PetscMPIInt size; 4984 PetscBool aggregate; 4985 4986 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4987 if (void_procs) { 4988 PetscInt prank = rank; 4989 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4990 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4991 for (i=0;i<xadj[1];i++) { 4992 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4993 } 4994 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4995 } else { 4996 oldranks = NULL; 4997 } 4998 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4999 if (aggregate) { /* TODO: all this part could be made more efficient */ 5000 PetscInt lrows,row,ncols,*cols; 5001 PetscMPIInt nrank; 5002 PetscScalar *vals; 5003 5004 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5005 lrows = 0; 5006 if (nrank<redprocs) { 5007 lrows = size/redprocs; 5008 if (nrank<size%redprocs) lrows++; 5009 } 5010 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5011 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5012 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5013 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5014 row = nrank; 5015 ncols = xadj[1]-xadj[0]; 5016 cols = adjncy; 5017 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5018 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5019 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5020 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5021 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5022 ierr = PetscFree(xadj);CHKERRQ(ierr); 5023 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5024 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5025 ierr = PetscFree(vals);CHKERRQ(ierr); 5026 if (use_vwgt) { 5027 Vec v; 5028 const PetscScalar *array; 5029 PetscInt nl; 5030 5031 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5032 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5033 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5034 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5035 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5036 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5037 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5038 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5039 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5040 ierr = VecDestroy(&v);CHKERRQ(ierr); 5041 } 5042 } else { 5043 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5044 if (use_vwgt) { 5045 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5046 v_wgt[0] = local_size; 5047 } 5048 } 5049 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5050 5051 /* Partition */ 5052 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5053 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5054 if (v_wgt) { 5055 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5056 } 5057 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5058 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5059 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5060 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5061 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5062 5063 /* renumber new_ranks to avoid "holes" in new set of processors */ 5064 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5065 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5066 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5067 if (!aggregate) { 5068 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5069 #if defined(PETSC_USE_DEBUG) 5070 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5071 #endif 5072 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5073 } else if (oldranks) { 5074 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5075 } else { 5076 ranks_send_to_idx[0] = is_indices[0]; 5077 } 5078 } else { 5079 PetscInt idxs[1]; 5080 PetscMPIInt tag; 5081 MPI_Request *reqs; 5082 5083 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5084 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5085 for (i=rstart;i<rend;i++) { 5086 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5087 } 5088 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5089 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5090 ierr = PetscFree(reqs);CHKERRQ(ierr); 5091 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5092 #if defined(PETSC_USE_DEBUG) 5093 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5094 #endif 5095 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5096 } else if (oldranks) { 5097 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5098 } else { 5099 ranks_send_to_idx[0] = idxs[0]; 5100 } 5101 } 5102 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5103 /* clean up */ 5104 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5105 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5106 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5107 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5108 } 5109 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5110 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5111 5112 /* assemble parallel IS for sends */ 5113 i = 1; 5114 if (!color) i=0; 5115 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5116 PetscFunctionReturn(0); 5117 } 5118 5119 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5120 5121 #undef __FUNCT__ 5122 #define __FUNCT__ "MatISSubassemble" 5123 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5124 { 5125 Mat local_mat; 5126 IS is_sends_internal; 5127 PetscInt rows,cols,new_local_rows; 5128 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5129 PetscBool ismatis,isdense,newisdense,destroy_mat; 5130 ISLocalToGlobalMapping l2gmap; 5131 PetscInt* l2gmap_indices; 5132 const PetscInt* is_indices; 5133 MatType new_local_type; 5134 /* buffers */ 5135 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5136 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5137 PetscInt *recv_buffer_idxs_local; 5138 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5139 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5140 /* MPI */ 5141 MPI_Comm comm,comm_n; 5142 PetscSubcomm subcomm; 5143 PetscMPIInt n_sends,n_recvs,commsize; 5144 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5145 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5146 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5147 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5148 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5149 PetscErrorCode ierr; 5150 5151 PetscFunctionBegin; 5152 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5153 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5154 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5155 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5156 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5157 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5158 PetscValidLogicalCollectiveBool(mat,reuse,6); 5159 PetscValidLogicalCollectiveInt(mat,nis,8); 5160 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5161 if (nvecs) { 5162 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5163 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5164 } 5165 /* further checks */ 5166 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5167 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5168 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5169 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5170 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5171 if (reuse && *mat_n) { 5172 PetscInt mrows,mcols,mnrows,mncols; 5173 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5174 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5175 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5176 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5177 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5178 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5179 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5180 } 5181 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5182 PetscValidLogicalCollectiveInt(mat,bs,0); 5183 5184 /* prepare IS for sending if not provided */ 5185 if (!is_sends) { 5186 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5187 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5188 } else { 5189 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5190 is_sends_internal = is_sends; 5191 } 5192 5193 /* get comm */ 5194 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5195 5196 /* compute number of sends */ 5197 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5198 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5199 5200 /* compute number of receives */ 5201 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5202 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5203 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5204 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5205 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5206 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5207 ierr = PetscFree(iflags);CHKERRQ(ierr); 5208 5209 /* restrict comm if requested */ 5210 subcomm = 0; 5211 destroy_mat = PETSC_FALSE; 5212 if (restrict_comm) { 5213 PetscMPIInt color,subcommsize; 5214 5215 color = 0; 5216 if (restrict_full) { 5217 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5218 } else { 5219 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5220 } 5221 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5222 subcommsize = commsize - subcommsize; 5223 /* check if reuse has been requested */ 5224 if (reuse) { 5225 if (*mat_n) { 5226 PetscMPIInt subcommsize2; 5227 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5228 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5229 comm_n = PetscObjectComm((PetscObject)*mat_n); 5230 } else { 5231 comm_n = PETSC_COMM_SELF; 5232 } 5233 } else { /* MAT_INITIAL_MATRIX */ 5234 PetscMPIInt rank; 5235 5236 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5237 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5238 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5239 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5240 comm_n = PetscSubcommChild(subcomm); 5241 } 5242 /* flag to destroy *mat_n if not significative */ 5243 if (color) destroy_mat = PETSC_TRUE; 5244 } else { 5245 comm_n = comm; 5246 } 5247 5248 /* prepare send/receive buffers */ 5249 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5250 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5251 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5252 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5253 if (nis) { 5254 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5255 } 5256 5257 /* Get data from local matrices */ 5258 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5259 /* TODO: See below some guidelines on how to prepare the local buffers */ 5260 /* 5261 send_buffer_vals should contain the raw values of the local matrix 5262 send_buffer_idxs should contain: 5263 - MatType_PRIVATE type 5264 - PetscInt size_of_l2gmap 5265 - PetscInt global_row_indices[size_of_l2gmap] 5266 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5267 */ 5268 else { 5269 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5270 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5271 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5272 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5273 send_buffer_idxs[1] = i; 5274 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5275 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5276 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5277 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5278 for (i=0;i<n_sends;i++) { 5279 ilengths_vals[is_indices[i]] = len*len; 5280 ilengths_idxs[is_indices[i]] = len+2; 5281 } 5282 } 5283 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5284 /* additional is (if any) */ 5285 if (nis) { 5286 PetscMPIInt psum; 5287 PetscInt j; 5288 for (j=0,psum=0;j<nis;j++) { 5289 PetscInt plen; 5290 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5291 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5292 psum += len+1; /* indices + lenght */ 5293 } 5294 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5295 for (j=0,psum=0;j<nis;j++) { 5296 PetscInt plen; 5297 const PetscInt *is_array_idxs; 5298 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5299 send_buffer_idxs_is[psum] = plen; 5300 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5301 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5302 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5303 psum += plen+1; /* indices + lenght */ 5304 } 5305 for (i=0;i<n_sends;i++) { 5306 ilengths_idxs_is[is_indices[i]] = psum; 5307 } 5308 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5309 } 5310 5311 buf_size_idxs = 0; 5312 buf_size_vals = 0; 5313 buf_size_idxs_is = 0; 5314 buf_size_vecs = 0; 5315 for (i=0;i<n_recvs;i++) { 5316 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5317 buf_size_vals += (PetscInt)olengths_vals[i]; 5318 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5319 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5320 } 5321 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5322 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5323 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5324 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5325 5326 /* get new tags for clean communications */ 5327 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5328 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5329 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5330 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5331 5332 /* allocate for requests */ 5333 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5334 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5335 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5336 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5337 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5338 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5339 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5340 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5341 5342 /* communications */ 5343 ptr_idxs = recv_buffer_idxs; 5344 ptr_vals = recv_buffer_vals; 5345 ptr_idxs_is = recv_buffer_idxs_is; 5346 ptr_vecs = recv_buffer_vecs; 5347 for (i=0;i<n_recvs;i++) { 5348 source_dest = onodes[i]; 5349 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5350 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5351 ptr_idxs += olengths_idxs[i]; 5352 ptr_vals += olengths_vals[i]; 5353 if (nis) { 5354 source_dest = onodes_is[i]; 5355 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5356 ptr_idxs_is += olengths_idxs_is[i]; 5357 } 5358 if (nvecs) { 5359 source_dest = onodes[i]; 5360 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5361 ptr_vecs += olengths_idxs[i]-2; 5362 } 5363 } 5364 for (i=0;i<n_sends;i++) { 5365 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5366 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5367 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5368 if (nis) { 5369 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5370 } 5371 if (nvecs) { 5372 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5373 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5374 } 5375 } 5376 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5377 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5378 5379 /* assemble new l2g map */ 5380 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5381 ptr_idxs = recv_buffer_idxs; 5382 new_local_rows = 0; 5383 for (i=0;i<n_recvs;i++) { 5384 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5385 ptr_idxs += olengths_idxs[i]; 5386 } 5387 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5388 ptr_idxs = recv_buffer_idxs; 5389 new_local_rows = 0; 5390 for (i=0;i<n_recvs;i++) { 5391 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5392 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5393 ptr_idxs += olengths_idxs[i]; 5394 } 5395 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5396 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5397 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5398 5399 /* infer new local matrix type from received local matrices type */ 5400 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5401 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5402 if (n_recvs) { 5403 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5404 ptr_idxs = recv_buffer_idxs; 5405 for (i=0;i<n_recvs;i++) { 5406 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5407 new_local_type_private = MATAIJ_PRIVATE; 5408 break; 5409 } 5410 ptr_idxs += olengths_idxs[i]; 5411 } 5412 switch (new_local_type_private) { 5413 case MATDENSE_PRIVATE: 5414 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5415 new_local_type = MATSEQAIJ; 5416 bs = 1; 5417 } else { /* if I receive only 1 dense matrix */ 5418 new_local_type = MATSEQDENSE; 5419 bs = 1; 5420 } 5421 break; 5422 case MATAIJ_PRIVATE: 5423 new_local_type = MATSEQAIJ; 5424 bs = 1; 5425 break; 5426 case MATBAIJ_PRIVATE: 5427 new_local_type = MATSEQBAIJ; 5428 break; 5429 case MATSBAIJ_PRIVATE: 5430 new_local_type = MATSEQSBAIJ; 5431 break; 5432 default: 5433 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5434 break; 5435 } 5436 } else { /* by default, new_local_type is seqdense */ 5437 new_local_type = MATSEQDENSE; 5438 bs = 1; 5439 } 5440 5441 /* create MATIS object if needed */ 5442 if (!reuse) { 5443 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5444 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5445 } else { 5446 /* it also destroys the local matrices */ 5447 if (*mat_n) { 5448 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5449 } else { /* this is a fake object */ 5450 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5451 } 5452 } 5453 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5454 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5455 5456 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5457 5458 /* Global to local map of received indices */ 5459 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5460 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5461 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5462 5463 /* restore attributes -> type of incoming data and its size */ 5464 buf_size_idxs = 0; 5465 for (i=0;i<n_recvs;i++) { 5466 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5467 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5468 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5469 } 5470 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5471 5472 /* set preallocation */ 5473 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5474 if (!newisdense) { 5475 PetscInt *new_local_nnz=0; 5476 5477 ptr_idxs = recv_buffer_idxs_local; 5478 if (n_recvs) { 5479 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5480 } 5481 for (i=0;i<n_recvs;i++) { 5482 PetscInt j; 5483 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5484 for (j=0;j<*(ptr_idxs+1);j++) { 5485 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5486 } 5487 } else { 5488 /* TODO */ 5489 } 5490 ptr_idxs += olengths_idxs[i]; 5491 } 5492 if (new_local_nnz) { 5493 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5494 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5495 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5496 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5497 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5498 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5499 } else { 5500 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5501 } 5502 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5503 } else { 5504 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5505 } 5506 5507 /* set values */ 5508 ptr_vals = recv_buffer_vals; 5509 ptr_idxs = recv_buffer_idxs_local; 5510 for (i=0;i<n_recvs;i++) { 5511 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5512 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5513 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5514 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5515 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5516 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5517 } else { 5518 /* TODO */ 5519 } 5520 ptr_idxs += olengths_idxs[i]; 5521 ptr_vals += olengths_vals[i]; 5522 } 5523 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5524 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5525 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5526 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5527 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5528 5529 #if 0 5530 if (!restrict_comm) { /* check */ 5531 Vec lvec,rvec; 5532 PetscReal infty_error; 5533 5534 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5535 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5536 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5537 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5538 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5539 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5540 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5541 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5542 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5543 } 5544 #endif 5545 5546 /* assemble new additional is (if any) */ 5547 if (nis) { 5548 PetscInt **temp_idxs,*count_is,j,psum; 5549 5550 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5551 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5552 ptr_idxs = recv_buffer_idxs_is; 5553 psum = 0; 5554 for (i=0;i<n_recvs;i++) { 5555 for (j=0;j<nis;j++) { 5556 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5557 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5558 psum += plen; 5559 ptr_idxs += plen+1; /* shift pointer to received data */ 5560 } 5561 } 5562 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5563 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5564 for (i=1;i<nis;i++) { 5565 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5566 } 5567 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5568 ptr_idxs = recv_buffer_idxs_is; 5569 for (i=0;i<n_recvs;i++) { 5570 for (j=0;j<nis;j++) { 5571 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5572 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5573 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5574 ptr_idxs += plen+1; /* shift pointer to received data */ 5575 } 5576 } 5577 for (i=0;i<nis;i++) { 5578 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5579 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5580 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5581 } 5582 ierr = PetscFree(count_is);CHKERRQ(ierr); 5583 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5584 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5585 } 5586 /* free workspace */ 5587 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5588 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5589 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5590 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5591 if (isdense) { 5592 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5593 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5594 } else { 5595 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5596 } 5597 if (nis) { 5598 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5599 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5600 } 5601 5602 if (nvecs) { 5603 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5604 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5605 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5606 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5607 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5608 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5609 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5610 /* set values */ 5611 ptr_vals = recv_buffer_vecs; 5612 ptr_idxs = recv_buffer_idxs_local; 5613 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5614 for (i=0;i<n_recvs;i++) { 5615 PetscInt j; 5616 for (j=0;j<*(ptr_idxs+1);j++) { 5617 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5618 } 5619 ptr_idxs += olengths_idxs[i]; 5620 ptr_vals += olengths_idxs[i]-2; 5621 } 5622 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5623 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5624 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5625 } 5626 5627 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5628 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5629 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5630 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5631 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5632 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5633 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5634 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5635 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5636 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5637 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5638 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5639 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5640 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5641 ierr = PetscFree(onodes);CHKERRQ(ierr); 5642 if (nis) { 5643 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5644 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5645 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5646 } 5647 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5648 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5649 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5650 for (i=0;i<nis;i++) { 5651 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5652 } 5653 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5654 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5655 } 5656 *mat_n = NULL; 5657 } 5658 PetscFunctionReturn(0); 5659 } 5660 5661 /* temporary hack into ksp private data structure */ 5662 #include <petsc/private/kspimpl.h> 5663 5664 #undef __FUNCT__ 5665 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5666 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5667 { 5668 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5669 PC_IS *pcis = (PC_IS*)pc->data; 5670 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5671 Mat coarsedivudotp = NULL; 5672 MatNullSpace CoarseNullSpace = NULL; 5673 ISLocalToGlobalMapping coarse_islg; 5674 IS coarse_is,*isarray; 5675 PetscInt i,im_active=-1,active_procs=-1; 5676 PetscInt nis,nisdofs,nisneu,nisvert; 5677 PC pc_temp; 5678 PCType coarse_pc_type; 5679 KSPType coarse_ksp_type; 5680 PetscBool multilevel_requested,multilevel_allowed; 5681 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5682 Mat t_coarse_mat_is; 5683 PetscInt ncoarse; 5684 PetscBool compute_vecs = PETSC_FALSE; 5685 PetscScalar *array; 5686 MatReuse coarse_mat_reuse; 5687 PetscBool restr, full_restr, have_void; 5688 PetscErrorCode ierr; 5689 5690 PetscFunctionBegin; 5691 /* Assign global numbering to coarse dofs */ 5692 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5693 PetscInt ocoarse_size; 5694 compute_vecs = PETSC_TRUE; 5695 ocoarse_size = pcbddc->coarse_size; 5696 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5697 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5698 /* see if we can avoid some work */ 5699 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5700 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5701 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5702 PC pc; 5703 PetscBool isbddc; 5704 5705 /* temporary workaround since PCBDDC does not have a reset method so far */ 5706 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5707 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5708 if (isbddc) { 5709 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5710 } else { 5711 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5712 } 5713 coarse_reuse = PETSC_FALSE; 5714 } else { /* we can safely reuse already computed coarse matrix */ 5715 coarse_reuse = PETSC_TRUE; 5716 } 5717 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5718 coarse_reuse = PETSC_FALSE; 5719 } 5720 /* reset any subassembling information */ 5721 if (!coarse_reuse || pcbddc->recompute_topography) { 5722 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5723 } 5724 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5725 coarse_reuse = PETSC_TRUE; 5726 } 5727 /* assemble coarse matrix */ 5728 if (coarse_reuse && pcbddc->coarse_ksp) { 5729 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5730 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5731 coarse_mat_reuse = MAT_REUSE_MATRIX; 5732 } else { 5733 coarse_mat = NULL; 5734 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5735 } 5736 5737 /* creates temporary l2gmap and IS for coarse indexes */ 5738 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5739 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5740 5741 /* creates temporary MATIS object for coarse matrix */ 5742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5743 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5744 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5745 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5746 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5747 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5748 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5750 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5751 5752 /* count "active" (i.e. with positive local size) and "void" processes */ 5753 im_active = !!(pcis->n); 5754 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5755 5756 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5757 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5758 /* full_restr : just use the receivers from the subassembling pattern */ 5759 coarse_mat_is = NULL; 5760 multilevel_allowed = PETSC_FALSE; 5761 multilevel_requested = PETSC_FALSE; 5762 full_restr = PETSC_TRUE; 5763 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5764 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5765 if (multilevel_requested) { 5766 ncoarse = active_procs/pcbddc->coarsening_ratio; 5767 restr = PETSC_FALSE; 5768 full_restr = PETSC_FALSE; 5769 } else { 5770 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5771 restr = PETSC_TRUE; 5772 full_restr = PETSC_TRUE; 5773 } 5774 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5775 ncoarse = PetscMax(1,ncoarse); 5776 if (!pcbddc->coarse_subassembling) { 5777 if (pcbddc->coarsening_ratio > 1) { 5778 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5779 } else { 5780 PetscMPIInt size,rank; 5781 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5782 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5783 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5784 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5785 } 5786 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5787 PetscInt psum; 5788 PetscMPIInt size; 5789 if (pcbddc->coarse_ksp) psum = 1; 5790 else psum = 0; 5791 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5792 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5793 if (ncoarse < size) have_void = PETSC_TRUE; 5794 } 5795 /* determine if we can go multilevel */ 5796 if (multilevel_requested) { 5797 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5798 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5799 } 5800 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5801 5802 /* dump subassembling pattern */ 5803 if (pcbddc->dbg_flag && multilevel_allowed) { 5804 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5805 } 5806 5807 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5808 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5809 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5810 const PetscInt *idxs; 5811 ISLocalToGlobalMapping tmap; 5812 5813 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5814 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5815 /* allocate space for temporary storage */ 5816 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5817 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5818 /* allocate for IS array */ 5819 nisdofs = pcbddc->n_ISForDofsLocal; 5820 nisneu = !!pcbddc->NeumannBoundariesLocal; 5821 nisvert = 0; /* nisvert is not used */ 5822 nis = nisdofs + nisneu + nisvert; 5823 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5824 /* dofs splitting */ 5825 for (i=0;i<nisdofs;i++) { 5826 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5827 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5828 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5829 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5830 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5831 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5832 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5833 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5834 } 5835 /* neumann boundaries */ 5836 if (pcbddc->NeumannBoundariesLocal) { 5837 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5838 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5839 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5840 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5841 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5842 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5843 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5844 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5845 } 5846 /* free memory */ 5847 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5848 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5849 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5850 } else { 5851 nis = 0; 5852 nisdofs = 0; 5853 nisneu = 0; 5854 nisvert = 0; 5855 isarray = NULL; 5856 } 5857 /* destroy no longer needed map */ 5858 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5859 5860 /* subassemble */ 5861 if (multilevel_allowed) { 5862 Vec vp[1]; 5863 PetscInt nvecs = 0; 5864 PetscBool reuse,reuser; 5865 5866 if (coarse_mat) reuse = PETSC_TRUE; 5867 else reuse = PETSC_FALSE; 5868 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5869 vp[0] = NULL; 5870 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5871 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5872 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5873 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5874 nvecs = 1; 5875 5876 if (pcbddc->divudotp) { 5877 Mat B,loc_divudotp; 5878 Vec v,p; 5879 IS dummy; 5880 PetscInt np; 5881 5882 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5883 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5884 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5885 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5886 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5887 ierr = VecSet(p,1.);CHKERRQ(ierr); 5888 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5889 ierr = VecDestroy(&p);CHKERRQ(ierr); 5890 ierr = MatDestroy(&B);CHKERRQ(ierr); 5891 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5892 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5893 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5894 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5895 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5896 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5897 ierr = VecDestroy(&v);CHKERRQ(ierr); 5898 } 5899 } 5900 if (reuser) { 5901 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5902 } else { 5903 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5904 } 5905 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5906 PetscScalar *arraym,*arrayv; 5907 PetscInt nl; 5908 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5910 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5911 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5912 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5913 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5914 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5915 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5916 } else { 5917 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5918 } 5919 } else { 5920 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5921 } 5922 if (coarse_mat_is || coarse_mat) { 5923 PetscMPIInt size; 5924 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5925 if (!multilevel_allowed) { 5926 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5927 } else { 5928 Mat A; 5929 5930 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5931 if (coarse_mat_is) { 5932 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5933 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5934 coarse_mat = coarse_mat_is; 5935 } 5936 /* be sure we don't have MatSeqDENSE as local mat */ 5937 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5938 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5939 } 5940 } 5941 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5942 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5943 5944 /* create local to global scatters for coarse problem */ 5945 if (compute_vecs) { 5946 PetscInt lrows; 5947 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5948 if (coarse_mat) { 5949 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5950 } else { 5951 lrows = 0; 5952 } 5953 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5954 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5955 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5956 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5957 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5958 } 5959 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5960 5961 /* set defaults for coarse KSP and PC */ 5962 if (multilevel_allowed) { 5963 coarse_ksp_type = KSPRICHARDSON; 5964 coarse_pc_type = PCBDDC; 5965 } else { 5966 coarse_ksp_type = KSPPREONLY; 5967 coarse_pc_type = PCREDUNDANT; 5968 } 5969 5970 /* print some info if requested */ 5971 if (pcbddc->dbg_flag) { 5972 if (!multilevel_allowed) { 5973 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5974 if (multilevel_requested) { 5975 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5976 } else if (pcbddc->max_levels) { 5977 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5978 } 5979 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5980 } 5981 } 5982 5983 /* create the coarse KSP object only once with defaults */ 5984 if (coarse_mat) { 5985 PetscViewer dbg_viewer = NULL; 5986 if (pcbddc->dbg_flag) { 5987 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5988 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5989 } 5990 if (!pcbddc->coarse_ksp) { 5991 char prefix[256],str_level[16]; 5992 size_t len; 5993 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5994 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5995 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5996 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5997 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5998 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5999 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6000 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6001 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6002 /* prefix */ 6003 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6004 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6005 if (!pcbddc->current_level) { 6006 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6007 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6008 } else { 6009 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6010 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6011 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6012 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6013 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6014 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6015 } 6016 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6017 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6018 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6019 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6020 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6021 /* allow user customization */ 6022 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6023 } 6024 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6025 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6026 if (nisdofs) { 6027 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6028 for (i=0;i<nisdofs;i++) { 6029 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6030 } 6031 } 6032 if (nisneu) { 6033 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6034 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6035 } 6036 if (nisvert) { 6037 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6038 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6039 } 6040 6041 /* get some info after set from options */ 6042 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6043 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6044 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6045 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6046 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6047 isbddc = PETSC_FALSE; 6048 } 6049 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6050 if (isredundant) { 6051 KSP inner_ksp; 6052 PC inner_pc; 6053 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6054 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6055 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6056 } 6057 6058 /* parameters which miss an API */ 6059 if (isbddc) { 6060 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6061 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6062 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6063 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6064 if (pcbddc_coarse->benign_saddle_point) { 6065 Mat coarsedivudotp_is; 6066 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6067 IS row,col; 6068 const PetscInt *gidxs; 6069 PetscInt n,st,M,N; 6070 6071 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6072 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6073 st = st-n; 6074 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6075 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6076 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6077 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6078 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6079 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6080 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6081 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6082 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6083 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6084 ierr = ISDestroy(&row);CHKERRQ(ierr); 6085 ierr = ISDestroy(&col);CHKERRQ(ierr); 6086 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6087 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6088 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6089 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6090 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6091 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6092 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6093 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6094 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6095 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6096 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6097 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6098 } 6099 } 6100 6101 /* propagate symmetry info of coarse matrix */ 6102 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6103 if (pc->pmat->symmetric_set) { 6104 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6105 } 6106 if (pc->pmat->hermitian_set) { 6107 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6108 } 6109 if (pc->pmat->spd_set) { 6110 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6111 } 6112 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6113 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6114 } 6115 /* set operators */ 6116 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6117 if (pcbddc->dbg_flag) { 6118 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6119 } 6120 } 6121 ierr = PetscFree(isarray);CHKERRQ(ierr); 6122 #if 0 6123 { 6124 PetscViewer viewer; 6125 char filename[256]; 6126 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6127 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6128 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6129 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6130 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6131 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6132 } 6133 #endif 6134 6135 if (pcbddc->coarse_ksp) { 6136 Vec crhs,csol; 6137 6138 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6139 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6140 if (!csol) { 6141 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6142 } 6143 if (!crhs) { 6144 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6145 } 6146 } 6147 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6148 6149 /* compute null space for coarse solver if the benign trick has been requested */ 6150 if (pcbddc->benign_null) { 6151 6152 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6153 for (i=0;i<pcbddc->benign_n;i++) { 6154 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6155 } 6156 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6157 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6158 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6159 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6160 if (coarse_mat) { 6161 Vec nullv; 6162 PetscScalar *array,*array2; 6163 PetscInt nl; 6164 6165 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6166 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6167 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6168 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6169 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6170 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6171 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6172 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6173 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6174 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6175 } 6176 } 6177 6178 if (pcbddc->coarse_ksp) { 6179 PetscBool ispreonly; 6180 6181 if (CoarseNullSpace) { 6182 PetscBool isnull; 6183 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6184 if (isnull) { 6185 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6186 } 6187 /* TODO: add local nullspaces (if any) */ 6188 } 6189 /* setup coarse ksp */ 6190 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6191 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6192 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6193 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6194 KSP check_ksp; 6195 KSPType check_ksp_type; 6196 PC check_pc; 6197 Vec check_vec,coarse_vec; 6198 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6199 PetscInt its; 6200 PetscBool compute_eigs; 6201 PetscReal *eigs_r,*eigs_c; 6202 PetscInt neigs; 6203 const char *prefix; 6204 6205 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6206 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6207 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6208 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6209 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6210 /* prevent from setup unneeded object */ 6211 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6212 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6213 if (ispreonly) { 6214 check_ksp_type = KSPPREONLY; 6215 compute_eigs = PETSC_FALSE; 6216 } else { 6217 check_ksp_type = KSPGMRES; 6218 compute_eigs = PETSC_TRUE; 6219 } 6220 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6221 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6222 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6223 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6224 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6225 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6226 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6227 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6228 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6229 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6230 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6231 /* create random vec */ 6232 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6233 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6234 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6235 /* solve coarse problem */ 6236 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6237 /* set eigenvalue estimation if preonly has not been requested */ 6238 if (compute_eigs) { 6239 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6240 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6241 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6242 if (neigs) { 6243 lambda_max = eigs_r[neigs-1]; 6244 lambda_min = eigs_r[0]; 6245 if (pcbddc->use_coarse_estimates) { 6246 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6247 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6248 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6249 } 6250 } 6251 } 6252 } 6253 6254 /* check coarse problem residual error */ 6255 if (pcbddc->dbg_flag) { 6256 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6257 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6258 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6259 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6260 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6261 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6262 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6263 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6264 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6265 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6266 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6267 if (CoarseNullSpace) { 6268 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6269 } 6270 if (compute_eigs) { 6271 PetscReal lambda_max_s,lambda_min_s; 6272 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6273 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6274 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6275 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6276 for (i=0;i<neigs;i++) { 6277 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6278 } 6279 } 6280 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6281 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6282 } 6283 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6284 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6285 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6286 if (compute_eigs) { 6287 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6288 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6289 } 6290 } 6291 } 6292 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6293 /* print additional info */ 6294 if (pcbddc->dbg_flag) { 6295 /* waits until all processes reaches this point */ 6296 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6297 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6298 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6299 } 6300 6301 /* free memory */ 6302 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6303 PetscFunctionReturn(0); 6304 } 6305 6306 #undef __FUNCT__ 6307 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6308 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6309 { 6310 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6311 PC_IS* pcis = (PC_IS*)pc->data; 6312 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6313 IS subset,subset_mult,subset_n; 6314 PetscInt local_size,coarse_size=0; 6315 PetscInt *local_primal_indices=NULL; 6316 const PetscInt *t_local_primal_indices; 6317 PetscErrorCode ierr; 6318 6319 PetscFunctionBegin; 6320 /* Compute global number of coarse dofs */ 6321 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6322 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6323 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6324 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6325 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6326 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6327 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6328 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6329 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6330 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6331 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6332 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6333 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6334 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6335 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6336 6337 /* check numbering */ 6338 if (pcbddc->dbg_flag) { 6339 PetscScalar coarsesum,*array,*array2; 6340 PetscInt i; 6341 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6342 6343 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6344 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6345 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6346 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6347 /* counter */ 6348 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6349 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6350 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6351 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6352 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6353 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6354 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6355 for (i=0;i<pcbddc->local_primal_size;i++) { 6356 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6357 } 6358 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6359 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6360 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6361 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6362 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6363 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6364 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6365 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6366 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6367 for (i=0;i<pcis->n;i++) { 6368 if (array[i] != 0.0 && array[i] != array2[i]) { 6369 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6370 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6371 set_error = PETSC_TRUE; 6372 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6374 } 6375 } 6376 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6377 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6378 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6379 for (i=0;i<pcis->n;i++) { 6380 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6381 } 6382 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6383 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6384 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6385 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6386 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6387 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6388 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6389 PetscInt *gidxs; 6390 6391 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6392 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6396 for (i=0;i<pcbddc->local_primal_size;i++) { 6397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6398 } 6399 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6400 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6401 } 6402 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6403 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6404 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6405 } 6406 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6407 /* get back data */ 6408 *coarse_size_n = coarse_size; 6409 *local_primal_indices_n = local_primal_indices; 6410 PetscFunctionReturn(0); 6411 } 6412 6413 #undef __FUNCT__ 6414 #define __FUNCT__ "PCBDDCGlobalToLocal" 6415 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6416 { 6417 IS localis_t; 6418 PetscInt i,lsize,*idxs,n; 6419 PetscScalar *vals; 6420 PetscErrorCode ierr; 6421 6422 PetscFunctionBegin; 6423 /* get indices in local ordering exploiting local to global map */ 6424 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6425 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6426 for (i=0;i<lsize;i++) vals[i] = 1.0; 6427 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6428 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6429 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6430 if (idxs) { /* multilevel guard */ 6431 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6432 } 6433 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6434 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6435 ierr = PetscFree(vals);CHKERRQ(ierr); 6436 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6437 /* now compute set in local ordering */ 6438 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6439 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6440 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6441 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6442 for (i=0,lsize=0;i<n;i++) { 6443 if (PetscRealPart(vals[i]) > 0.5) { 6444 lsize++; 6445 } 6446 } 6447 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6448 for (i=0,lsize=0;i<n;i++) { 6449 if (PetscRealPart(vals[i]) > 0.5) { 6450 idxs[lsize++] = i; 6451 } 6452 } 6453 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6454 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6455 *localis = localis_t; 6456 PetscFunctionReturn(0); 6457 } 6458 6459 #undef __FUNCT__ 6460 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6461 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6462 { 6463 PC_IS *pcis=(PC_IS*)pc->data; 6464 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6465 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6466 Mat S_j; 6467 PetscInt *used_xadj,*used_adjncy; 6468 PetscBool free_used_adj; 6469 PetscErrorCode ierr; 6470 6471 PetscFunctionBegin; 6472 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6473 free_used_adj = PETSC_FALSE; 6474 if (pcbddc->sub_schurs_layers == -1) { 6475 used_xadj = NULL; 6476 used_adjncy = NULL; 6477 } else { 6478 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6479 used_xadj = pcbddc->mat_graph->xadj; 6480 used_adjncy = pcbddc->mat_graph->adjncy; 6481 } else if (pcbddc->computed_rowadj) { 6482 used_xadj = pcbddc->mat_graph->xadj; 6483 used_adjncy = pcbddc->mat_graph->adjncy; 6484 } else { 6485 PetscBool flg_row=PETSC_FALSE; 6486 const PetscInt *xadj,*adjncy; 6487 PetscInt nvtxs; 6488 6489 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6490 if (flg_row) { 6491 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6492 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6493 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6494 free_used_adj = PETSC_TRUE; 6495 } else { 6496 pcbddc->sub_schurs_layers = -1; 6497 used_xadj = NULL; 6498 used_adjncy = NULL; 6499 } 6500 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6501 } 6502 } 6503 6504 /* setup sub_schurs data */ 6505 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6506 if (!sub_schurs->schur_explicit) { 6507 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6508 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6509 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6510 } else { 6511 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6512 PetscBool isseqaij,need_change = PETSC_FALSE; 6513 PetscInt benign_n; 6514 Mat change = NULL; 6515 Vec scaling = NULL; 6516 IS change_primal = NULL; 6517 6518 if (!pcbddc->use_vertices && reuse_solvers) { 6519 PetscInt n_vertices; 6520 6521 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6522 reuse_solvers = (PetscBool)!n_vertices; 6523 } 6524 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6525 if (!isseqaij) { 6526 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6527 if (matis->A == pcbddc->local_mat) { 6528 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6529 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6530 } else { 6531 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6532 } 6533 } 6534 if (!pcbddc->benign_change_explicit) { 6535 benign_n = pcbddc->benign_n; 6536 } else { 6537 benign_n = 0; 6538 } 6539 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6540 We need a global reduction to avoid possible deadlocks. 6541 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6542 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6543 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6544 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6545 need_change = (PetscBool)(!need_change); 6546 } 6547 /* If the user defines additional constraints, we import them here. 6548 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6549 if (need_change) { 6550 PC_IS *pcisf; 6551 PC_BDDC *pcbddcf; 6552 PC pcf; 6553 6554 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6555 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6556 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6557 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6558 /* hacks */ 6559 pcisf = (PC_IS*)pcf->data; 6560 pcisf->is_B_local = pcis->is_B_local; 6561 pcisf->vec1_N = pcis->vec1_N; 6562 pcisf->BtoNmap = pcis->BtoNmap; 6563 pcisf->n = pcis->n; 6564 pcisf->n_B = pcis->n_B; 6565 pcbddcf = (PC_BDDC*)pcf->data; 6566 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6567 pcbddcf->mat_graph = pcbddc->mat_graph; 6568 pcbddcf->use_faces = PETSC_TRUE; 6569 pcbddcf->use_change_of_basis = PETSC_TRUE; 6570 pcbddcf->use_change_on_faces = PETSC_TRUE; 6571 pcbddcf->use_qr_single = PETSC_TRUE; 6572 pcbddcf->fake_change = PETSC_TRUE; 6573 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6574 /* store information on primal vertices and change of basis (in local numbering) */ 6575 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6576 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6577 change = pcbddcf->ConstraintMatrix; 6578 pcbddcf->ConstraintMatrix = NULL; 6579 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6580 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6581 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6582 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6583 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6584 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6585 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6586 pcf->ops->destroy = NULL; 6587 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6588 } 6589 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6590 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6591 ierr = MatDestroy(&change);CHKERRQ(ierr); 6592 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6593 } 6594 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6595 6596 /* free adjacency */ 6597 if (free_used_adj) { 6598 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6599 } 6600 PetscFunctionReturn(0); 6601 } 6602 6603 #undef __FUNCT__ 6604 #define __FUNCT__ "PCBDDCInitSubSchurs" 6605 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6606 { 6607 PC_IS *pcis=(PC_IS*)pc->data; 6608 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6609 PCBDDCGraph graph; 6610 PetscErrorCode ierr; 6611 6612 PetscFunctionBegin; 6613 /* attach interface graph for determining subsets */ 6614 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6615 IS verticesIS,verticescomm; 6616 PetscInt vsize,*idxs; 6617 6618 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6619 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6620 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6621 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6622 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6623 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6624 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6625 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6626 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6627 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6628 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6629 } else { 6630 graph = pcbddc->mat_graph; 6631 } 6632 /* print some info */ 6633 if (pcbddc->dbg_flag) { 6634 IS vertices; 6635 PetscInt nv,nedges,nfaces; 6636 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag-1,pcbddc->dbg_viewer);CHKERRQ(ierr); 6637 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6638 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6639 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6640 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6641 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6643 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6644 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6645 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6646 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6647 } 6648 6649 /* sub_schurs init */ 6650 if (!pcbddc->sub_schurs) { 6651 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6652 } 6653 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6654 6655 /* free graph struct */ 6656 if (pcbddc->sub_schurs_rebuild) { 6657 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6658 } 6659 PetscFunctionReturn(0); 6660 } 6661 6662 #undef __FUNCT__ 6663 #define __FUNCT__ "PCBDDCCheckOperator" 6664 PetscErrorCode PCBDDCCheckOperator(PC pc) 6665 { 6666 PC_IS *pcis=(PC_IS*)pc->data; 6667 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6668 PetscErrorCode ierr; 6669 6670 PetscFunctionBegin; 6671 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6672 IS zerodiag = NULL; 6673 Mat S_j,B0_B=NULL; 6674 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6675 PetscScalar *p0_check,*array,*array2; 6676 PetscReal norm; 6677 PetscInt i; 6678 6679 /* B0 and B0_B */ 6680 if (zerodiag) { 6681 IS dummy; 6682 6683 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6684 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6685 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6686 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6687 } 6688 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6689 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6690 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6691 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6692 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6693 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6694 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6695 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6696 /* S_j */ 6697 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6698 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6699 6700 /* mimic vector in \widetilde{W}_\Gamma */ 6701 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6702 /* continuous in primal space */ 6703 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6704 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6705 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6706 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6707 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6708 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6709 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6710 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6711 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6712 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6713 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6714 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6715 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6716 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6717 6718 /* assemble rhs for coarse problem */ 6719 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6720 /* local with Schur */ 6721 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6722 if (zerodiag) { 6723 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6724 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6725 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6726 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6727 } 6728 /* sum on primal nodes the local contributions */ 6729 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6730 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6731 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6732 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6733 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6734 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6735 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6736 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6737 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6738 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6739 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6740 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6741 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6742 /* scale primal nodes (BDDC sums contibutions) */ 6743 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6744 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6745 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6746 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6747 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6748 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6749 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6750 /* global: \widetilde{B0}_B w_\Gamma */ 6751 if (zerodiag) { 6752 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6753 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6754 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6755 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6756 } 6757 /* BDDC */ 6758 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6759 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6760 6761 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6762 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6763 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6764 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6765 for (i=0;i<pcbddc->benign_n;i++) { 6766 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6767 } 6768 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6769 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6770 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6771 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6772 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6773 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6774 } 6775 PetscFunctionReturn(0); 6776 } 6777