1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 10 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 11 { 12 Mat loc_divudotp; 13 Vec p,v,vins,quad_vec,*quad_vecs; 14 ISLocalToGlobalMapping rmap; 15 IS *faces,*edges; 16 PetscScalar *vals; 17 const PetscScalar *array; 18 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 19 PetscMPIInt rank; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 24 if (graph->twodim) { 25 lmaxneighs = 2; 26 } else { 27 lmaxneighs = 1; 28 for (i=0;i<ne;i++) { 29 const PetscInt *idxs; 30 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 31 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 32 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 33 } 34 lmaxneighs++; /* graph count does not include self */ 35 } 36 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 37 maxsize = 0; 38 for (i=0;i<ne;i++) { 39 PetscInt nn; 40 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 41 maxsize = PetscMax(maxsize,nn); 42 } 43 for (i=0;i<nf;i++) { 44 PetscInt nn; 45 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 46 maxsize = PetscMax(maxsize,nn); 47 } 48 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 49 /* create vectors to hold quadrature weights */ 50 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 51 ierr = MatGetLocalToGlobalMapping(A,&rmap,NULL);CHKERRQ(ierr); 52 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 53 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 54 for (i=0;i<maxneighs;i++) { 55 PetscInt first,last; 56 57 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],rmap);CHKERRQ(ierr); 58 /* the near-null space fo BDDC carries information on quadrature weights, 59 and these can be collinear -> so cheat with MatNullSpaceCreate 60 and create a suitable set of basis vectors first */ 61 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 62 if (i>=first && i < last) { 63 PetscScalar *data; 64 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 65 data[i-first] = 1.; 66 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 67 } 68 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 69 } 70 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 71 for (i=0;i<maxneighs;i++) { /* reset vectors */ 72 PetscInt first,last; 73 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 74 if (i>=first && i < last) { 75 PetscScalar *data; 76 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 77 data[i-first] = 0.; 78 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 79 } 80 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 81 } 82 /* compute local quad vec */ 83 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 84 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 85 ierr = VecSet(p,1.);CHKERRQ(ierr); 86 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 87 if (vl2l) { 88 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 89 } else { 90 vins = v; 91 } 92 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 93 ierr = VecDestroy(&p);CHKERRQ(ierr); 94 /* insert in global quadrature vecs */ 95 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 96 for (i=0;i<nf;i++) { 97 const PetscInt *idxs; 98 PetscInt idx,nn,j; 99 100 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 101 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 102 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 103 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 104 idx = -(idx+1); 105 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 106 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 107 ierr = ISDestroy(&faces[i]);CHKERRQ(ierr); 108 } 109 for (i=0;i<ne;i++) { 110 const PetscInt *idxs; 111 PetscInt idx,nn,j; 112 113 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 114 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 115 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 116 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 117 idx = -(idx+1); 118 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 119 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 120 ierr = ISDestroy(&edges[i]);CHKERRQ(ierr); 121 } 122 ierr = PetscFree(edges);CHKERRQ(ierr); 123 ierr = PetscFree(faces);CHKERRQ(ierr); 124 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 125 if (vl2l) { 126 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 127 } 128 ierr = VecDestroy(&v);CHKERRQ(ierr); 129 ierr = PetscFree(vals);CHKERRQ(ierr); 130 131 /* assemble near null space */ 132 for (i=0;i<maxneighs;i++) { 133 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 134 } 135 for (i=0;i<maxneighs;i++) { 136 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 137 } 138 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 145 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 146 { 147 PetscErrorCode ierr; 148 Vec local,global; 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 151 152 PetscFunctionBegin; 153 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 154 /* need to convert from global to local topology information and remove references to information in global ordering */ 155 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 156 if (pcbddc->user_provided_isfordofs) { 157 if (pcbddc->n_ISForDofs) { 158 PetscInt i; 159 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->n_ISForDofs;i++) { 161 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 162 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 163 } 164 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 165 pcbddc->n_ISForDofs = 0; 166 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 167 } 168 } else { 169 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 170 PetscInt i, n = matis->A->rmap->n; 171 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 172 if (i > 1) { 173 pcbddc->n_ISForDofsLocal = i; 174 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 175 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 176 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 177 } 178 } 179 } 180 } 181 182 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 183 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 184 } 185 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 186 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 187 } 188 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 189 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 190 } 191 ierr = VecDestroy(&global);CHKERRQ(ierr); 192 ierr = VecDestroy(&local);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 198 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 199 { 200 PC_IS *pcis = (PC_IS*)(pc->data); 201 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 202 PetscErrorCode ierr; 203 204 PetscFunctionBegin; 205 if (!pcbddc->benign_have_null) { 206 PetscFunctionReturn(0); 207 } 208 if (pcbddc->ChangeOfBasisMatrix) { 209 Vec swap; 210 211 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 212 swap = pcbddc->work_change; 213 pcbddc->work_change = r; 214 r = swap; 215 } 216 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 217 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 218 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 219 ierr = VecSet(z,0.);CHKERRQ(ierr); 220 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 221 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 222 if (pcbddc->ChangeOfBasisMatrix) { 223 Vec swap; 224 225 swap = r; 226 r = pcbddc->work_change; 227 pcbddc->work_change = swap; 228 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 229 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 230 } 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 236 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 237 { 238 PCBDDCBenignMatMult_ctx ctx; 239 PetscErrorCode ierr; 240 PetscBool apply_right,apply_left,reset_x; 241 242 PetscFunctionBegin; 243 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 244 if (transpose) { 245 apply_right = ctx->apply_left; 246 apply_left = ctx->apply_right; 247 } else { 248 apply_right = ctx->apply_right; 249 apply_left = ctx->apply_left; 250 } 251 reset_x = PETSC_FALSE; 252 if (apply_right) { 253 const PetscScalar *ax; 254 PetscInt nl,i; 255 256 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 257 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 258 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 259 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 260 for (i=0;i<ctx->benign_n;i++) { 261 PetscScalar sum,val; 262 const PetscInt *idxs; 263 PetscInt nz,j; 264 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 265 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 266 sum = 0.; 267 if (ctx->apply_p0) { 268 val = ctx->work[idxs[nz-1]]; 269 for (j=0;j<nz-1;j++) { 270 sum += ctx->work[idxs[j]]; 271 ctx->work[idxs[j]] += val; 272 } 273 } else { 274 for (j=0;j<nz-1;j++) { 275 sum += ctx->work[idxs[j]]; 276 } 277 } 278 ctx->work[idxs[nz-1]] -= sum; 279 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 280 } 281 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 282 reset_x = PETSC_TRUE; 283 } 284 if (transpose) { 285 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 286 } else { 287 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 288 } 289 if (reset_x) { 290 ierr = VecResetArray(x);CHKERRQ(ierr); 291 } 292 if (apply_left) { 293 PetscScalar *ay; 294 PetscInt i; 295 296 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 297 for (i=0;i<ctx->benign_n;i++) { 298 PetscScalar sum,val; 299 const PetscInt *idxs; 300 PetscInt nz,j; 301 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 302 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 303 val = -ay[idxs[nz-1]]; 304 if (ctx->apply_p0) { 305 sum = 0.; 306 for (j=0;j<nz-1;j++) { 307 sum += ay[idxs[j]]; 308 ay[idxs[j]] += val; 309 } 310 ay[idxs[nz-1]] += sum; 311 } else { 312 for (j=0;j<nz-1;j++) { 313 ay[idxs[j]] += val; 314 } 315 ay[idxs[nz-1]] = 0.; 316 } 317 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 318 } 319 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 326 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 327 { 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 337 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 338 { 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCBenignShellMat" 348 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 349 { 350 PC_IS *pcis = (PC_IS*)pc->data; 351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 352 PCBDDCBenignMatMult_ctx ctx; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 if (!restore) { 357 Mat A_IB,A_BI; 358 PetscScalar *work; 359 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 360 361 if (pcbddc->benign_original_mat) { 362 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 363 } 364 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) { 365 PetscFunctionReturn(0); 366 } 367 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 368 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 369 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 370 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 371 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 372 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 373 ierr = PetscNew(&ctx);CHKERRQ(ierr); 374 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 375 ctx->apply_left = PETSC_TRUE; 376 ctx->apply_right = PETSC_FALSE; 377 ctx->apply_p0 = PETSC_FALSE; 378 ctx->benign_n = pcbddc->benign_n; 379 if (reuse) { 380 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 381 ctx->free = PETSC_FALSE; 382 } else { /* TODO: could be optimized for successive solves */ 383 ISLocalToGlobalMapping N_to_D; 384 PetscInt i; 385 386 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 387 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 388 for (i=0;i<pcbddc->benign_n;i++) { 389 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 390 } 391 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 392 ctx->free = PETSC_TRUE; 393 } 394 ctx->A = pcis->A_IB; 395 ctx->work = work; 396 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 397 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 399 pcis->A_IB = A_IB; 400 401 /* A_BI as A_IB^T */ 402 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 403 pcbddc->benign_original_mat = pcis->A_BI; 404 pcis->A_BI = A_BI; 405 } else { 406 if (!pcbddc->benign_original_mat) { 407 PetscFunctionReturn(0); 408 } 409 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 411 pcis->A_IB = ctx->A; 412 ctx->A = NULL; 413 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 414 pcis->A_BI = pcbddc->benign_original_mat; 415 pcbddc->benign_original_mat = NULL; 416 if (ctx->free) { 417 PetscInt i; 418 for (i=0;i<ctx->benign_n;i++) { 419 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 420 } 421 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 422 } 423 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 424 ierr = PetscFree(ctx);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* used just in bddc debug mode */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "PCBDDCBenignProject" 432 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 433 { 434 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 435 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 436 Mat An; 437 PetscErrorCode ierr; 438 439 PetscFunctionBegin; 440 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 441 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 442 if (is1) { 443 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 444 ierr = MatDestroy(&An);CHKERRQ(ierr); 445 } else { 446 *B = An; 447 } 448 PetscFunctionReturn(0); 449 } 450 451 /* TODO: add reuse flag */ 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSeqAIJCompress" 454 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 455 { 456 Mat Bt; 457 PetscScalar *a,*bdata; 458 const PetscInt *ii,*ij; 459 PetscInt m,n,i,nnz,*bii,*bij; 460 PetscBool flg_row; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 465 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 466 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 467 nnz = n; 468 for (i=0;i<ii[n];i++) { 469 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 470 } 471 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 472 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 473 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 474 nnz = 0; 475 bii[0] = 0; 476 for (i=0;i<n;i++) { 477 PetscInt j; 478 for (j=ii[i];j<ii[i+1];j++) { 479 PetscScalar entry = a[j]; 480 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 481 bij[nnz] = ij[j]; 482 bdata[nnz] = entry; 483 nnz++; 484 } 485 } 486 bii[i+1] = nnz; 487 } 488 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 489 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 490 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 491 { 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 493 b->free_a = PETSC_TRUE; 494 b->free_ij = PETSC_TRUE; 495 } 496 *B = Bt; 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatDetectDisconnectedComponents" 502 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 503 { 504 Mat B; 505 IS is_dummy,*cc_n; 506 ISLocalToGlobalMapping l2gmap_dummy; 507 PCBDDCGraph graph; 508 PetscInt i,n; 509 PetscInt *xadj,*adjncy; 510 PetscInt *xadj_filtered,*adjncy_filtered; 511 PetscBool flg_row,isseqaij; 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 if (!A->rmap->N || !A->cmap->N) { 516 *ncc = 0; 517 *cc = NULL; 518 PetscFunctionReturn(0); 519 } 520 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 521 if (!isseqaij && filter) { 522 PetscBool isseqdense; 523 524 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 525 if (!isseqdense) { 526 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 527 } else { /* TODO: rectangular case and LDA */ 528 PetscScalar *array; 529 PetscReal chop=1.e-6; 530 531 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 532 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 533 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 534 for (i=0;i<n;i++) { 535 PetscInt j; 536 for (j=i+1;j<n;j++) { 537 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 538 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 539 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 540 } 541 } 542 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 543 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 544 } 545 } else { 546 B = A; 547 } 548 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 549 550 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 551 if (filter) { 552 PetscScalar *data; 553 PetscInt j,cum; 554 555 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 556 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 557 cum = 0; 558 for (i=0;i<n;i++) { 559 PetscInt t; 560 561 for (j=xadj[i];j<xadj[i+1];j++) { 562 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 563 continue; 564 } 565 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 566 } 567 t = xadj_filtered[i]; 568 xadj_filtered[i] = cum; 569 cum += t; 570 } 571 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 572 } else { 573 xadj_filtered = NULL; 574 adjncy_filtered = NULL; 575 } 576 577 /* compute local connected components using PCBDDCGraph */ 578 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 579 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 580 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 581 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 582 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 583 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 584 if (xadj_filtered) { 585 graph->xadj = xadj_filtered; 586 graph->adjncy = adjncy_filtered; 587 } else { 588 graph->xadj = xadj; 589 graph->adjncy = adjncy; 590 } 591 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 592 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 593 /* partial clean up */ 594 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 595 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 596 if (A != B) { 597 ierr = MatDestroy(&B);CHKERRQ(ierr); 598 } 599 600 /* get back data */ 601 if (ncc) *ncc = graph->ncc; 602 if (cc) { 603 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 604 for (i=0;i<graph->ncc;i++) { 605 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 606 } 607 *cc = cc_n; 608 } 609 /* clean up graph */ 610 graph->xadj = 0; 611 graph->adjncy = 0; 612 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "PCBDDCBenignCheck" 618 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 619 { 620 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 621 PC_IS* pcis = (PC_IS*)(pc->data); 622 IS dirIS = NULL; 623 PetscInt i; 624 PetscErrorCode ierr; 625 626 PetscFunctionBegin; 627 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 628 if (zerodiag) { 629 Mat A; 630 Vec vec3_N; 631 PetscScalar *vals; 632 const PetscInt *idxs; 633 PetscInt nz,*count; 634 635 /* p0 */ 636 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 637 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 638 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 639 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 640 for (i=0;i<nz;i++) vals[i] = 1.; 641 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 643 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 644 /* v_I */ 645 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 646 for (i=0;i<nz;i++) vals[i] = 0.; 647 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 648 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 649 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 650 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 651 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 652 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 653 if (dirIS) { 654 PetscInt n; 655 656 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 657 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 658 for (i=0;i<n;i++) vals[i] = 0.; 659 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 660 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 661 } 662 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 663 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 664 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 665 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 666 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 667 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 668 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 669 if (PetscAbsScalar(vals[0]) > 1.e-1) { 670 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 671 } 672 ierr = PetscFree(vals);CHKERRQ(ierr); 673 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 674 675 /* there should not be any pressure dofs lying on the interface */ 676 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 677 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 678 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 679 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 680 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 681 for (i=0;i<nz;i++) { 682 if (count[idxs[i]]) { 683 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 684 } 685 } 686 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 687 ierr = PetscFree(count);CHKERRQ(ierr); 688 } 689 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 690 691 /* check PCBDDCBenignGetOrSetP0 */ 692 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 693 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 694 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 695 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 696 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 697 for (i=0;i<pcbddc->benign_n;i++) { 698 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 699 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 700 } 701 } 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 707 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 708 { 709 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 710 IS pressures,zerodiag,*zerodiag_subs; 711 PetscInt nz,n; 712 PetscInt *interior_dofs,n_interior_dofs; 713 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 714 PetscErrorCode ierr; 715 716 PetscFunctionBegin; 717 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 719 for (n=0;n<pcbddc->benign_n;n++) { 720 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 721 } 722 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 723 pcbddc->benign_n = 0; 724 /* if a local info on dofs is present, assumes that the last field represents "pressures" 725 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 726 Checks if all the pressure dofs in each subdomain have a zero diagonal 727 If not, a change of basis on pressures is not needed 728 since the local Schur complements are already SPD 729 */ 730 has_null_pressures = PETSC_TRUE; 731 have_null = PETSC_TRUE; 732 if (pcbddc->n_ISForDofsLocal) { 733 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 734 735 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 736 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 737 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 738 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 740 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 741 if (!sorted) { 742 ierr = ISSort(pressures);CHKERRQ(ierr); 743 } 744 } else { 745 pressures = NULL; 746 } 747 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 748 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 749 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 750 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 751 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 752 if (!sorted) { 753 ierr = ISSort(zerodiag);CHKERRQ(ierr); 754 } 755 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 756 if (!nz) { 757 if (n) have_null = PETSC_FALSE; 758 has_null_pressures = PETSC_FALSE; 759 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 760 } 761 recompute_zerodiag = PETSC_FALSE; 762 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 763 zerodiag_subs = NULL; 764 pcbddc->benign_n = 0; 765 n_interior_dofs = 0; 766 interior_dofs = NULL; 767 if (pcbddc->current_level) { /* need to compute interior nodes */ 768 PetscInt n,i,j; 769 PetscInt n_neigh,*neigh,*n_shared,**shared; 770 PetscInt *iwork; 771 772 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 773 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 774 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 775 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 776 for (i=0;i<n_neigh;i++) 777 for (j=0;j<n_shared[i];j++) 778 iwork[shared[i][j]] += 1; 779 for (i=0;i<n;i++) 780 if (!iwork[i]) 781 interior_dofs[n_interior_dofs++] = i; 782 ierr = PetscFree(iwork);CHKERRQ(ierr); 783 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 784 } 785 if (has_null_pressures) { 786 IS *subs; 787 PetscInt nsubs,i,j,nl; 788 const PetscInt *idxs; 789 PetscScalar *array; 790 Vec *work; 791 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 792 793 subs = pcbddc->local_subs; 794 nsubs = pcbddc->n_local_subs; 795 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 796 if (pcbddc->current_level) { 797 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 798 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 799 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 800 /* work[0] = 1_p */ 801 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 802 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 803 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 804 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 805 /* work[0] = 1_v */ 806 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 807 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 808 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 809 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 810 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 811 } 812 if (nsubs > 1) { 813 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 814 for (i=0;i<nsubs;i++) { 815 ISLocalToGlobalMapping l2g; 816 IS t_zerodiag_subs; 817 PetscInt nl; 818 819 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 820 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 821 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 822 if (nl) { 823 PetscBool valid = PETSC_TRUE; 824 825 if (pcbddc->current_level) { 826 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 827 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 828 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 829 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 830 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 831 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 832 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 833 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 834 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 835 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 836 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 837 for (j=0;j<n_interior_dofs;j++) { 838 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 839 valid = PETSC_FALSE; 840 break; 841 } 842 } 843 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 844 } 845 if (valid && pcbddc->NeumannBoundariesLocal) { 846 IS t_bc; 847 PetscInt nzb; 848 849 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 850 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 851 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 852 if (nzb) valid = PETSC_FALSE; 853 } 854 if (valid && pressures) { 855 IS t_pressure_subs; 856 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 857 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 858 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 859 } 860 if (valid) { 861 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 862 pcbddc->benign_n++; 863 } else { 864 recompute_zerodiag = PETSC_TRUE; 865 } 866 } 867 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 868 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 869 } 870 } else { /* there's just one subdomain (or zero if they have not been detected */ 871 PetscBool valid = PETSC_TRUE; 872 873 if (pcbddc->NeumannBoundariesLocal) { 874 PetscInt nzb; 875 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 876 if (nzb) valid = PETSC_FALSE; 877 } 878 if (valid && pressures) { 879 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 880 } 881 if (valid && pcbddc->current_level) { 882 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 883 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 884 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 885 for (j=0;j<n_interior_dofs;j++) { 886 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 887 valid = PETSC_FALSE; 888 break; 889 } 890 } 891 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 892 } 893 if (valid) { 894 pcbddc->benign_n = 1; 895 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 896 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 897 zerodiag_subs[0] = zerodiag; 898 } 899 } 900 if (pcbddc->current_level) { 901 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 902 } 903 } 904 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 905 906 if (!pcbddc->benign_n) { 907 PetscInt n; 908 909 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 910 recompute_zerodiag = PETSC_FALSE; 911 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 912 if (n) { 913 has_null_pressures = PETSC_FALSE; 914 have_null = PETSC_FALSE; 915 } 916 } 917 918 /* final check for null pressures */ 919 if (zerodiag && pressures) { 920 PetscInt nz,np; 921 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 922 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 923 if (nz != np) have_null = PETSC_FALSE; 924 } 925 926 if (recompute_zerodiag) { 927 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 928 if (pcbddc->benign_n == 1) { 929 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 930 zerodiag = zerodiag_subs[0]; 931 } else { 932 PetscInt i,nzn,*new_idxs; 933 934 nzn = 0; 935 for (i=0;i<pcbddc->benign_n;i++) { 936 PetscInt ns; 937 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 938 nzn += ns; 939 } 940 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 941 nzn = 0; 942 for (i=0;i<pcbddc->benign_n;i++) { 943 PetscInt ns,*idxs; 944 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 945 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 946 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 947 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 948 nzn += ns; 949 } 950 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 951 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 952 } 953 have_null = PETSC_FALSE; 954 } 955 956 /* Prepare matrix to compute no-net-flux */ 957 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 958 Mat A,loc_divudotp; 959 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 960 IS row,col,isused = NULL; 961 PetscInt M,N,n,st,n_isused; 962 963 if (pressures) { 964 isused = pressures; 965 } else { 966 isused = zerodiag; 967 } 968 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 969 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 970 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 971 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 972 n_isused = 0; 973 if (isused) { 974 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 975 } 976 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 977 st = st-n_isused; 978 if (n) { 979 const PetscInt *gidxs; 980 981 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 982 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 983 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 984 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 985 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 986 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 987 } else { 988 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 989 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 990 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 991 } 992 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 993 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 994 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 995 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 996 ierr = ISDestroy(&row);CHKERRQ(ierr); 997 ierr = ISDestroy(&col);CHKERRQ(ierr); 998 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 999 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1000 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1001 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1002 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1003 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1004 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1005 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1006 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1007 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1008 } 1009 1010 /* change of basis and p0 dofs */ 1011 if (has_null_pressures) { 1012 IS zerodiagc; 1013 const PetscInt *idxs,*idxsc; 1014 PetscInt i,s,*nnz; 1015 1016 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1017 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1018 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1019 /* local change of basis for pressures */ 1020 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1021 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1022 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1023 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1024 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1025 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1026 for (i=0;i<pcbddc->benign_n;i++) { 1027 PetscInt nzs,j; 1028 1029 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1030 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1031 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1032 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1033 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1034 } 1035 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1036 ierr = PetscFree(nnz);CHKERRQ(ierr); 1037 /* set identity on velocities */ 1038 for (i=0;i<n-nz;i++) { 1039 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1040 } 1041 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1042 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1043 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1044 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1045 /* set change on pressures */ 1046 for (s=0;s<pcbddc->benign_n;s++) { 1047 PetscScalar *array; 1048 PetscInt nzs; 1049 1050 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1051 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1052 for (i=0;i<nzs-1;i++) { 1053 PetscScalar vals[2]; 1054 PetscInt cols[2]; 1055 1056 cols[0] = idxs[i]; 1057 cols[1] = idxs[nzs-1]; 1058 vals[0] = 1.; 1059 vals[1] = 1.; 1060 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1061 } 1062 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1063 for (i=0;i<nzs-1;i++) array[i] = -1.; 1064 array[nzs-1] = 1.; 1065 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1066 /* store local idxs for p0 */ 1067 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1068 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1069 ierr = PetscFree(array);CHKERRQ(ierr); 1070 } 1071 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1072 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1073 /* project if needed */ 1074 if (pcbddc->benign_change_explicit) { 1075 Mat M; 1076 1077 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1078 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1079 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1080 ierr = MatDestroy(&M);CHKERRQ(ierr); 1081 } 1082 /* store global idxs for p0 */ 1083 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1084 } 1085 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1086 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1087 1088 /* determines if the coarse solver will be singular or not */ 1089 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1090 /* determines if the problem has subdomains with 0 pressure block */ 1091 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1092 *zerodiaglocal = zerodiag; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1098 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1099 { 1100 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1101 PetscScalar *array; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 if (!pcbddc->benign_sf) { 1106 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1107 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1108 } 1109 if (get) { 1110 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1111 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1112 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1113 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1114 } else { 1115 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1116 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1117 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1118 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1125 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1126 { 1127 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 /* TODO: add error checking 1132 - avoid nested pop (or push) calls. 1133 - cannot push before pop. 1134 - cannot call this if pcbddc->local_mat is NULL 1135 */ 1136 if (!pcbddc->benign_n) { 1137 PetscFunctionReturn(0); 1138 } 1139 if (pop) { 1140 if (pcbddc->benign_change_explicit) { 1141 IS is_p0; 1142 MatReuse reuse; 1143 1144 /* extract B_0 */ 1145 reuse = MAT_INITIAL_MATRIX; 1146 if (pcbddc->benign_B0) { 1147 reuse = MAT_REUSE_MATRIX; 1148 } 1149 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1150 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1151 /* remove rows and cols from local problem */ 1152 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1153 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1154 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1155 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1156 } else { 1157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1158 PetscScalar *vals; 1159 PetscInt i,n,*idxs_ins; 1160 1161 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1162 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1163 if (!pcbddc->benign_B0) { 1164 PetscInt *nnz; 1165 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1166 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1167 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1168 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1169 for (i=0;i<pcbddc->benign_n;i++) { 1170 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1171 nnz[i] = n - nnz[i]; 1172 } 1173 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 1174 ierr = PetscFree(nnz);CHKERRQ(ierr); 1175 } 1176 1177 for (i=0;i<pcbddc->benign_n;i++) { 1178 PetscScalar *array; 1179 PetscInt *idxs,j,nz,cum; 1180 1181 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 1182 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1183 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1184 for (j=0;j<nz;j++) vals[j] = 1.; 1185 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1186 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 1187 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 1188 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 1189 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1190 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1191 cum = 0; 1192 for (j=0;j<n;j++) { 1193 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 1194 vals[cum] = array[j]; 1195 idxs_ins[cum] = j; 1196 cum++; 1197 } 1198 } 1199 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1201 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1202 } 1203 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1204 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1205 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 1206 } 1207 } else { /* push */ 1208 if (pcbddc->benign_change_explicit) { 1209 PetscInt i; 1210 1211 for (i=0;i<pcbddc->benign_n;i++) { 1212 PetscScalar *B0_vals; 1213 PetscInt *B0_cols,B0_ncol; 1214 1215 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1216 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1217 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 1218 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 1219 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 1220 } 1221 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1222 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1223 } else { 1224 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 1225 } 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "PCBDDCAdaptiveSelection" 1232 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 1233 { 1234 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1235 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1236 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 1237 PetscBLASInt *B_iwork,*B_ifail; 1238 PetscScalar *work,lwork; 1239 PetscScalar *St,*S,*eigv; 1240 PetscScalar *Sarray,*Starray; 1241 PetscReal *eigs,thresh; 1242 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 1243 PetscBool allocated_S_St; 1244 #if defined(PETSC_USE_COMPLEX) 1245 PetscReal *rwork; 1246 #endif 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 1251 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 1252 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 1253 1254 if (pcbddc->dbg_flag) { 1255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1256 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 1258 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1259 } 1260 1261 if (pcbddc->dbg_flag) { 1262 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 1263 } 1264 1265 /* max size of subsets */ 1266 mss = 0; 1267 for (i=0;i<sub_schurs->n_subs;i++) { 1268 PetscInt subset_size; 1269 1270 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1271 mss = PetscMax(mss,subset_size); 1272 } 1273 1274 /* min/max and threshold */ 1275 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 1276 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 1277 nmax = PetscMax(nmin,nmax); 1278 allocated_S_St = PETSC_FALSE; 1279 if (nmin) { 1280 allocated_S_St = PETSC_TRUE; 1281 } 1282 1283 /* allocate lapack workspace */ 1284 cum = cum2 = 0; 1285 maxneigs = 0; 1286 for (i=0;i<sub_schurs->n_subs;i++) { 1287 PetscInt n,subset_size; 1288 1289 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1290 n = PetscMin(subset_size,nmax); 1291 cum += subset_size; 1292 cum2 += subset_size*n; 1293 maxneigs = PetscMax(maxneigs,n); 1294 } 1295 if (mss) { 1296 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1297 PetscBLASInt B_itype = 1; 1298 PetscBLASInt B_N = mss; 1299 PetscReal zero = 0.0; 1300 PetscReal eps = 0.0; /* dlamch? */ 1301 1302 B_lwork = -1; 1303 S = NULL; 1304 St = NULL; 1305 eigs = NULL; 1306 eigv = NULL; 1307 B_iwork = NULL; 1308 B_ifail = NULL; 1309 #if defined(PETSC_USE_COMPLEX) 1310 rwork = NULL; 1311 #endif 1312 thresh = 1.0; 1313 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1314 #if defined(PETSC_USE_COMPLEX) 1315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1316 #else 1317 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1318 #endif 1319 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 1320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1321 } else { 1322 /* TODO */ 1323 } 1324 } else { 1325 lwork = 0; 1326 } 1327 1328 nv = 0; 1329 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 1330 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 1331 } 1332 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 1333 if (allocated_S_St) { 1334 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 1335 } 1336 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 1337 #if defined(PETSC_USE_COMPLEX) 1338 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 1339 #endif 1340 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 1341 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 1342 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 1343 nv+cum,&pcbddc->adaptive_constraints_idxs, 1344 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 1345 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 1346 1347 maxneigs = 0; 1348 cum = cumarray = 0; 1349 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 1350 pcbddc->adaptive_constraints_data_ptr[0] = 0; 1351 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 1352 const PetscInt *idxs; 1353 1354 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1355 for (cum=0;cum<nv;cum++) { 1356 pcbddc->adaptive_constraints_n[cum] = 1; 1357 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 1358 pcbddc->adaptive_constraints_data[cum] = 1.0; 1359 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 1360 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 1361 } 1362 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 1363 } 1364 1365 if (mss) { /* multilevel */ 1366 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1367 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1368 } 1369 1370 thresh = pcbddc->adaptive_threshold; 1371 for (i=0;i<sub_schurs->n_subs;i++) { 1372 const PetscInt *idxs; 1373 PetscReal upper,lower; 1374 PetscInt j,subset_size,eigs_start = 0; 1375 PetscBLASInt B_N; 1376 PetscBool same_data = PETSC_FALSE; 1377 1378 if (pcbddc->use_deluxe_scaling) { 1379 upper = PETSC_MAX_REAL; 1380 lower = thresh; 1381 } else { 1382 upper = 1./thresh; 1383 lower = 0.; 1384 } 1385 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 1386 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1387 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 1388 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 1389 if (sub_schurs->is_hermitian) { 1390 PetscInt j,k; 1391 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 1392 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1393 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1394 } 1395 for (j=0;j<subset_size;j++) { 1396 for (k=j;k<subset_size;k++) { 1397 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1398 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1399 } 1400 } 1401 } else { 1402 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1403 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1404 } 1405 } else { 1406 S = Sarray + cumarray; 1407 St = Starray + cumarray; 1408 } 1409 /* see if we can save some work */ 1410 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 1411 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 1412 } 1413 1414 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 1415 B_neigs = 0; 1416 } else { 1417 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 1418 PetscBLASInt B_itype = 1; 1419 PetscBLASInt B_IL, B_IU; 1420 PetscReal eps = -1.0; /* dlamch? */ 1421 PetscInt nmin_s; 1422 PetscBool compute_range = PETSC_FALSE; 1423 1424 if (pcbddc->dbg_flag) { 1425 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 1426 } 1427 1428 compute_range = PETSC_FALSE; 1429 if (thresh > 1.+PETSC_SMALL && !same_data) { 1430 compute_range = PETSC_TRUE; 1431 } 1432 1433 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1434 if (compute_range) { 1435 1436 /* ask for eigenvalues larger than thresh */ 1437 #if defined(PETSC_USE_COMPLEX) 1438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1439 #else 1440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1441 #endif 1442 } else if (!same_data) { 1443 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 1444 B_IL = 1; 1445 #if defined(PETSC_USE_COMPLEX) 1446 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1447 #else 1448 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1449 #endif 1450 } else { /* same_data is true, so get the adaptive function requested by the user */ 1451 PetscInt k; 1452 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 1453 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 1454 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 1455 nmin = nmax; 1456 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 1457 for (k=0;k<nmax;k++) { 1458 eigs[k] = 1./PETSC_SMALL; 1459 eigv[k*(subset_size+1)] = 1.0; 1460 } 1461 } 1462 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1463 if (B_ierr) { 1464 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1465 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1466 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1467 } 1468 1469 if (B_neigs > nmax) { 1470 if (pcbddc->dbg_flag) { 1471 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 1472 } 1473 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 1474 B_neigs = nmax; 1475 } 1476 1477 nmin_s = PetscMin(nmin,B_N); 1478 if (B_neigs < nmin_s) { 1479 PetscBLASInt B_neigs2; 1480 1481 if (pcbddc->use_deluxe_scaling) { 1482 B_IL = B_N - nmin_s + 1; 1483 B_IU = B_N - B_neigs; 1484 } else { 1485 B_IL = B_neigs + 1; 1486 B_IU = nmin_s; 1487 } 1488 if (pcbddc->dbg_flag) { 1489 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 1490 } 1491 if (sub_schurs->is_hermitian) { 1492 PetscInt j,k; 1493 for (j=0;j<subset_size;j++) { 1494 for (k=j;k<subset_size;k++) { 1495 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 1496 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 1497 } 1498 } 1499 } else { 1500 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1501 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1502 } 1503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1504 #if defined(PETSC_USE_COMPLEX) 1505 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 1506 #else 1507 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 1508 #endif 1509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1510 B_neigs += B_neigs2; 1511 } 1512 if (B_ierr) { 1513 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 1514 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 1515 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 1516 } 1517 if (pcbddc->dbg_flag) { 1518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 1519 for (j=0;j<B_neigs;j++) { 1520 if (eigs[j] == 0.0) { 1521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 1522 } else { 1523 if (pcbddc->use_deluxe_scaling) { 1524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 1525 } else { 1526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 1527 } 1528 } 1529 } 1530 } 1531 } else { 1532 /* TODO */ 1533 } 1534 } 1535 /* change the basis back to the original one */ 1536 if (sub_schurs->change) { 1537 Mat change,phi,phit; 1538 1539 if (pcbddc->dbg_flag > 1) { 1540 PetscInt ii; 1541 for (ii=0;ii<B_neigs;ii++) { 1542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1543 for (j=0;j<B_N;j++) { 1544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 1545 } 1546 } 1547 } 1548 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 1550 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 1551 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1552 ierr = MatDestroy(&phit);CHKERRQ(ierr); 1553 ierr = MatDestroy(&phi);CHKERRQ(ierr); 1554 } 1555 maxneigs = PetscMax(B_neigs,maxneigs); 1556 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 1557 if (B_neigs) { 1558 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 1559 1560 if (pcbddc->dbg_flag > 1) { 1561 PetscInt ii; 1562 for (ii=0;ii<B_neigs;ii++) { 1563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 1564 for (j=0;j<B_N;j++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1567 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 1568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 1569 #else 1570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 1571 #endif 1572 } 1573 } 1574 } 1575 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 1576 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 1577 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 1578 cum++; 1579 } 1580 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 1581 /* shift for next computation */ 1582 cumarray += subset_size*subset_size; 1583 } 1584 if (pcbddc->dbg_flag) { 1585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1586 } 1587 1588 if (mss) { 1589 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 1590 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 1591 /* destroy matrices (junk) */ 1592 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 1593 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 1594 } 1595 if (allocated_S_St) { 1596 ierr = PetscFree2(S,St);CHKERRQ(ierr); 1597 } 1598 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 1599 #if defined(PETSC_USE_COMPLEX) 1600 ierr = PetscFree(rwork);CHKERRQ(ierr); 1601 #endif 1602 if (pcbddc->dbg_flag) { 1603 PetscInt maxneigs_r; 1604 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 1606 } 1607 PetscFunctionReturn(0); 1608 } 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "PCBDDCSetUpSolvers" 1612 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 1613 { 1614 PetscScalar *coarse_submat_vals; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 /* Setup local scatters R_to_B and (optionally) R_to_D */ 1619 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 1620 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 1621 1622 /* Setup local neumann solver ksp_R */ 1623 /* PCBDDCSetUpLocalScatters should be called first! */ 1624 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 1625 1626 /* 1627 Setup local correction and local part of coarse basis. 1628 Gives back the dense local part of the coarse matrix in column major ordering 1629 */ 1630 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 1631 1632 /* Compute total number of coarse nodes and setup coarse solver */ 1633 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 1634 1635 /* free */ 1636 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "PCBDDCResetCustomization" 1642 PetscErrorCode PCBDDCResetCustomization(PC pc) 1643 { 1644 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1645 PetscErrorCode ierr; 1646 1647 PetscFunctionBegin; 1648 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1649 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 1650 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1651 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 1652 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1653 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 1654 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1655 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1656 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1657 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 1658 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "PCBDDCResetTopography" 1664 PetscErrorCode PCBDDCResetTopography(PC pc) 1665 { 1666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1667 PetscInt i; 1668 PetscErrorCode ierr; 1669 1670 PetscFunctionBegin; 1671 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1672 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1673 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 1674 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 1675 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1676 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 1677 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 1678 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1679 for (i=0;i<pcbddc->n_local_subs;i++) { 1680 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1681 } 1682 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1683 if (pcbddc->sub_schurs) { 1684 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCResetSolvers" 1691 PetscErrorCode PCBDDCResetSolvers(PC pc) 1692 { 1693 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1698 if (pcbddc->coarse_phi_B) { 1699 PetscScalar *array; 1700 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1701 ierr = PetscFree(array);CHKERRQ(ierr); 1702 } 1703 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1704 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1705 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1706 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1707 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1708 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1709 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1710 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1711 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1712 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1713 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1714 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1715 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1716 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1717 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1718 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1719 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1720 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1721 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1722 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1723 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1724 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1725 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1726 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1727 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 1728 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1729 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1730 if (pcbddc->benign_zerodiag_subs) { 1731 PetscInt i; 1732 for (i=0;i<pcbddc->benign_n;i++) { 1733 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1734 } 1735 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1736 } 1737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1743 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1744 { 1745 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1746 PC_IS *pcis = (PC_IS*)pc->data; 1747 VecType impVecType; 1748 PetscInt n_constraints,n_R,old_size; 1749 PetscErrorCode ierr; 1750 1751 PetscFunctionBegin; 1752 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1753 /* get sizes */ 1754 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1755 n_R = pcis->n - pcbddc->n_vertices; 1756 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1757 /* local work vectors (try to avoid unneeded work)*/ 1758 /* R nodes */ 1759 old_size = -1; 1760 if (pcbddc->vec1_R) { 1761 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1762 } 1763 if (n_R != old_size) { 1764 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1765 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1766 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1767 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1768 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1769 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1770 } 1771 /* local primal dofs */ 1772 old_size = -1; 1773 if (pcbddc->vec1_P) { 1774 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1775 } 1776 if (pcbddc->local_primal_size != old_size) { 1777 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1778 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1779 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1780 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1781 } 1782 /* local explicit constraints */ 1783 old_size = -1; 1784 if (pcbddc->vec1_C) { 1785 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1786 } 1787 if (n_constraints && n_constraints != old_size) { 1788 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1789 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1790 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1791 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1792 } 1793 PetscFunctionReturn(0); 1794 } 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "PCBDDCSetUpCorrection" 1798 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1799 { 1800 PetscErrorCode ierr; 1801 /* pointers to pcis and pcbddc */ 1802 PC_IS* pcis = (PC_IS*)pc->data; 1803 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1804 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1805 /* submatrices of local problem */ 1806 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1807 /* submatrices of local coarse problem */ 1808 Mat S_VV,S_CV,S_VC,S_CC; 1809 /* working matrices */ 1810 Mat C_CR; 1811 /* additional working stuff */ 1812 PC pc_R; 1813 Mat F; 1814 Vec dummy_vec; 1815 PetscBool isLU,isCHOL,isILU,need_benign_correction; 1816 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1817 PetscScalar *work; 1818 PetscInt *idx_V_B; 1819 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1820 PetscInt i,n_R,n_D,n_B; 1821 1822 /* some shortcuts to scalars */ 1823 PetscScalar one=1.0,m_one=-1.0; 1824 1825 PetscFunctionBegin; 1826 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1827 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1828 } 1829 1830 /* Set Non-overlapping dimensions */ 1831 n_vertices = pcbddc->n_vertices; 1832 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1833 n_B = pcis->n_B; 1834 n_D = pcis->n - n_B; 1835 n_R = pcis->n - n_vertices; 1836 1837 /* vertices in boundary numbering */ 1838 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1839 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1840 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 1841 1842 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1843 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1844 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1845 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1847 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1849 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1851 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1852 1853 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1854 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1855 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1856 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1857 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1858 lda_rhs = n_R; 1859 need_benign_correction = PETSC_FALSE; 1860 if (isLU || isILU || isCHOL) { 1861 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1862 } else if (sub_schurs && sub_schurs->reuse_solver) { 1863 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1864 MatFactorType type; 1865 1866 F = reuse_solver->F; 1867 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1868 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1869 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1870 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 1871 } else { 1872 F = NULL; 1873 } 1874 1875 /* allocate workspace */ 1876 n = 0; 1877 if (n_constraints) { 1878 n += lda_rhs*n_constraints; 1879 } 1880 if (n_vertices) { 1881 n = PetscMax(2*lda_rhs*n_vertices,n); 1882 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1883 } 1884 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1885 1886 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 1887 dummy_vec = NULL; 1888 if (need_benign_correction && lda_rhs != n_R && F) { 1889 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 1890 } 1891 1892 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1893 if (n_constraints) { 1894 Mat M1,M2,M3,C_B; 1895 IS is_aux; 1896 PetscScalar *array,*array2; 1897 1898 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1899 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1900 1901 /* Extract constraints on R nodes: C_{CR} */ 1902 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1903 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1904 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 1905 1906 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1907 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1908 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1909 for (i=0;i<n_constraints;i++) { 1910 const PetscScalar *row_cmat_values; 1911 const PetscInt *row_cmat_indices; 1912 PetscInt size_of_constraint,j; 1913 1914 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1915 for (j=0;j<size_of_constraint;j++) { 1916 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1917 } 1918 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1919 } 1920 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1921 if (F) { 1922 Mat B; 1923 1924 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1925 if (need_benign_correction) { 1926 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1927 1928 /* rhs is already zero on interior dofs, no need to change the rhs */ 1929 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 1930 } 1931 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1932 if (need_benign_correction) { 1933 PetscScalar *marr; 1934 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 1935 1936 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1937 if (lda_rhs != n_R) { 1938 for (i=0;i<n_constraints;i++) { 1939 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 1940 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1941 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 1942 } 1943 } else { 1944 for (i=0;i<n_constraints;i++) { 1945 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 1946 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 1947 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1948 } 1949 } 1950 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1951 } 1952 ierr = MatDestroy(&B);CHKERRQ(ierr); 1953 } else { 1954 PetscScalar *marr; 1955 1956 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1957 for (i=0;i<n_constraints;i++) { 1958 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1959 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1960 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1961 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1962 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1963 } 1964 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1965 } 1966 if (!pcbddc->switch_static) { 1967 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1968 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1969 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1970 for (i=0;i<n_constraints;i++) { 1971 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1972 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1973 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1976 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1977 } 1978 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1979 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1980 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1981 } else { 1982 if (lda_rhs != n_R) { 1983 IS dummy; 1984 1985 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1986 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1987 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1988 } else { 1989 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1990 pcbddc->local_auxmat2 = local_auxmat2_R; 1991 } 1992 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1993 } 1994 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1995 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1996 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1997 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1998 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1999 if (isCHOL) { 2000 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2003 } 2004 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2005 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2006 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2007 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2008 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2009 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2010 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2011 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2012 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2013 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2014 } 2015 2016 /* Get submatrices from subdomain matrix */ 2017 if (n_vertices) { 2018 IS is_aux; 2019 2020 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2021 IS tis; 2022 2023 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2024 ierr = ISSort(tis);CHKERRQ(ierr); 2025 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2026 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2027 } else { 2028 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2029 } 2030 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2031 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2032 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2033 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2034 } 2035 2036 /* Matrix of coarse basis functions (local) */ 2037 if (pcbddc->coarse_phi_B) { 2038 PetscInt on_B,on_primal,on_D=n_D; 2039 if (pcbddc->coarse_phi_D) { 2040 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2041 } 2042 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2043 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2044 PetscScalar *marray; 2045 2046 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2047 ierr = PetscFree(marray);CHKERRQ(ierr); 2048 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2049 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2050 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2052 } 2053 } 2054 2055 if (!pcbddc->coarse_phi_B) { 2056 PetscScalar *marray; 2057 2058 n = n_B*pcbddc->local_primal_size; 2059 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2060 n += n_D*pcbddc->local_primal_size; 2061 } 2062 if (!pcbddc->symmetric_primal) { 2063 n *= 2; 2064 } 2065 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2066 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2067 n = n_B*pcbddc->local_primal_size; 2068 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2070 n += n_D*pcbddc->local_primal_size; 2071 } 2072 if (!pcbddc->symmetric_primal) { 2073 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2075 n = n_B*pcbddc->local_primal_size; 2076 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2077 } 2078 } else { 2079 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2080 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2081 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2082 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2083 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2084 } 2085 } 2086 } 2087 2088 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2089 p0_lidx_I = NULL; 2090 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2091 const PetscInt *idxs; 2092 2093 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2094 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2095 for (i=0;i<pcbddc->benign_n;i++) { 2096 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2097 } 2098 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2099 } 2100 2101 /* vertices */ 2102 if (n_vertices) { 2103 2104 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2105 2106 if (n_R) { 2107 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2108 PetscBLASInt B_N,B_one = 1; 2109 PetscScalar *x,*y; 2110 PetscBool isseqaij; 2111 2112 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2113 if (need_benign_correction) { 2114 ISLocalToGlobalMapping RtoN; 2115 IS is_p0; 2116 PetscInt *idxs_p0,n; 2117 2118 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2119 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2120 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2121 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2122 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2123 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2124 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2125 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2126 } 2127 2128 if (lda_rhs == n_R) { 2129 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2130 } else { 2131 PetscScalar *av,*array; 2132 const PetscInt *xadj,*adjncy; 2133 PetscInt n; 2134 PetscBool flg_row; 2135 2136 array = work+lda_rhs*n_vertices; 2137 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2138 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2139 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2140 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2144 } 2145 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2146 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2147 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2148 } 2149 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2150 if (need_benign_correction) { 2151 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2152 PetscScalar *marr; 2153 2154 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2155 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2156 2157 | 0 0 0 | (V) 2158 L = | 0 0 -1 | (P-p0) 2159 | 0 0 -1 | (p0) 2160 2161 */ 2162 for (i=0;i<reuse_solver->benign_n;i++) { 2163 const PetscScalar *vals; 2164 const PetscInt *idxs,*idxs_zero; 2165 PetscInt n,j,nz; 2166 2167 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2168 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2169 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2170 for (j=0;j<n;j++) { 2171 PetscScalar val = vals[j]; 2172 PetscInt k,col = idxs[j]; 2173 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 2174 } 2175 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2176 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2177 } 2178 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2179 } 2180 if (F) { 2181 /* need to correct the rhs */ 2182 if (need_benign_correction) { 2183 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2184 PetscScalar *marr; 2185 2186 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2187 if (lda_rhs != n_R) { 2188 for (i=0;i<n_vertices;i++) { 2189 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2190 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2191 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2192 } 2193 } else { 2194 for (i=0;i<n_vertices;i++) { 2195 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2196 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2198 } 2199 } 2200 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 2201 } 2202 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 2203 /* need to correct the solution */ 2204 if (need_benign_correction) { 2205 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2206 PetscScalar *marr; 2207 2208 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2209 if (lda_rhs != n_R) { 2210 for (i=0;i<n_vertices;i++) { 2211 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2212 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2213 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2214 } 2215 } else { 2216 for (i=0;i<n_vertices;i++) { 2217 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2218 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2219 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2220 } 2221 } 2222 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 2223 } 2224 } else { 2225 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 2226 for (i=0;i<n_vertices;i++) { 2227 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 2228 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 2229 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2230 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2231 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2232 } 2233 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 2234 } 2235 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2236 /* S_VV and S_CV */ 2237 if (n_constraints) { 2238 Mat B; 2239 2240 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2241 for (i=0;i<n_vertices;i++) { 2242 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2243 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 2244 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2245 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2246 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2247 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2248 } 2249 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2250 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 2251 ierr = MatDestroy(&B);CHKERRQ(ierr); 2252 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 2253 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2254 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 2255 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 2256 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 2257 ierr = MatDestroy(&B);CHKERRQ(ierr); 2258 } 2259 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2260 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 2261 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2262 } 2263 if (lda_rhs != n_R) { 2264 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2265 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2266 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 2267 } 2268 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 2269 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 2270 if (need_benign_correction) { 2271 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2272 PetscScalar *marr,*sums; 2273 2274 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 2275 ierr = MatDenseGetArray(S_VVt,&marr); 2276 for (i=0;i<reuse_solver->benign_n;i++) { 2277 const PetscScalar *vals; 2278 const PetscInt *idxs,*idxs_zero; 2279 PetscInt n,j,nz; 2280 2281 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2282 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2283 for (j=0;j<n_vertices;j++) { 2284 PetscInt k; 2285 sums[j] = 0.; 2286 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 2287 } 2288 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2289 for (j=0;j<n;j++) { 2290 PetscScalar val = vals[j]; 2291 PetscInt k; 2292 for (k=0;k<n_vertices;k++) { 2293 marr[idxs[j]+k*n_vertices] += val*sums[k]; 2294 } 2295 } 2296 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 2297 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 2298 } 2299 ierr = PetscFree(sums);CHKERRQ(ierr); 2300 ierr = MatDenseRestoreArray(S_VVt,&marr); 2301 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 2302 } 2303 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 2304 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 2305 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 2306 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 2307 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 2308 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 2309 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 2310 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2311 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 2312 } else { 2313 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2314 } 2315 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2316 2317 /* coarse basis functions */ 2318 for (i=0;i<n_vertices;i++) { 2319 PetscScalar *y; 2320 2321 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2322 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2323 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2324 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2325 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2326 y[n_B*i+idx_V_B[i]] = 1.0; 2327 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2328 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2329 2330 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2331 PetscInt j; 2332 2333 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2334 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2335 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2336 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2337 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2338 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2339 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2340 } 2341 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2342 } 2343 /* if n_R == 0 the object is not destroyed */ 2344 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2345 } 2346 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 2347 2348 if (n_constraints) { 2349 Mat B; 2350 2351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2352 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2353 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 2354 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 2355 if (n_vertices) { 2356 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 2357 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 2358 } else { 2359 Mat S_VCt; 2360 2361 if (lda_rhs != n_R) { 2362 ierr = MatDestroy(&B);CHKERRQ(ierr); 2363 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 2364 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 2365 } 2366 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 2367 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2368 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 2369 } 2370 } 2371 ierr = MatDestroy(&B);CHKERRQ(ierr); 2372 /* coarse basis functions */ 2373 for (i=0;i<n_constraints;i++) { 2374 PetscScalar *y; 2375 2376 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 2377 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2378 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 2379 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2380 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2381 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 2382 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2383 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2384 PetscInt j; 2385 2386 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2387 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 2388 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2389 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2390 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2391 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 2392 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 2393 } 2394 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2395 } 2396 } 2397 if (n_constraints) { 2398 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 2401 2402 /* coarse matrix entries relative to B_0 */ 2403 if (pcbddc->benign_n) { 2404 Mat B0_B,B0_BPHI; 2405 IS is_dummy; 2406 PetscScalar *data; 2407 PetscInt j; 2408 2409 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2410 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2411 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2412 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2413 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2414 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 2415 for (j=0;j<pcbddc->benign_n;j++) { 2416 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2417 for (i=0;i<pcbddc->local_primal_size;i++) { 2418 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 2419 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 2420 } 2421 } 2422 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 2423 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2424 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2425 } 2426 2427 /* compute other basis functions for non-symmetric problems */ 2428 if (!pcbddc->symmetric_primal) { 2429 Mat B_V=NULL,B_C=NULL; 2430 PetscScalar *marray; 2431 2432 if (n_constraints) { 2433 Mat S_CCT,C_CRT; 2434 2435 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 2436 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 2437 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 2438 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 2439 if (n_vertices) { 2440 Mat S_VCT; 2441 2442 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 2443 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 2444 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 2445 } 2446 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 2447 } else { 2448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 2449 } 2450 if (n_vertices && n_R) { 2451 PetscScalar *av,*marray; 2452 const PetscInt *xadj,*adjncy; 2453 PetscInt n; 2454 PetscBool flg_row; 2455 2456 /* B_V = B_V - A_VR^T */ 2457 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 2458 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2459 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 2460 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) { 2462 PetscInt j; 2463 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 2464 } 2465 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2466 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2467 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2468 } 2469 2470 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 2471 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 2472 for (i=0;i<n_vertices;i++) { 2473 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 2474 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2475 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2476 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2477 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2478 } 2479 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 2480 if (B_C) { 2481 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 2482 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 2483 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 2484 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 2485 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2486 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2487 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2488 } 2489 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 2490 } 2491 /* coarse basis functions */ 2492 for (i=0;i<pcbddc->local_primal_size;i++) { 2493 PetscScalar *y; 2494 2495 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 2496 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2497 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 2498 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2500 if (i<n_vertices) { 2501 y[n_B*i+idx_V_B[i]] = 1.0; 2502 } 2503 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 2504 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2505 2506 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2507 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2508 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 2509 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 2512 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 2513 } 2514 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2515 } 2516 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 2517 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 2518 } 2519 /* free memory */ 2520 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2521 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 2522 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 2523 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 2524 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 2525 ierr = PetscFree(work);CHKERRQ(ierr); 2526 if (n_vertices) { 2527 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2528 } 2529 if (n_constraints) { 2530 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2531 } 2532 /* Checking coarse_sub_mat and coarse basis functios */ 2533 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2534 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2535 if (pcbddc->dbg_flag) { 2536 Mat coarse_sub_mat; 2537 Mat AUXMAT,TM1,TM2,TM3,TM4; 2538 Mat coarse_phi_D,coarse_phi_B; 2539 Mat coarse_psi_D,coarse_psi_B; 2540 Mat A_II,A_BB,A_IB,A_BI; 2541 Mat C_B,CPHI; 2542 IS is_dummy; 2543 Vec mones; 2544 MatType checkmattype=MATSEQAIJ; 2545 PetscReal real_value; 2546 2547 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 2548 Mat A; 2549 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 2550 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2551 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2552 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2553 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2554 ierr = MatDestroy(&A);CHKERRQ(ierr); 2555 } else { 2556 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2557 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2558 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2559 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2560 } 2561 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2562 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2563 if (!pcbddc->symmetric_primal) { 2564 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 2565 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 2566 } 2567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2568 2569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 2571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2572 if (!pcbddc->symmetric_primal) { 2573 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2574 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2575 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2576 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2577 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2578 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2579 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2580 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2581 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2582 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2583 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2584 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2585 } else { 2586 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2587 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2588 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2589 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2590 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2591 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2592 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2594 } 2595 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2596 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2597 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2598 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 2599 if (pcbddc->benign_n) { 2600 Mat B0_B,B0_BPHI; 2601 PetscScalar *data,*data2; 2602 PetscInt j; 2603 2604 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2605 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 2606 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 2607 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 2608 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 2609 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 2610 for (j=0;j<pcbddc->benign_n;j++) { 2611 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 2612 for (i=0;i<pcbddc->local_primal_size;i++) { 2613 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 2614 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 2615 } 2616 } 2617 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 2618 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 2619 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 2620 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2621 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 2622 } 2623 #if 0 2624 { 2625 PetscViewer viewer; 2626 char filename[256]; 2627 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 2628 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 2629 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2630 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 2631 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 2632 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 2633 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 2634 if (save_change) { 2635 Mat phi_B; 2636 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 2637 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 2638 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 2639 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 2640 } else { 2641 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 2642 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 2643 } 2644 if (pcbddc->coarse_phi_D) { 2645 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 2646 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 2647 } 2648 if (pcbddc->coarse_psi_B) { 2649 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 2650 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 2651 } 2652 if (pcbddc->coarse_psi_D) { 2653 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 2654 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 2655 } 2656 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 2657 } 2658 #endif 2659 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2660 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2661 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2662 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2663 2664 /* check constraints */ 2665 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 2666 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2667 if (!pcbddc->benign_n) { /* TODO: add benign case */ 2668 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2669 } else { 2670 PetscScalar *data; 2671 Mat tmat; 2672 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 2674 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 2675 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2676 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2677 } 2678 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 2679 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2680 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2681 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2683 if (!pcbddc->symmetric_primal) { 2684 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 2685 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 2686 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 2687 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 2689 } 2690 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2691 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 2692 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2693 ierr = VecDestroy(&mones);CHKERRQ(ierr); 2694 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2695 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2696 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2697 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2698 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2699 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2700 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 2701 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2702 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2703 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2704 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2705 if (!pcbddc->symmetric_primal) { 2706 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 2707 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 2708 } 2709 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2710 } 2711 /* get back data */ 2712 *coarse_submat_vals_n = coarse_submat_vals; 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatGetSubMatrixUnsorted" 2718 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 2719 { 2720 Mat *work_mat; 2721 IS isrow_s,iscol_s; 2722 PetscBool rsorted,csorted; 2723 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 2728 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 2729 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 2730 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2731 2732 if (!rsorted) { 2733 const PetscInt *idxs; 2734 PetscInt *idxs_sorted,i; 2735 2736 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 2737 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 2738 for (i=0;i<rsize;i++) { 2739 idxs_perm_r[i] = i; 2740 } 2741 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 2742 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 2743 for (i=0;i<rsize;i++) { 2744 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 2745 } 2746 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 2747 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 2748 } else { 2749 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2750 isrow_s = isrow; 2751 } 2752 2753 if (!csorted) { 2754 if (isrow == iscol) { 2755 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 2756 iscol_s = isrow_s; 2757 } else { 2758 const PetscInt *idxs; 2759 PetscInt *idxs_sorted,i; 2760 2761 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 2762 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 2763 for (i=0;i<csize;i++) { 2764 idxs_perm_c[i] = i; 2765 } 2766 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 2767 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 2768 for (i=0;i<csize;i++) { 2769 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 2770 } 2771 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 2772 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 2773 } 2774 } else { 2775 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2776 iscol_s = iscol; 2777 } 2778 2779 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2780 2781 if (!rsorted || !csorted) { 2782 Mat new_mat; 2783 IS is_perm_r,is_perm_c; 2784 2785 if (!rsorted) { 2786 PetscInt *idxs_r,i; 2787 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 2788 for (i=0;i<rsize;i++) { 2789 idxs_r[idxs_perm_r[i]] = i; 2790 } 2791 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 2792 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 2793 } else { 2794 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 2795 } 2796 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 2797 2798 if (!csorted) { 2799 if (isrow_s == iscol_s) { 2800 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 2801 is_perm_c = is_perm_r; 2802 } else { 2803 PetscInt *idxs_c,i; 2804 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 2805 for (i=0;i<csize;i++) { 2806 idxs_c[idxs_perm_c[i]] = i; 2807 } 2808 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 2809 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 2810 } 2811 } else { 2812 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 2813 } 2814 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 2815 2816 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 2817 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 2818 work_mat[0] = new_mat; 2819 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 2820 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 2821 } 2822 2823 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 2824 *B = work_mat[0]; 2825 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 2826 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 2827 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 2833 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 2834 { 2835 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2836 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2837 Mat new_mat; 2838 IS is_local,is_global; 2839 PetscInt local_size; 2840 PetscBool isseqaij; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2845 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 2846 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 2847 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 2848 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 2849 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 2850 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2851 2852 /* check */ 2853 if (pcbddc->dbg_flag) { 2854 Vec x,x_change; 2855 PetscReal error; 2856 2857 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2859 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2862 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2865 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2866 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2867 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2869 ierr = VecDestroy(&x);CHKERRQ(ierr); 2870 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2871 } 2872 2873 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2874 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2875 if (isseqaij) { 2876 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2877 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2878 } else { 2879 Mat work_mat; 2880 2881 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2882 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2883 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 2884 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2885 } 2886 if (matis->A->symmetric_set) { 2887 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2888 #if !defined(PETSC_USE_COMPLEX) 2889 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2890 #endif 2891 } 2892 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2893 PetscFunctionReturn(0); 2894 } 2895 2896 #undef __FUNCT__ 2897 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2898 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2899 { 2900 PC_IS* pcis = (PC_IS*)(pc->data); 2901 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2902 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2903 PetscInt *idx_R_local=NULL; 2904 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2905 PetscInt vbs,bs; 2906 PetscBT bitmask=NULL; 2907 PetscErrorCode ierr; 2908 2909 PetscFunctionBegin; 2910 /* 2911 No need to setup local scatters if 2912 - primal space is unchanged 2913 AND 2914 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2915 AND 2916 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2917 */ 2918 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2919 PetscFunctionReturn(0); 2920 } 2921 /* destroy old objects */ 2922 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2923 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2924 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2925 /* Set Non-overlapping dimensions */ 2926 n_B = pcis->n_B; 2927 n_D = pcis->n - n_B; 2928 n_vertices = pcbddc->n_vertices; 2929 2930 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2931 2932 /* create auxiliary bitmask and allocate workspace */ 2933 if (!sub_schurs || !sub_schurs->reuse_solver) { 2934 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2935 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2936 for (i=0;i<n_vertices;i++) { 2937 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2938 } 2939 2940 for (i=0, n_R=0; i<pcis->n; i++) { 2941 if (!PetscBTLookup(bitmask,i)) { 2942 idx_R_local[n_R++] = i; 2943 } 2944 } 2945 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 2946 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2947 2948 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2949 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 2950 } 2951 2952 /* Block code */ 2953 vbs = 1; 2954 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2955 if (bs>1 && !(n_vertices%bs)) { 2956 PetscBool is_blocked = PETSC_TRUE; 2957 PetscInt *vary; 2958 if (!sub_schurs || !sub_schurs->reuse_solver) { 2959 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2960 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2961 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2962 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2963 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2964 for (i=0; i<pcis->n/bs; i++) { 2965 if (vary[i]!=0 && vary[i]!=bs) { 2966 is_blocked = PETSC_FALSE; 2967 break; 2968 } 2969 } 2970 ierr = PetscFree(vary);CHKERRQ(ierr); 2971 } else { 2972 /* Verify directly the R set */ 2973 for (i=0; i<n_R/bs; i++) { 2974 PetscInt j,node=idx_R_local[bs*i]; 2975 for (j=1; j<bs; j++) { 2976 if (node != idx_R_local[bs*i+j]-j) { 2977 is_blocked = PETSC_FALSE; 2978 break; 2979 } 2980 } 2981 } 2982 } 2983 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2984 vbs = bs; 2985 for (i=0;i<n_R/vbs;i++) { 2986 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2987 } 2988 } 2989 } 2990 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2991 if (sub_schurs && sub_schurs->reuse_solver) { 2992 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2993 2994 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2995 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 2996 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2997 reuse_solver->is_R = pcbddc->is_R_local; 2998 } else { 2999 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3000 } 3001 3002 /* print some info if requested */ 3003 if (pcbddc->dbg_flag) { 3004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3005 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3006 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3009 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3010 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3011 } 3012 3013 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3014 if (!sub_schurs || !sub_schurs->reuse_solver) { 3015 IS is_aux1,is_aux2; 3016 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3017 3018 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3019 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3020 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3021 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3022 for (i=0; i<n_D; i++) { 3023 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3024 } 3025 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3026 for (i=0, j=0; i<n_R; i++) { 3027 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3028 aux_array1[j++] = i; 3029 } 3030 } 3031 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3032 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3033 for (i=0, j=0; i<n_B; i++) { 3034 if (!PetscBTLookup(bitmask,is_indices[i])) { 3035 aux_array2[j++] = i; 3036 } 3037 } 3038 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3039 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3040 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3041 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3042 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3043 3044 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3045 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3046 for (i=0, j=0; i<n_R; i++) { 3047 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3048 aux_array1[j++] = i; 3049 } 3050 } 3051 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3052 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3053 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3054 } 3055 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3056 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3057 } else { 3058 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3059 IS tis; 3060 PetscInt schur_size; 3061 3062 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3063 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3064 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3065 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3066 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3067 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3068 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3069 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3070 } 3071 } 3072 PetscFunctionReturn(0); 3073 } 3074 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3078 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3079 { 3080 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3081 PC_IS *pcis = (PC_IS*)pc->data; 3082 PC pc_temp; 3083 Mat A_RR; 3084 MatReuse reuse; 3085 PetscScalar m_one = -1.0; 3086 PetscReal value; 3087 PetscInt n_D,n_R; 3088 PetscBool check_corr[2],issbaij; 3089 PetscErrorCode ierr; 3090 /* prefixes stuff */ 3091 char dir_prefix[256],neu_prefix[256],str_level[16]; 3092 size_t len; 3093 3094 PetscFunctionBegin; 3095 3096 /* compute prefixes */ 3097 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3098 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3099 if (!pcbddc->current_level) { 3100 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3101 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3102 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3103 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3106 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3107 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3108 len -= 15; /* remove "pc_bddc_coarse_" */ 3109 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3110 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3111 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3112 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3113 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3114 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3115 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3116 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3117 } 3118 3119 /* DIRICHLET PROBLEM */ 3120 if (dirichlet) { 3121 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3122 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3123 if (!sub_schurs || !sub_schurs->reuse_solver) { 3124 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3125 } 3126 if (pcbddc->dbg_flag) { 3127 Mat A_IIn; 3128 3129 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3130 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3131 pcis->A_II = A_IIn; 3132 } 3133 } 3134 if (pcbddc->local_mat->symmetric_set) { 3135 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3136 } 3137 /* Matrix for Dirichlet problem is pcis->A_II */ 3138 n_D = pcis->n - pcis->n_B; 3139 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3140 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3141 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3142 /* default */ 3143 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3144 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3145 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3146 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3147 if (issbaij) { 3148 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3149 } else { 3150 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3151 } 3152 /* Allow user's customization */ 3153 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3154 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3155 } 3156 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3157 if (sub_schurs && sub_schurs->reuse_solver) { 3158 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3159 3160 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3161 } 3162 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3163 if (!n_D) { 3164 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3165 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3166 } 3167 /* Set Up KSP for Dirichlet problem of BDDC */ 3168 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3169 /* set ksp_D into pcis data */ 3170 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3171 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3172 pcis->ksp_D = pcbddc->ksp_D; 3173 } 3174 3175 /* NEUMANN PROBLEM */ 3176 A_RR = 0; 3177 if (neumann) { 3178 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3179 PetscInt ibs,mbs; 3180 PetscBool issbaij; 3181 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3182 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 3183 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 3184 if (pcbddc->ksp_R) { /* already created ksp */ 3185 PetscInt nn_R; 3186 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 3187 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3188 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 3189 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 3190 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3191 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3192 reuse = MAT_INITIAL_MATRIX; 3193 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 3194 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 3195 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3196 reuse = MAT_INITIAL_MATRIX; 3197 } else { /* safe to reuse the matrix */ 3198 reuse = MAT_REUSE_MATRIX; 3199 } 3200 } 3201 /* last check */ 3202 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 3203 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3204 reuse = MAT_INITIAL_MATRIX; 3205 } 3206 } else { /* first time, so we need to create the matrix */ 3207 reuse = MAT_INITIAL_MATRIX; 3208 } 3209 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 3210 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 3211 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 3212 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3213 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 3214 if (matis->A == pcbddc->local_mat) { 3215 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3216 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3217 } else { 3218 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3219 } 3220 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 3221 if (matis->A == pcbddc->local_mat) { 3222 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3223 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3224 } else { 3225 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 3226 } 3227 } 3228 /* extract A_RR */ 3229 if (sub_schurs && sub_schurs->reuse_solver) { 3230 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3231 3232 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 3233 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3234 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 3235 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 3236 } else { 3237 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3238 } 3239 } else { 3240 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3241 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 3242 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 3243 } 3244 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 3245 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 3246 } 3247 if (pcbddc->local_mat->symmetric_set) { 3248 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3249 } 3250 if (!pcbddc->ksp_R) { /* create object if not present */ 3251 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3252 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3253 /* default */ 3254 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3255 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 3256 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3257 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3258 if (issbaij) { 3259 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3260 } else { 3261 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3262 } 3263 /* Allow user's customization */ 3264 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3265 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3266 } 3267 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3268 if (!n_R) { 3269 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3270 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3271 } 3272 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 3273 /* Reuse solver if it is present */ 3274 if (sub_schurs && sub_schurs->reuse_solver) { 3275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3276 3277 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 3278 } 3279 /* Set Up KSP for Neumann problem of BDDC */ 3280 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3281 } 3282 3283 if (pcbddc->dbg_flag) { 3284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3285 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3286 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3287 } 3288 3289 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 3290 check_corr[0] = check_corr[1] = PETSC_FALSE; 3291 if (pcbddc->NullSpace_corr[0]) { 3292 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 3293 } 3294 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 3295 check_corr[0] = PETSC_TRUE; 3296 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 3297 } 3298 if (neumann && pcbddc->NullSpace_corr[2]) { 3299 check_corr[1] = PETSC_TRUE; 3300 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 3301 } 3302 3303 /* check Dirichlet and Neumann solvers */ 3304 if (pcbddc->dbg_flag) { 3305 if (dirichlet) { /* Dirichlet */ 3306 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3307 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3308 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 3309 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 3310 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 3311 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 3312 if (check_corr[0]) { 3313 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 3314 } 3315 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3316 } 3317 if (neumann) { /* Neumann */ 3318 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3319 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3320 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 3321 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 3322 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 3324 if (check_corr[1]) { 3325 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 3326 } 3327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3328 } 3329 } 3330 /* free Neumann problem's matrix */ 3331 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3332 PetscFunctionReturn(0); 3333 } 3334 3335 #undef __FUNCT__ 3336 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 3337 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 3338 { 3339 PetscErrorCode ierr; 3340 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3341 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3342 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 3343 3344 PetscFunctionBegin; 3345 if (!reuse_solver) { 3346 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 3347 } 3348 if (!pcbddc->switch_static) { 3349 if (applytranspose && pcbddc->local_auxmat1) { 3350 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3351 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3352 } 3353 if (!reuse_solver) { 3354 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3355 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3356 } else { 3357 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3358 3359 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3360 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3361 } 3362 } else { 3363 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3364 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3365 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3366 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3367 if (applytranspose && pcbddc->local_auxmat1) { 3368 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 3369 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3370 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3371 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3372 } 3373 } 3374 if (!reuse_solver || pcbddc->switch_static) { 3375 if (applytranspose) { 3376 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3377 } else { 3378 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3379 } 3380 } else { 3381 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3382 3383 if (applytranspose) { 3384 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3385 } else { 3386 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 3387 } 3388 } 3389 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 3390 if (!pcbddc->switch_static) { 3391 if (!reuse_solver) { 3392 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3393 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3394 } else { 3395 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3396 3397 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3398 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3399 } 3400 if (!applytranspose && pcbddc->local_auxmat1) { 3401 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3402 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 3403 } 3404 } else { 3405 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3406 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3407 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3408 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 if (!applytranspose && pcbddc->local_auxmat1) { 3410 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 3411 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3412 } 3413 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3416 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3417 } 3418 PetscFunctionReturn(0); 3419 } 3420 3421 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 3422 #undef __FUNCT__ 3423 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 3424 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 3425 { 3426 PetscErrorCode ierr; 3427 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3428 PC_IS* pcis = (PC_IS*) (pc->data); 3429 const PetscScalar zero = 0.0; 3430 3431 PetscFunctionBegin; 3432 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 3433 if (!pcbddc->benign_apply_coarse_only) { 3434 if (applytranspose) { 3435 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3436 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3437 } else { 3438 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 3439 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 3440 } 3441 } else { 3442 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3443 } 3444 3445 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 3446 if (pcbddc->benign_n) { 3447 PetscScalar *array; 3448 PetscInt j; 3449 3450 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3451 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 3452 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3453 } 3454 3455 /* start communications from local primal nodes to rhs of coarse solver */ 3456 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 3457 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3458 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3459 3460 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 3461 if (pcbddc->coarse_ksp) { 3462 Mat coarse_mat; 3463 Vec rhs,sol; 3464 MatNullSpace nullsp; 3465 PetscBool isbddc = PETSC_FALSE; 3466 3467 if (pcbddc->benign_have_null) { 3468 PC coarse_pc; 3469 3470 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3471 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 3472 /* we need to propagate to coarser levels the need for a possible benign correction */ 3473 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 3474 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3475 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 3476 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 3477 } 3478 } 3479 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 3480 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 3481 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3482 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 3483 if (nullsp) { 3484 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 3485 } 3486 if (applytranspose) { 3487 if (pcbddc->benign_apply_coarse_only) { 3488 SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 3489 } else { 3490 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3491 } 3492 } else { 3493 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 3494 PC coarse_pc; 3495 3496 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3497 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3498 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 3499 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 3500 } else { 3501 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 3502 } 3503 } 3504 /* we don't need the benign correction at coarser levels anymore */ 3505 if (pcbddc->benign_have_null && isbddc) { 3506 PC coarse_pc; 3507 PC_BDDC* coarsepcbddc; 3508 3509 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 3510 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 3511 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 3512 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 3513 } 3514 if (nullsp) { 3515 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 3516 } 3517 } 3518 3519 /* Local solution on R nodes */ 3520 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 3521 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 3522 } 3523 /* communications from coarse sol to local primal nodes */ 3524 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3525 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3526 3527 /* Sum contributions from the two levels */ 3528 if (!pcbddc->benign_apply_coarse_only) { 3529 if (applytranspose) { 3530 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3531 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3532 } else { 3533 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 3534 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 3535 } 3536 /* store p0 */ 3537 if (pcbddc->benign_n) { 3538 PetscScalar *array; 3539 PetscInt j; 3540 3541 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3542 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 3543 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3544 } 3545 } else { /* expand the coarse solution */ 3546 if (applytranspose) { 3547 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3548 } else { 3549 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 3550 } 3551 } 3552 PetscFunctionReturn(0); 3553 } 3554 3555 #undef __FUNCT__ 3556 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 3557 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 3558 { 3559 PetscErrorCode ierr; 3560 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3561 PetscScalar *array; 3562 Vec from,to; 3563 3564 PetscFunctionBegin; 3565 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3566 from = pcbddc->coarse_vec; 3567 to = pcbddc->vec1_P; 3568 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3569 Vec tvec; 3570 3571 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3572 ierr = VecResetArray(tvec);CHKERRQ(ierr); 3573 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3574 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 3575 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 3576 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 3577 } 3578 } else { /* from local to global -> put data in coarse right hand side */ 3579 from = pcbddc->vec1_P; 3580 to = pcbddc->coarse_vec; 3581 } 3582 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3583 PetscFunctionReturn(0); 3584 } 3585 3586 #undef __FUNCT__ 3587 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 3588 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 3589 { 3590 PetscErrorCode ierr; 3591 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 3592 PetscScalar *array; 3593 Vec from,to; 3594 3595 PetscFunctionBegin; 3596 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 3597 from = pcbddc->coarse_vec; 3598 to = pcbddc->vec1_P; 3599 } else { /* from local to global -> put data in coarse right hand side */ 3600 from = pcbddc->vec1_P; 3601 to = pcbddc->coarse_vec; 3602 } 3603 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 3604 if (smode == SCATTER_FORWARD) { 3605 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 3606 Vec tvec; 3607 3608 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 3609 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 3610 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 3611 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 3612 } 3613 } else { 3614 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 3615 ierr = VecResetArray(from);CHKERRQ(ierr); 3616 } 3617 } 3618 PetscFunctionReturn(0); 3619 } 3620 3621 /* uncomment for testing purposes */ 3622 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 3623 #undef __FUNCT__ 3624 #define __FUNCT__ "PCBDDCConstraintsSetUp" 3625 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 3626 { 3627 PetscErrorCode ierr; 3628 PC_IS* pcis = (PC_IS*)(pc->data); 3629 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3630 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3631 /* one and zero */ 3632 PetscScalar one=1.0,zero=0.0; 3633 /* space to store constraints and their local indices */ 3634 PetscScalar *constraints_data; 3635 PetscInt *constraints_idxs,*constraints_idxs_B; 3636 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 3637 PetscInt *constraints_n; 3638 /* iterators */ 3639 PetscInt i,j,k,total_counts,total_counts_cc,cum; 3640 /* BLAS integers */ 3641 PetscBLASInt lwork,lierr; 3642 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 3643 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 3644 /* reuse */ 3645 PetscInt olocal_primal_size,olocal_primal_size_cc; 3646 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 3647 /* change of basis */ 3648 PetscBool qr_needed; 3649 PetscBT change_basis,qr_needed_idx; 3650 /* auxiliary stuff */ 3651 PetscInt *nnz,*is_indices; 3652 PetscInt ncc; 3653 /* some quantities */ 3654 PetscInt n_vertices,total_primal_vertices,valid_constraints; 3655 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 3656 3657 PetscFunctionBegin; 3658 /* Destroy Mat objects computed previously */ 3659 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3660 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3661 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3662 /* save info on constraints from previous setup (if any) */ 3663 olocal_primal_size = pcbddc->local_primal_size; 3664 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 3665 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 3666 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3667 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 3668 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3669 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3670 3671 if (!pcbddc->adaptive_selection) { 3672 IS ISForVertices,*ISForFaces,*ISForEdges; 3673 MatNullSpace nearnullsp; 3674 const Vec *nearnullvecs; 3675 Vec *localnearnullsp; 3676 PetscScalar *array; 3677 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 3678 PetscBool nnsp_has_cnst; 3679 /* LAPACK working arrays for SVD or POD */ 3680 PetscBool skip_lapack,boolforchange; 3681 PetscScalar *work; 3682 PetscReal *singular_vals; 3683 #if defined(PETSC_USE_COMPLEX) 3684 PetscReal *rwork; 3685 #endif 3686 #if defined(PETSC_MISSING_LAPACK_GESVD) 3687 PetscScalar *temp_basis,*correlation_mat; 3688 #else 3689 PetscBLASInt dummy_int=1; 3690 PetscScalar dummy_scalar=1.; 3691 #endif 3692 3693 /* Get index sets for faces, edges and vertices from graph */ 3694 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 3695 /* print some info */ 3696 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 3697 PetscInt nv; 3698 3699 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 3700 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 3701 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3703 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 3704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 3705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 3706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3707 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3708 } 3709 3710 /* free unneeded index sets */ 3711 if (!pcbddc->use_vertices) { 3712 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3713 } 3714 if (!pcbddc->use_edges) { 3715 for (i=0;i<n_ISForEdges;i++) { 3716 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3719 n_ISForEdges = 0; 3720 } 3721 if (!pcbddc->use_faces) { 3722 for (i=0;i<n_ISForFaces;i++) { 3723 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3724 } 3725 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3726 n_ISForFaces = 0; 3727 } 3728 3729 /* check if near null space is attached to global mat */ 3730 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 3731 if (nearnullsp) { 3732 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 3733 /* remove any stored info */ 3734 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3735 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3736 /* store information for BDDC solver reuse */ 3737 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 3738 pcbddc->onearnullspace = nearnullsp; 3739 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3740 for (i=0;i<nnsp_size;i++) { 3741 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 3742 } 3743 } else { /* if near null space is not provided BDDC uses constants by default */ 3744 nnsp_size = 0; 3745 nnsp_has_cnst = PETSC_TRUE; 3746 } 3747 /* get max number of constraints on a single cc */ 3748 max_constraints = nnsp_size; 3749 if (nnsp_has_cnst) max_constraints++; 3750 3751 /* 3752 Evaluate maximum storage size needed by the procedure 3753 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 3754 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 3755 There can be multiple constraints per connected component 3756 */ 3757 n_vertices = 0; 3758 if (ISForVertices) { 3759 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 3760 } 3761 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 3762 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 3763 3764 total_counts = n_ISForFaces+n_ISForEdges; 3765 total_counts *= max_constraints; 3766 total_counts += n_vertices; 3767 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 3768 3769 total_counts = 0; 3770 max_size_of_constraint = 0; 3771 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 3772 IS used_is; 3773 if (i<n_ISForEdges) { 3774 used_is = ISForEdges[i]; 3775 } else { 3776 used_is = ISForFaces[i-n_ISForEdges]; 3777 } 3778 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 3779 total_counts += j; 3780 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 3781 } 3782 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 3783 3784 /* get local part of global near null space vectors */ 3785 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 3786 for (k=0;k<nnsp_size;k++) { 3787 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 3788 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3789 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3790 } 3791 3792 /* whether or not to skip lapack calls */ 3793 skip_lapack = PETSC_TRUE; 3794 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 3795 3796 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 3797 if (!skip_lapack) { 3798 PetscScalar temp_work; 3799 3800 #if defined(PETSC_MISSING_LAPACK_GESVD) 3801 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 3802 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 3803 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 3804 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 3805 #if defined(PETSC_USE_COMPLEX) 3806 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 3807 #endif 3808 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3809 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3810 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 3811 lwork = -1; 3812 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3813 #if !defined(PETSC_USE_COMPLEX) 3814 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 3815 #else 3816 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 3817 #endif 3818 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3819 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 3820 #else /* on missing GESVD */ 3821 /* SVD */ 3822 PetscInt max_n,min_n; 3823 max_n = max_size_of_constraint; 3824 min_n = max_constraints; 3825 if (max_size_of_constraint < max_constraints) { 3826 min_n = max_size_of_constraint; 3827 max_n = max_constraints; 3828 } 3829 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 3830 #if defined(PETSC_USE_COMPLEX) 3831 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 3832 #endif 3833 /* now we evaluate the optimal workspace using query with lwork=-1 */ 3834 lwork = -1; 3835 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 3836 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 3837 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 3838 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3839 #if !defined(PETSC_USE_COMPLEX) 3840 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 3841 #else 3842 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 3843 #endif 3844 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3845 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 3846 #endif /* on missing GESVD */ 3847 /* Allocate optimal workspace */ 3848 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 3849 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 3850 } 3851 /* Now we can loop on constraining sets */ 3852 total_counts = 0; 3853 constraints_idxs_ptr[0] = 0; 3854 constraints_data_ptr[0] = 0; 3855 /* vertices */ 3856 if (n_vertices) { 3857 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3858 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3859 for (i=0;i<n_vertices;i++) { 3860 constraints_n[total_counts] = 1; 3861 constraints_data[total_counts] = 1.0; 3862 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 3863 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 3864 total_counts++; 3865 } 3866 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3867 n_vertices = total_counts; 3868 } 3869 3870 /* edges and faces */ 3871 total_counts_cc = total_counts; 3872 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 3873 IS used_is; 3874 PetscBool idxs_copied = PETSC_FALSE; 3875 3876 if (ncc<n_ISForEdges) { 3877 used_is = ISForEdges[ncc]; 3878 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 3879 } else { 3880 used_is = ISForFaces[ncc-n_ISForEdges]; 3881 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 3882 } 3883 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 3884 3885 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 3886 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3887 /* change of basis should not be performed on local periodic nodes */ 3888 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 3889 if (nnsp_has_cnst) { 3890 PetscScalar quad_value; 3891 3892 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3893 idxs_copied = PETSC_TRUE; 3894 3895 if (!pcbddc->use_nnsp_true) { 3896 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 3897 } else { 3898 quad_value = 1.0; 3899 } 3900 for (j=0;j<size_of_constraint;j++) { 3901 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3902 } 3903 temp_constraints++; 3904 total_counts++; 3905 } 3906 for (k=0;k<nnsp_size;k++) { 3907 PetscReal real_value; 3908 PetscScalar *ptr_to_data; 3909 3910 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3911 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3912 for (j=0;j<size_of_constraint;j++) { 3913 ptr_to_data[j] = array[is_indices[j]]; 3914 } 3915 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3916 /* check if array is null on the connected component */ 3917 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3918 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3919 if (real_value > 0.0) { /* keep indices and values */ 3920 temp_constraints++; 3921 total_counts++; 3922 if (!idxs_copied) { 3923 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3924 idxs_copied = PETSC_TRUE; 3925 } 3926 } 3927 } 3928 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3929 valid_constraints = temp_constraints; 3930 if (!pcbddc->use_nnsp_true && temp_constraints) { 3931 if (temp_constraints == 1) { /* just normalize the constraint */ 3932 PetscScalar norm,*ptr_to_data; 3933 3934 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3935 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3936 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3937 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3938 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3939 } else { /* perform SVD */ 3940 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3941 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3942 3943 #if defined(PETSC_MISSING_LAPACK_GESVD) 3944 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3945 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3946 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3947 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3948 from that computed using LAPACKgesvd 3949 -> This is due to a different computation of eigenvectors in LAPACKheev 3950 -> The quality of the POD-computed basis will be the same */ 3951 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3952 /* Store upper triangular part of correlation matrix */ 3953 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3954 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3955 for (j=0;j<temp_constraints;j++) { 3956 for (k=0;k<j+1;k++) { 3957 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3958 } 3959 } 3960 /* compute eigenvalues and eigenvectors of correlation matrix */ 3961 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3962 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3963 #if !defined(PETSC_USE_COMPLEX) 3964 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3965 #else 3966 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3967 #endif 3968 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3969 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3970 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3971 j = 0; 3972 while (j < temp_constraints && singular_vals[j] < tol) j++; 3973 total_counts = total_counts-j; 3974 valid_constraints = temp_constraints-j; 3975 /* scale and copy POD basis into used quadrature memory */ 3976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3977 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3978 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3980 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3982 if (j<temp_constraints) { 3983 PetscInt ii; 3984 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3986 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3987 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3988 for (k=0;k<temp_constraints-j;k++) { 3989 for (ii=0;ii<size_of_constraint;ii++) { 3990 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3991 } 3992 } 3993 } 3994 #else /* on missing GESVD */ 3995 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3996 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3998 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3999 #if !defined(PETSC_USE_COMPLEX) 4000 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4001 #else 4002 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4003 #endif 4004 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4005 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4006 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4007 k = temp_constraints; 4008 if (k > size_of_constraint) k = size_of_constraint; 4009 j = 0; 4010 while (j < k && singular_vals[k-j-1] < tol) j++; 4011 valid_constraints = k-j; 4012 total_counts = total_counts-temp_constraints+valid_constraints; 4013 #endif /* on missing GESVD */ 4014 } 4015 } 4016 /* update pointers information */ 4017 if (valid_constraints) { 4018 constraints_n[total_counts_cc] = valid_constraints; 4019 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4020 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4021 /* set change_of_basis flag */ 4022 if (boolforchange) { 4023 PetscBTSet(change_basis,total_counts_cc); 4024 } 4025 total_counts_cc++; 4026 } 4027 } 4028 /* free workspace */ 4029 if (!skip_lapack) { 4030 ierr = PetscFree(work);CHKERRQ(ierr); 4031 #if defined(PETSC_USE_COMPLEX) 4032 ierr = PetscFree(rwork);CHKERRQ(ierr); 4033 #endif 4034 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4035 #if defined(PETSC_MISSING_LAPACK_GESVD) 4036 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4037 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4038 #endif 4039 } 4040 for (k=0;k<nnsp_size;k++) { 4041 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4042 } 4043 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4044 /* free index sets of faces, edges and vertices */ 4045 for (i=0;i<n_ISForFaces;i++) { 4046 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4047 } 4048 if (n_ISForFaces) { 4049 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4050 } 4051 for (i=0;i<n_ISForEdges;i++) { 4052 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4053 } 4054 if (n_ISForEdges) { 4055 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4056 } 4057 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4058 } else { 4059 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4060 4061 total_counts = 0; 4062 n_vertices = 0; 4063 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4064 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4065 } 4066 max_constraints = 0; 4067 total_counts_cc = 0; 4068 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4069 total_counts += pcbddc->adaptive_constraints_n[i]; 4070 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4071 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4072 } 4073 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4074 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4075 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4076 constraints_data = pcbddc->adaptive_constraints_data; 4077 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4078 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4079 total_counts_cc = 0; 4080 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4081 if (pcbddc->adaptive_constraints_n[i]) { 4082 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4083 } 4084 } 4085 #if 0 4086 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4087 for (i=0;i<total_counts_cc;i++) { 4088 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4089 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4090 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4091 printf(" %d",constraints_idxs[j]); 4092 } 4093 printf("\n"); 4094 printf("number of cc: %d\n",constraints_n[i]); 4095 } 4096 for (i=0;i<n_vertices;i++) { 4097 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4098 } 4099 for (i=0;i<sub_schurs->n_subs;i++) { 4100 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4101 } 4102 #endif 4103 4104 max_size_of_constraint = 0; 4105 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4106 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4107 /* Change of basis */ 4108 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4109 if (pcbddc->use_change_of_basis) { 4110 for (i=0;i<sub_schurs->n_subs;i++) { 4111 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4112 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4113 } 4114 } 4115 } 4116 } 4117 pcbddc->local_primal_size = total_counts; 4118 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4119 4120 /* map constraints_idxs in boundary numbering */ 4121 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4122 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4123 4124 /* Create constraint matrix */ 4125 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4126 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4127 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4128 4129 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4130 /* determine if a QR strategy is needed for change of basis */ 4131 qr_needed = PETSC_FALSE; 4132 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4133 total_primal_vertices=0; 4134 pcbddc->local_primal_size_cc = 0; 4135 for (i=0;i<total_counts_cc;i++) { 4136 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4137 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4138 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4139 pcbddc->local_primal_size_cc += 1; 4140 } else if (PetscBTLookup(change_basis,i)) { 4141 for (k=0;k<constraints_n[i];k++) { 4142 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4143 } 4144 pcbddc->local_primal_size_cc += constraints_n[i]; 4145 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4146 PetscBTSet(qr_needed_idx,i); 4147 qr_needed = PETSC_TRUE; 4148 } 4149 } else { 4150 pcbddc->local_primal_size_cc += 1; 4151 } 4152 } 4153 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4154 pcbddc->n_vertices = total_primal_vertices; 4155 /* permute indices in order to have a sorted set of vertices */ 4156 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4157 4158 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4159 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4160 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4161 4162 /* nonzero structure of constraint matrix */ 4163 /* and get reference dof for local constraints */ 4164 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4165 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4166 4167 j = total_primal_vertices; 4168 total_counts = total_primal_vertices; 4169 cum = total_primal_vertices; 4170 for (i=n_vertices;i<total_counts_cc;i++) { 4171 if (!PetscBTLookup(change_basis,i)) { 4172 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 4173 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 4174 cum++; 4175 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4176 for (k=0;k<constraints_n[i];k++) { 4177 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4178 nnz[j+k] = size_of_constraint; 4179 } 4180 j += constraints_n[i]; 4181 } 4182 } 4183 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 4184 ierr = PetscFree(nnz);CHKERRQ(ierr); 4185 4186 /* set values in constraint matrix */ 4187 for (i=0;i<total_primal_vertices;i++) { 4188 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4189 } 4190 total_counts = total_primal_vertices; 4191 for (i=n_vertices;i<total_counts_cc;i++) { 4192 if (!PetscBTLookup(change_basis,i)) { 4193 PetscInt *cols; 4194 4195 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4196 cols = constraints_idxs+constraints_idxs_ptr[i]; 4197 for (k=0;k<constraints_n[i];k++) { 4198 PetscInt row = total_counts+k; 4199 PetscScalar *vals; 4200 4201 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 4202 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4203 } 4204 total_counts += constraints_n[i]; 4205 } 4206 } 4207 /* assembling */ 4208 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4209 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4210 4211 /* 4212 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4213 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 4214 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 4215 */ 4216 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 4217 if (pcbddc->use_change_of_basis) { 4218 /* dual and primal dofs on a single cc */ 4219 PetscInt dual_dofs,primal_dofs; 4220 /* working stuff for GEQRF */ 4221 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 4222 PetscBLASInt lqr_work; 4223 /* working stuff for UNGQR */ 4224 PetscScalar *gqr_work,lgqr_work_t; 4225 PetscBLASInt lgqr_work; 4226 /* working stuff for TRTRS */ 4227 PetscScalar *trs_rhs; 4228 PetscBLASInt Blas_NRHS; 4229 /* pointers for values insertion into change of basis matrix */ 4230 PetscInt *start_rows,*start_cols; 4231 PetscScalar *start_vals; 4232 /* working stuff for values insertion */ 4233 PetscBT is_primal; 4234 PetscInt *aux_primal_numbering_B; 4235 /* matrix sizes */ 4236 PetscInt global_size,local_size; 4237 /* temporary change of basis */ 4238 Mat localChangeOfBasisMatrix; 4239 /* extra space for debugging */ 4240 PetscScalar *dbg_work; 4241 4242 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 4243 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 4244 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4245 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 4246 /* nonzeros for local mat */ 4247 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 4248 if (!pcbddc->benign_change || pcbddc->fake_change) { 4249 for (i=0;i<pcis->n;i++) nnz[i]=1; 4250 } else { 4251 const PetscInt *ii; 4252 PetscInt n; 4253 PetscBool flg_row; 4254 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4255 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 4256 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 4257 } 4258 for (i=n_vertices;i<total_counts_cc;i++) { 4259 if (PetscBTLookup(change_basis,i)) { 4260 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4261 if (PetscBTLookup(qr_needed_idx,i)) { 4262 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 4263 } else { 4264 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 4265 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 4266 } 4267 } 4268 } 4269 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 4270 ierr = PetscFree(nnz);CHKERRQ(ierr); 4271 /* Set interior change in the matrix */ 4272 if (!pcbddc->benign_change || pcbddc->fake_change) { 4273 for (i=0;i<pcis->n;i++) { 4274 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 4275 } 4276 } else { 4277 const PetscInt *ii,*jj; 4278 PetscScalar *aa; 4279 PetscInt n; 4280 PetscBool flg_row; 4281 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4282 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4283 for (i=0;i<n;i++) { 4284 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 4285 } 4286 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 4287 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 4288 } 4289 4290 if (pcbddc->dbg_flag) { 4291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4293 } 4294 4295 4296 /* Now we loop on the constraints which need a change of basis */ 4297 /* 4298 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 4299 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 4300 4301 Basic blocks of change of basis matrix T computed by 4302 4303 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 4304 4305 | 1 0 ... 0 s_1/S | 4306 | 0 1 ... 0 s_2/S | 4307 | ... | 4308 | 0 ... 1 s_{n-1}/S | 4309 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 4310 4311 with S = \sum_{i=1}^n s_i^2 4312 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 4313 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 4314 4315 - QR decomposition of constraints otherwise 4316 */ 4317 if (qr_needed) { 4318 /* space to store Q */ 4319 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 4320 /* first we issue queries for optimal work */ 4321 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4322 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4323 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4324 lqr_work = -1; 4325 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 4326 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 4327 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 4328 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 4329 lgqr_work = -1; 4330 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 4331 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 4332 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 4333 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4334 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 4335 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 4336 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 4337 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 4338 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 4339 /* array to store scaling factors for reflectors */ 4340 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 4341 /* array to store rhs and solution of triangular solver */ 4342 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 4343 /* allocating workspace for check */ 4344 if (pcbddc->dbg_flag) { 4345 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 4346 } 4347 } 4348 /* array to store whether a node is primal or not */ 4349 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 4350 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 4351 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 4352 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 4353 for (i=0;i<total_primal_vertices;i++) { 4354 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 4355 } 4356 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 4357 4358 /* loop on constraints and see whether or not they need a change of basis and compute it */ 4359 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 4360 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 4361 if (PetscBTLookup(change_basis,total_counts)) { 4362 /* get constraint info */ 4363 primal_dofs = constraints_n[total_counts]; 4364 dual_dofs = size_of_constraint-primal_dofs; 4365 4366 if (pcbddc->dbg_flag) { 4367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 4368 } 4369 4370 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 4371 4372 /* copy quadrature constraints for change of basis check */ 4373 if (pcbddc->dbg_flag) { 4374 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4375 } 4376 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 4377 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4378 4379 /* compute QR decomposition of constraints */ 4380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4381 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4382 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4383 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4384 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 4385 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 4386 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4387 4388 /* explictly compute R^-T */ 4389 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 4390 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 4391 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4392 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 4393 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4394 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4396 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 4397 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 4398 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4399 4400 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 4401 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4402 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4403 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4405 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4406 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 4407 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 4408 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4409 4410 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 4411 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 4412 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 4413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4414 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 4415 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4417 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 4418 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4419 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4420 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 4421 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4422 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 4423 4424 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 4425 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 4426 /* insert cols for primal dofs */ 4427 for (j=0;j<primal_dofs;j++) { 4428 start_vals = &qr_basis[j*size_of_constraint]; 4429 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4430 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4431 } 4432 /* insert cols for dual dofs */ 4433 for (j=0,k=0;j<dual_dofs;k++) { 4434 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 4435 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 4436 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4437 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 4438 j++; 4439 } 4440 } 4441 4442 /* check change of basis */ 4443 if (pcbddc->dbg_flag) { 4444 PetscInt ii,jj; 4445 PetscBool valid_qr=PETSC_TRUE; 4446 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 4447 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 4449 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4450 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 4451 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 4452 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4453 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 4454 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4455 for (jj=0;jj<size_of_constraint;jj++) { 4456 for (ii=0;ii<primal_dofs;ii++) { 4457 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 4458 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 4459 } 4460 } 4461 if (!valid_qr) { 4462 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 4463 for (jj=0;jj<size_of_constraint;jj++) { 4464 for (ii=0;ii<primal_dofs;ii++) { 4465 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 4466 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4467 } 4468 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 4469 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 4470 } 4471 } 4472 } 4473 } else { 4474 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 4475 } 4476 } 4477 } else { /* simple transformation block */ 4478 PetscInt row,col; 4479 PetscScalar val,norm; 4480 4481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4482 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 4483 for (j=0;j<size_of_constraint;j++) { 4484 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 4485 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 4486 if (!PetscBTLookup(is_primal,row_B)) { 4487 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 4488 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 4489 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 4490 } else { 4491 for (k=0;k<size_of_constraint;k++) { 4492 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 4493 if (row != col) { 4494 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 4495 } else { 4496 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 4497 } 4498 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 4499 } 4500 } 4501 } 4502 if (pcbddc->dbg_flag) { 4503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 4504 } 4505 } 4506 } else { 4507 if (pcbddc->dbg_flag) { 4508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 4509 } 4510 } 4511 } 4512 4513 /* free workspace */ 4514 if (qr_needed) { 4515 if (pcbddc->dbg_flag) { 4516 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 4517 } 4518 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 4519 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 4520 ierr = PetscFree(qr_work);CHKERRQ(ierr); 4521 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 4522 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 4523 } 4524 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 4525 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4527 4528 /* assembling of global change of variable */ 4529 if (!pcbddc->fake_change) { 4530 Mat tmat; 4531 PetscInt bs; 4532 4533 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 4534 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 4535 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4536 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 4537 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4538 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 4539 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 4540 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 4541 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 4542 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 4543 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4544 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4546 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4547 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4548 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4549 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4550 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 4551 4552 /* check */ 4553 if (pcbddc->dbg_flag) { 4554 PetscReal error; 4555 Vec x,x_change; 4556 4557 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 4558 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 4559 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4560 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 4561 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4562 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4563 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 4564 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4565 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4566 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 4567 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4568 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 4571 ierr = VecDestroy(&x);CHKERRQ(ierr); 4572 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4573 } 4574 /* adapt sub_schurs computed (if any) */ 4575 if (pcbddc->use_deluxe_scaling) { 4576 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4577 4578 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) { 4579 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 4580 } 4581 if (sub_schurs && sub_schurs->S_Ej_all) { 4582 Mat S_new,tmat; 4583 IS is_all_N,is_V_Sall = NULL; 4584 4585 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 4586 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 4587 if (pcbddc->deluxe_zerorows) { 4588 ISLocalToGlobalMapping NtoSall; 4589 IS is_V; 4590 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 4591 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 4592 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 4593 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 4594 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 4595 } 4596 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 4597 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4598 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 4599 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4600 if (pcbddc->deluxe_zerorows) { 4601 const PetscScalar *array; 4602 const PetscInt *idxs_V,*idxs_all; 4603 PetscInt i,n_V; 4604 4605 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4606 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 4607 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4608 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4609 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 4610 for (i=0;i<n_V;i++) { 4611 PetscScalar val; 4612 PetscInt idx; 4613 4614 idx = idxs_V[i]; 4615 val = array[idxs_all[idxs_V[i]]]; 4616 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 4617 } 4618 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 4621 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 4622 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 4623 } 4624 sub_schurs->S_Ej_all = S_new; 4625 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4626 if (sub_schurs->sum_S_Ej_all) { 4627 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 4628 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 4629 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 4630 if (pcbddc->deluxe_zerorows) { 4631 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 4632 } 4633 sub_schurs->sum_S_Ej_all = S_new; 4634 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 4635 } 4636 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 4637 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4638 } 4639 /* destroy any change of basis context in sub_schurs */ 4640 if (sub_schurs && sub_schurs->change) { 4641 PetscInt i; 4642 4643 for (i=0;i<sub_schurs->n_subs;i++) { 4644 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 4645 } 4646 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 4647 } 4648 } 4649 if (pcbddc->switch_static) { /* need to save the local change */ 4650 pcbddc->switch_static_change = localChangeOfBasisMatrix; 4651 } else { 4652 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 4653 } 4654 /* determine if any process has changed the pressures locally */ 4655 pcbddc->change_interior = pcbddc->benign_have_null; 4656 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 4657 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4658 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 4659 pcbddc->use_qr_single = qr_needed; 4660 } 4661 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 4662 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 4663 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 4664 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 4665 } else { 4666 Mat benign_global = NULL; 4667 if (pcbddc->benign_have_null) { 4668 Mat tmat; 4669 4670 pcbddc->change_interior = PETSC_TRUE; 4671 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4672 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4673 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4674 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4675 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 4676 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4677 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4678 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 4679 if (pcbddc->benign_change) { 4680 Mat M; 4681 4682 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 4683 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 4684 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 4685 ierr = MatDestroy(&M);CHKERRQ(ierr); 4686 } else { 4687 Mat eye; 4688 PetscScalar *array; 4689 4690 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4691 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 4692 for (i=0;i<pcis->n;i++) { 4693 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 4694 } 4695 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4696 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4697 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 4699 ierr = MatDestroy(&eye);CHKERRQ(ierr); 4700 } 4701 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 4702 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4703 } 4704 if (pcbddc->user_ChangeOfBasisMatrix) { 4705 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4706 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 4707 } else if (pcbddc->benign_have_null) { 4708 pcbddc->ChangeOfBasisMatrix = benign_global; 4709 } 4710 } 4711 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 4712 IS is_global; 4713 const PetscInt *gidxs; 4714 4715 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4716 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 4717 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 4718 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 4719 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4720 } 4721 } 4722 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 4723 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 4724 } 4725 4726 if (!pcbddc->fake_change) { 4727 /* add pressure dofs to set of primal nodes for numbering purposes */ 4728 for (i=0;i<pcbddc->benign_n;i++) { 4729 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 4730 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 4731 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 4732 pcbddc->local_primal_size_cc++; 4733 pcbddc->local_primal_size++; 4734 } 4735 4736 /* check if a new primal space has been introduced (also take into account benign trick) */ 4737 pcbddc->new_primal_space_local = PETSC_TRUE; 4738 if (olocal_primal_size == pcbddc->local_primal_size) { 4739 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4740 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4741 if (!pcbddc->new_primal_space_local) { 4742 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 4743 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 4744 } 4745 } 4746 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 4747 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4748 } 4749 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 4750 4751 /* flush dbg viewer */ 4752 if (pcbddc->dbg_flag) { 4753 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4754 } 4755 4756 /* free workspace */ 4757 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 4758 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 4759 if (!pcbddc->adaptive_selection) { 4760 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 4761 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 4762 } else { 4763 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 4764 pcbddc->adaptive_constraints_idxs_ptr, 4765 pcbddc->adaptive_constraints_data_ptr, 4766 pcbddc->adaptive_constraints_idxs, 4767 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 4768 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 4769 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 4770 } 4771 PetscFunctionReturn(0); 4772 } 4773 4774 #undef __FUNCT__ 4775 #define __FUNCT__ "PCBDDCAnalyzeInterface" 4776 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 4777 { 4778 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4779 PC_IS *pcis = (PC_IS*)pc->data; 4780 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4781 PetscInt ierr,i,N; 4782 4783 PetscFunctionBegin; 4784 /* Reset previously computed graph */ 4785 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 4786 /* Init local Graph struct */ 4787 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 4788 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 4789 4790 /* Check validity of the csr graph passed in by the user */ 4791 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 4792 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 4793 } 4794 4795 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 4796 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 4797 PetscInt *xadj,*adjncy; 4798 PetscInt nvtxs; 4799 PetscBool flg_row=PETSC_FALSE; 4800 4801 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4802 if (flg_row) { 4803 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 4804 pcbddc->computed_rowadj = PETSC_TRUE; 4805 } 4806 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 4807 } 4808 if (pcbddc->dbg_flag) { 4809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4810 } 4811 4812 /* Setup of Graph */ 4813 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 4814 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 4815 4816 /* attach info on disconnected subdomains if present */ 4817 if (pcbddc->n_local_subs) { 4818 PetscInt *local_subs; 4819 4820 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 4821 for (i=0;i<pcbddc->n_local_subs;i++) { 4822 const PetscInt *idxs; 4823 PetscInt nl,j; 4824 4825 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 4826 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4827 for (j=0;j<nl;j++) { 4828 local_subs[idxs[j]] = i; 4829 } 4830 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 4831 } 4832 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 4833 pcbddc->mat_graph->local_subs = local_subs; 4834 } 4835 4836 /* Graph's connected components analysis */ 4837 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 4838 PetscFunctionReturn(0); 4839 } 4840 4841 #undef __FUNCT__ 4842 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4843 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4844 { 4845 PetscInt i,j; 4846 PetscScalar *alphas; 4847 PetscErrorCode ierr; 4848 4849 PetscFunctionBegin; 4850 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4851 for (i=0;i<n;i++) { 4852 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4853 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 4854 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 4855 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 4856 } 4857 ierr = PetscFree(alphas);CHKERRQ(ierr); 4858 PetscFunctionReturn(0); 4859 } 4860 4861 #undef __FUNCT__ 4862 #define __FUNCT__ "MatISGetSubassemblingPattern" 4863 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 4864 { 4865 Mat A; 4866 PetscInt n_neighs,*neighs,*n_shared,**shared; 4867 PetscMPIInt size,rank,color; 4868 PetscInt *xadj,*adjncy; 4869 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4870 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 4871 PetscInt void_procs,*procs_candidates = NULL; 4872 PetscInt xadj_count, *count; 4873 PetscBool ismatis,use_vwgt=PETSC_FALSE; 4874 PetscSubcomm psubcomm; 4875 MPI_Comm subcomm; 4876 PetscErrorCode ierr; 4877 4878 PetscFunctionBegin; 4879 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 4880 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4881 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4882 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 4883 PetscValidLogicalCollectiveInt(mat,redprocs,3); 4884 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 4885 4886 if (have_void) *have_void = PETSC_FALSE; 4887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 4888 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4889 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 4890 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 4891 im_active = !!(n); 4892 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4893 void_procs = size - active_procs; 4894 /* get ranks of of non-active processes in mat communicator */ 4895 if (void_procs) { 4896 PetscInt ncand; 4897 4898 if (have_void) *have_void = PETSC_TRUE; 4899 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 4900 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 4901 for (i=0,ncand=0;i<size;i++) { 4902 if (!procs_candidates[i]) { 4903 procs_candidates[ncand++] = i; 4904 } 4905 } 4906 /* force n_subdomains to be not greater that the number of non-active processes */ 4907 *n_subdomains = PetscMin(void_procs,*n_subdomains); 4908 } 4909 4910 /* number of subdomains requested greater than active processes -> just shift the matrix */ 4911 if (active_procs < *n_subdomains) { 4912 PetscInt issize,isidx; 4913 if (im_active) { 4914 issize = 1; 4915 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 4916 isidx = procs_candidates[rank]; 4917 } else { 4918 isidx = rank; 4919 } 4920 } else { 4921 issize = 0; 4922 isidx = -1; 4923 } 4924 *n_subdomains = active_procs; 4925 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 4926 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 4927 PetscFunctionReturn(0); 4928 } 4929 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4930 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4931 threshold = PetscMax(threshold,2); 4932 4933 /* Get info on mapping */ 4934 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4935 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4936 4937 /* build local CSR graph of subdomains' connectivity */ 4938 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4939 xadj[0] = 0; 4940 xadj[1] = PetscMax(n_neighs-1,0); 4941 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4942 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4943 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 4944 for (i=1;i<n_neighs;i++) 4945 for (j=0;j<n_shared[i];j++) 4946 count[shared[i][j]] += 1; 4947 4948 xadj_count = 0; 4949 for (i=1;i<n_neighs;i++) { 4950 for (j=0;j<n_shared[i];j++) { 4951 if (count[shared[i][j]] < threshold) { 4952 adjncy[xadj_count] = neighs[i]; 4953 adjncy_wgt[xadj_count] = n_shared[i]; 4954 xadj_count++; 4955 break; 4956 } 4957 } 4958 } 4959 xadj[1] = xadj_count; 4960 ierr = PetscFree(count);CHKERRQ(ierr); 4961 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4962 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4963 4964 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4965 4966 /* Restrict work on active processes only */ 4967 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 4968 if (void_procs) { 4969 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 4970 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4971 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 4972 subcomm = PetscSubcommChild(psubcomm); 4973 } else { 4974 psubcomm = NULL; 4975 subcomm = PetscObjectComm((PetscObject)mat); 4976 } 4977 4978 v_wgt = NULL; 4979 if (!color) { 4980 ierr = PetscFree(xadj);CHKERRQ(ierr); 4981 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4982 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4983 } else { 4984 Mat subdomain_adj; 4985 IS new_ranks,new_ranks_contig; 4986 MatPartitioning partitioner; 4987 PetscInt rstart=0,rend=0; 4988 PetscInt *is_indices,*oldranks; 4989 PetscMPIInt size; 4990 PetscBool aggregate; 4991 4992 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 4993 if (void_procs) { 4994 PetscInt prank = rank; 4995 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4996 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 4997 for (i=0;i<xadj[1];i++) { 4998 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4999 } 5000 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5001 } else { 5002 oldranks = NULL; 5003 } 5004 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5005 if (aggregate) { /* TODO: all this part could be made more efficient */ 5006 PetscInt lrows,row,ncols,*cols; 5007 PetscMPIInt nrank; 5008 PetscScalar *vals; 5009 5010 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5011 lrows = 0; 5012 if (nrank<redprocs) { 5013 lrows = size/redprocs; 5014 if (nrank<size%redprocs) lrows++; 5015 } 5016 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5017 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5018 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5019 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5020 row = nrank; 5021 ncols = xadj[1]-xadj[0]; 5022 cols = adjncy; 5023 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5024 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5025 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5026 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5027 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5028 ierr = PetscFree(xadj);CHKERRQ(ierr); 5029 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5030 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5031 ierr = PetscFree(vals);CHKERRQ(ierr); 5032 if (use_vwgt) { 5033 Vec v; 5034 const PetscScalar *array; 5035 PetscInt nl; 5036 5037 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5038 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5039 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5040 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5041 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5042 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5043 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5044 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5045 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5046 ierr = VecDestroy(&v);CHKERRQ(ierr); 5047 } 5048 } else { 5049 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5050 if (use_vwgt) { 5051 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5052 v_wgt[0] = local_size; 5053 } 5054 } 5055 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5056 5057 /* Partition */ 5058 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5059 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5060 if (v_wgt) { 5061 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5062 } 5063 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5064 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5065 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5066 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5067 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5068 5069 /* renumber new_ranks to avoid "holes" in new set of processors */ 5070 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5071 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5072 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5073 if (!aggregate) { 5074 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5075 #if defined(PETSC_USE_DEBUG) 5076 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5077 #endif 5078 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5079 } else if (oldranks) { 5080 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5081 } else { 5082 ranks_send_to_idx[0] = is_indices[0]; 5083 } 5084 } else { 5085 PetscInt idxs[1]; 5086 PetscMPIInt tag; 5087 MPI_Request *reqs; 5088 5089 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5090 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5091 for (i=rstart;i<rend;i++) { 5092 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5093 } 5094 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5095 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5096 ierr = PetscFree(reqs);CHKERRQ(ierr); 5097 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5098 #if defined(PETSC_USE_DEBUG) 5099 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5100 #endif 5101 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5102 } else if (oldranks) { 5103 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5104 } else { 5105 ranks_send_to_idx[0] = idxs[0]; 5106 } 5107 } 5108 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5109 /* clean up */ 5110 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5111 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5112 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5113 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5114 } 5115 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5116 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5117 5118 /* assemble parallel IS for sends */ 5119 i = 1; 5120 if (!color) i=0; 5121 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 5125 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5126 5127 #undef __FUNCT__ 5128 #define __FUNCT__ "MatISSubassemble" 5129 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5130 { 5131 Mat local_mat; 5132 IS is_sends_internal; 5133 PetscInt rows,cols,new_local_rows; 5134 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5135 PetscBool ismatis,isdense,newisdense,destroy_mat; 5136 ISLocalToGlobalMapping l2gmap; 5137 PetscInt* l2gmap_indices; 5138 const PetscInt* is_indices; 5139 MatType new_local_type; 5140 /* buffers */ 5141 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5142 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5143 PetscInt *recv_buffer_idxs_local; 5144 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5145 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5146 /* MPI */ 5147 MPI_Comm comm,comm_n; 5148 PetscSubcomm subcomm; 5149 PetscMPIInt n_sends,n_recvs,commsize; 5150 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5151 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5152 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5153 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5154 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5155 PetscErrorCode ierr; 5156 5157 PetscFunctionBegin; 5158 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5159 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5160 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5161 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5162 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5163 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5164 PetscValidLogicalCollectiveBool(mat,reuse,6); 5165 PetscValidLogicalCollectiveInt(mat,nis,8); 5166 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5167 if (nvecs) { 5168 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 5169 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 5170 } 5171 /* further checks */ 5172 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5173 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 5174 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 5175 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 5176 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 5177 if (reuse && *mat_n) { 5178 PetscInt mrows,mcols,mnrows,mncols; 5179 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 5180 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 5181 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 5182 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 5183 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 5184 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 5185 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 5186 } 5187 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 5188 PetscValidLogicalCollectiveInt(mat,bs,0); 5189 5190 /* prepare IS for sending if not provided */ 5191 if (!is_sends) { 5192 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 5193 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 5194 } else { 5195 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 5196 is_sends_internal = is_sends; 5197 } 5198 5199 /* get comm */ 5200 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 5201 5202 /* compute number of sends */ 5203 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 5204 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 5205 5206 /* compute number of receives */ 5207 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 5208 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 5209 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 5210 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5211 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 5212 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 5213 ierr = PetscFree(iflags);CHKERRQ(ierr); 5214 5215 /* restrict comm if requested */ 5216 subcomm = 0; 5217 destroy_mat = PETSC_FALSE; 5218 if (restrict_comm) { 5219 PetscMPIInt color,subcommsize; 5220 5221 color = 0; 5222 if (restrict_full) { 5223 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 5224 } else { 5225 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 5226 } 5227 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 5228 subcommsize = commsize - subcommsize; 5229 /* check if reuse has been requested */ 5230 if (reuse) { 5231 if (*mat_n) { 5232 PetscMPIInt subcommsize2; 5233 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 5234 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 5235 comm_n = PetscObjectComm((PetscObject)*mat_n); 5236 } else { 5237 comm_n = PETSC_COMM_SELF; 5238 } 5239 } else { /* MAT_INITIAL_MATRIX */ 5240 PetscMPIInt rank; 5241 5242 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5243 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 5244 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 5245 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 5246 comm_n = PetscSubcommChild(subcomm); 5247 } 5248 /* flag to destroy *mat_n if not significative */ 5249 if (color) destroy_mat = PETSC_TRUE; 5250 } else { 5251 comm_n = comm; 5252 } 5253 5254 /* prepare send/receive buffers */ 5255 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 5256 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 5257 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 5258 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 5259 if (nis) { 5260 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 5261 } 5262 5263 /* Get data from local matrices */ 5264 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 5265 /* TODO: See below some guidelines on how to prepare the local buffers */ 5266 /* 5267 send_buffer_vals should contain the raw values of the local matrix 5268 send_buffer_idxs should contain: 5269 - MatType_PRIVATE type 5270 - PetscInt size_of_l2gmap 5271 - PetscInt global_row_indices[size_of_l2gmap] 5272 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 5273 */ 5274 else { 5275 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5276 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 5277 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 5278 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 5279 send_buffer_idxs[1] = i; 5280 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5281 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 5282 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 5283 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 5284 for (i=0;i<n_sends;i++) { 5285 ilengths_vals[is_indices[i]] = len*len; 5286 ilengths_idxs[is_indices[i]] = len+2; 5287 } 5288 } 5289 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 5290 /* additional is (if any) */ 5291 if (nis) { 5292 PetscMPIInt psum; 5293 PetscInt j; 5294 for (j=0,psum=0;j<nis;j++) { 5295 PetscInt plen; 5296 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5297 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 5298 psum += len+1; /* indices + lenght */ 5299 } 5300 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 5301 for (j=0,psum=0;j<nis;j++) { 5302 PetscInt plen; 5303 const PetscInt *is_array_idxs; 5304 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 5305 send_buffer_idxs_is[psum] = plen; 5306 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5307 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 5308 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 5309 psum += plen+1; /* indices + lenght */ 5310 } 5311 for (i=0;i<n_sends;i++) { 5312 ilengths_idxs_is[is_indices[i]] = psum; 5313 } 5314 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 5315 } 5316 5317 buf_size_idxs = 0; 5318 buf_size_vals = 0; 5319 buf_size_idxs_is = 0; 5320 buf_size_vecs = 0; 5321 for (i=0;i<n_recvs;i++) { 5322 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5323 buf_size_vals += (PetscInt)olengths_vals[i]; 5324 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 5325 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 5326 } 5327 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 5328 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 5329 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 5330 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 5331 5332 /* get new tags for clean communications */ 5333 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 5334 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 5335 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 5336 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 5337 5338 /* allocate for requests */ 5339 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 5340 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 5341 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 5342 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 5343 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 5344 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 5345 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 5346 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 5347 5348 /* communications */ 5349 ptr_idxs = recv_buffer_idxs; 5350 ptr_vals = recv_buffer_vals; 5351 ptr_idxs_is = recv_buffer_idxs_is; 5352 ptr_vecs = recv_buffer_vecs; 5353 for (i=0;i<n_recvs;i++) { 5354 source_dest = onodes[i]; 5355 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 5356 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 5357 ptr_idxs += olengths_idxs[i]; 5358 ptr_vals += olengths_vals[i]; 5359 if (nis) { 5360 source_dest = onodes_is[i]; 5361 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 5362 ptr_idxs_is += olengths_idxs_is[i]; 5363 } 5364 if (nvecs) { 5365 source_dest = onodes[i]; 5366 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 5367 ptr_vecs += olengths_idxs[i]-2; 5368 } 5369 } 5370 for (i=0;i<n_sends;i++) { 5371 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 5372 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 5373 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 5374 if (nis) { 5375 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 5376 } 5377 if (nvecs) { 5378 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5379 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 5380 } 5381 } 5382 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 5383 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 5384 5385 /* assemble new l2g map */ 5386 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5387 ptr_idxs = recv_buffer_idxs; 5388 new_local_rows = 0; 5389 for (i=0;i<n_recvs;i++) { 5390 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5391 ptr_idxs += olengths_idxs[i]; 5392 } 5393 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 5394 ptr_idxs = recv_buffer_idxs; 5395 new_local_rows = 0; 5396 for (i=0;i<n_recvs;i++) { 5397 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 5398 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 5399 ptr_idxs += olengths_idxs[i]; 5400 } 5401 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 5402 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 5403 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 5404 5405 /* infer new local matrix type from received local matrices type */ 5406 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 5407 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 5408 if (n_recvs) { 5409 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 5410 ptr_idxs = recv_buffer_idxs; 5411 for (i=0;i<n_recvs;i++) { 5412 if ((PetscInt)new_local_type_private != *ptr_idxs) { 5413 new_local_type_private = MATAIJ_PRIVATE; 5414 break; 5415 } 5416 ptr_idxs += olengths_idxs[i]; 5417 } 5418 switch (new_local_type_private) { 5419 case MATDENSE_PRIVATE: 5420 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 5421 new_local_type = MATSEQAIJ; 5422 bs = 1; 5423 } else { /* if I receive only 1 dense matrix */ 5424 new_local_type = MATSEQDENSE; 5425 bs = 1; 5426 } 5427 break; 5428 case MATAIJ_PRIVATE: 5429 new_local_type = MATSEQAIJ; 5430 bs = 1; 5431 break; 5432 case MATBAIJ_PRIVATE: 5433 new_local_type = MATSEQBAIJ; 5434 break; 5435 case MATSBAIJ_PRIVATE: 5436 new_local_type = MATSEQSBAIJ; 5437 break; 5438 default: 5439 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 5440 break; 5441 } 5442 } else { /* by default, new_local_type is seqdense */ 5443 new_local_type = MATSEQDENSE; 5444 bs = 1; 5445 } 5446 5447 /* create MATIS object if needed */ 5448 if (!reuse) { 5449 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 5450 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5451 } else { 5452 /* it also destroys the local matrices */ 5453 if (*mat_n) { 5454 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 5455 } else { /* this is a fake object */ 5456 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 5457 } 5458 } 5459 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 5460 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 5461 5462 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5463 5464 /* Global to local map of received indices */ 5465 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 5466 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 5467 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 5468 5469 /* restore attributes -> type of incoming data and its size */ 5470 buf_size_idxs = 0; 5471 for (i=0;i<n_recvs;i++) { 5472 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 5473 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 5474 buf_size_idxs += (PetscInt)olengths_idxs[i]; 5475 } 5476 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 5477 5478 /* set preallocation */ 5479 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 5480 if (!newisdense) { 5481 PetscInt *new_local_nnz=0; 5482 5483 ptr_idxs = recv_buffer_idxs_local; 5484 if (n_recvs) { 5485 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 5486 } 5487 for (i=0;i<n_recvs;i++) { 5488 PetscInt j; 5489 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 5490 for (j=0;j<*(ptr_idxs+1);j++) { 5491 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 5492 } 5493 } else { 5494 /* TODO */ 5495 } 5496 ptr_idxs += olengths_idxs[i]; 5497 } 5498 if (new_local_nnz) { 5499 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 5500 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 5501 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 5502 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5503 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 5504 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 5505 } else { 5506 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5507 } 5508 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 5509 } else { 5510 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 5511 } 5512 5513 /* set values */ 5514 ptr_vals = recv_buffer_vals; 5515 ptr_idxs = recv_buffer_idxs_local; 5516 for (i=0;i<n_recvs;i++) { 5517 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 5518 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 5519 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 5520 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5521 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 5522 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 5523 } else { 5524 /* TODO */ 5525 } 5526 ptr_idxs += olengths_idxs[i]; 5527 ptr_vals += olengths_vals[i]; 5528 } 5529 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5530 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5531 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5532 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 5534 5535 #if 0 5536 if (!restrict_comm) { /* check */ 5537 Vec lvec,rvec; 5538 PetscReal infty_error; 5539 5540 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 5541 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 5542 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 5543 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 5544 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 5545 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5546 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 5547 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 5548 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 5549 } 5550 #endif 5551 5552 /* assemble new additional is (if any) */ 5553 if (nis) { 5554 PetscInt **temp_idxs,*count_is,j,psum; 5555 5556 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5557 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 5558 ptr_idxs = recv_buffer_idxs_is; 5559 psum = 0; 5560 for (i=0;i<n_recvs;i++) { 5561 for (j=0;j<nis;j++) { 5562 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5563 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 5564 psum += plen; 5565 ptr_idxs += plen+1; /* shift pointer to received data */ 5566 } 5567 } 5568 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 5569 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 5570 for (i=1;i<nis;i++) { 5571 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 5572 } 5573 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 5574 ptr_idxs = recv_buffer_idxs_is; 5575 for (i=0;i<n_recvs;i++) { 5576 for (j=0;j<nis;j++) { 5577 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 5578 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 5579 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 5580 ptr_idxs += plen+1; /* shift pointer to received data */ 5581 } 5582 } 5583 for (i=0;i<nis;i++) { 5584 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5585 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 5586 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5587 } 5588 ierr = PetscFree(count_is);CHKERRQ(ierr); 5589 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 5590 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 5591 } 5592 /* free workspace */ 5593 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 5594 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5595 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 5596 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5597 if (isdense) { 5598 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 5599 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 5600 } else { 5601 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 5602 } 5603 if (nis) { 5604 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5605 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 5606 } 5607 5608 if (nvecs) { 5609 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5610 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5611 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5612 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5613 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 5614 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 5615 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 5616 /* set values */ 5617 ptr_vals = recv_buffer_vecs; 5618 ptr_idxs = recv_buffer_idxs_local; 5619 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5620 for (i=0;i<n_recvs;i++) { 5621 PetscInt j; 5622 for (j=0;j<*(ptr_idxs+1);j++) { 5623 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 5624 } 5625 ptr_idxs += olengths_idxs[i]; 5626 ptr_vals += olengths_idxs[i]-2; 5627 } 5628 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 5629 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 5630 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 5631 } 5632 5633 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 5634 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 5635 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 5636 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 5637 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 5638 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 5639 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 5640 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 5641 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 5642 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 5643 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 5644 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 5645 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 5646 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 5647 ierr = PetscFree(onodes);CHKERRQ(ierr); 5648 if (nis) { 5649 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 5650 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 5651 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 5652 } 5653 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 5654 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 5655 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 5656 for (i=0;i<nis;i++) { 5657 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5658 } 5659 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 5660 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 5661 } 5662 *mat_n = NULL; 5663 } 5664 PetscFunctionReturn(0); 5665 } 5666 5667 /* temporary hack into ksp private data structure */ 5668 #include <petsc/private/kspimpl.h> 5669 5670 #undef __FUNCT__ 5671 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 5672 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 5673 { 5674 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5675 PC_IS *pcis = (PC_IS*)pc->data; 5676 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 5677 Mat coarsedivudotp = NULL; 5678 MatNullSpace CoarseNullSpace = NULL; 5679 ISLocalToGlobalMapping coarse_islg; 5680 IS coarse_is,*isarray; 5681 PetscInt i,im_active=-1,active_procs=-1; 5682 PetscInt nis,nisdofs,nisneu,nisvert; 5683 PC pc_temp; 5684 PCType coarse_pc_type; 5685 KSPType coarse_ksp_type; 5686 PetscBool multilevel_requested,multilevel_allowed; 5687 PetscBool isredundant,isbddc,isnn,coarse_reuse; 5688 Mat t_coarse_mat_is; 5689 PetscInt ncoarse; 5690 PetscBool compute_vecs = PETSC_FALSE; 5691 PetscScalar *array; 5692 MatReuse coarse_mat_reuse; 5693 PetscBool restr, full_restr, have_void; 5694 PetscErrorCode ierr; 5695 5696 PetscFunctionBegin; 5697 /* Assign global numbering to coarse dofs */ 5698 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 5699 PetscInt ocoarse_size; 5700 compute_vecs = PETSC_TRUE; 5701 ocoarse_size = pcbddc->coarse_size; 5702 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 5703 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 5704 /* see if we can avoid some work */ 5705 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 5706 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 5707 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 5708 PC pc; 5709 PetscBool isbddc; 5710 5711 /* temporary workaround since PCBDDC does not have a reset method so far */ 5712 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 5713 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5714 if (isbddc) { 5715 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 5716 } else { 5717 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 5718 } 5719 coarse_reuse = PETSC_FALSE; 5720 } else { /* we can safely reuse already computed coarse matrix */ 5721 coarse_reuse = PETSC_TRUE; 5722 } 5723 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 5724 coarse_reuse = PETSC_FALSE; 5725 } 5726 /* reset any subassembling information */ 5727 if (!coarse_reuse || pcbddc->recompute_topography) { 5728 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5729 } 5730 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 5731 coarse_reuse = PETSC_TRUE; 5732 } 5733 /* assemble coarse matrix */ 5734 if (coarse_reuse && pcbddc->coarse_ksp) { 5735 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5736 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5737 coarse_mat_reuse = MAT_REUSE_MATRIX; 5738 } else { 5739 coarse_mat = NULL; 5740 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5741 } 5742 5743 /* creates temporary l2gmap and IS for coarse indexes */ 5744 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 5745 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 5746 5747 /* creates temporary MATIS object for coarse matrix */ 5748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 5749 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5750 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 5751 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 5752 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 5753 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 5754 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5755 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 5757 5758 /* count "active" (i.e. with positive local size) and "void" processes */ 5759 im_active = !!(pcis->n); 5760 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5761 5762 /* determine number of process partecipating to coarse solver and compute subassembling pattern */ 5763 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 5764 /* full_restr : just use the receivers from the subassembling pattern */ 5765 coarse_mat_is = NULL; 5766 multilevel_allowed = PETSC_FALSE; 5767 multilevel_requested = PETSC_FALSE; 5768 full_restr = PETSC_TRUE; 5769 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 5770 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 5771 if (multilevel_requested) { 5772 ncoarse = active_procs/pcbddc->coarsening_ratio; 5773 restr = PETSC_FALSE; 5774 full_restr = PETSC_FALSE; 5775 } else { 5776 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 5777 restr = PETSC_TRUE; 5778 full_restr = PETSC_TRUE; 5779 } 5780 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 5781 ncoarse = PetscMax(1,ncoarse); 5782 if (!pcbddc->coarse_subassembling) { 5783 if (pcbddc->coarsening_ratio > 1) { 5784 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 5785 } else { 5786 PetscMPIInt size,rank; 5787 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5788 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5789 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 5790 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5791 } 5792 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 5793 PetscInt psum; 5794 PetscMPIInt size; 5795 if (pcbddc->coarse_ksp) psum = 1; 5796 else psum = 0; 5797 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5798 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 5799 if (ncoarse < size) have_void = PETSC_TRUE; 5800 } 5801 /* determine if we can go multilevel */ 5802 if (multilevel_requested) { 5803 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 5804 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 5805 } 5806 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 5807 5808 /* dump subassembling pattern */ 5809 if (pcbddc->dbg_flag && multilevel_allowed) { 5810 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 5811 } 5812 5813 /* compute dofs splitting and neumann boundaries for coarse dofs */ 5814 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 5815 PetscInt *tidxs,*tidxs2,nout,tsize,i; 5816 const PetscInt *idxs; 5817 ISLocalToGlobalMapping tmap; 5818 5819 /* create map between primal indices (in local representative ordering) and local primal numbering */ 5820 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 5821 /* allocate space for temporary storage */ 5822 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 5823 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 5824 /* allocate for IS array */ 5825 nisdofs = pcbddc->n_ISForDofsLocal; 5826 nisneu = !!pcbddc->NeumannBoundariesLocal; 5827 nisvert = 0; /* nisvert is not used */ 5828 nis = nisdofs + nisneu + nisvert; 5829 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 5830 /* dofs splitting */ 5831 for (i=0;i<nisdofs;i++) { 5832 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 5833 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 5834 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5835 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5836 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 5837 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5838 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 5839 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 5840 } 5841 /* neumann boundaries */ 5842 if (pcbddc->NeumannBoundariesLocal) { 5843 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 5844 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 5845 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5846 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 5847 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 5848 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 5849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 5850 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 5851 } 5852 /* free memory */ 5853 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5854 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5855 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5856 } else { 5857 nis = 0; 5858 nisdofs = 0; 5859 nisneu = 0; 5860 nisvert = 0; 5861 isarray = NULL; 5862 } 5863 /* destroy no longer needed map */ 5864 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5865 5866 /* subassemble */ 5867 if (multilevel_allowed) { 5868 Vec vp[1]; 5869 PetscInt nvecs = 0; 5870 PetscBool reuse,reuser; 5871 5872 if (coarse_mat) reuse = PETSC_TRUE; 5873 else reuse = PETSC_FALSE; 5874 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5875 vp[0] = NULL; 5876 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 5877 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 5878 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 5879 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 5880 nvecs = 1; 5881 5882 if (pcbddc->divudotp) { 5883 Mat B,loc_divudotp; 5884 Vec v,p; 5885 IS dummy; 5886 PetscInt np; 5887 5888 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 5889 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 5890 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 5891 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 5892 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 5893 ierr = VecSet(p,1.);CHKERRQ(ierr); 5894 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 5895 ierr = VecDestroy(&p);CHKERRQ(ierr); 5896 ierr = MatDestroy(&B);CHKERRQ(ierr); 5897 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 5898 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 5899 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 5900 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 5901 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 5902 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5903 ierr = VecDestroy(&v);CHKERRQ(ierr); 5904 } 5905 } 5906 if (reuser) { 5907 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5908 } else { 5909 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 5910 } 5911 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 5912 PetscScalar *arraym,*arrayv; 5913 PetscInt nl; 5914 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 5915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 5916 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5917 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 5918 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 5919 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 5920 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 5921 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 5922 } else { 5923 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 5924 } 5925 } else { 5926 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 5927 } 5928 if (coarse_mat_is || coarse_mat) { 5929 PetscMPIInt size; 5930 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 5931 if (!multilevel_allowed) { 5932 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5933 } else { 5934 Mat A; 5935 5936 /* if this matrix is present, it means we are not reusing the coarse matrix */ 5937 if (coarse_mat_is) { 5938 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 5939 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5940 coarse_mat = coarse_mat_is; 5941 } 5942 /* be sure we don't have MatSeqDENSE as local mat */ 5943 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 5944 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 5945 } 5946 } 5947 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5948 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5949 5950 /* create local to global scatters for coarse problem */ 5951 if (compute_vecs) { 5952 PetscInt lrows; 5953 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5954 if (coarse_mat) { 5955 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 5956 } else { 5957 lrows = 0; 5958 } 5959 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5960 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5961 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5962 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5963 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5964 } 5965 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5966 5967 /* set defaults for coarse KSP and PC */ 5968 if (multilevel_allowed) { 5969 coarse_ksp_type = KSPRICHARDSON; 5970 coarse_pc_type = PCBDDC; 5971 } else { 5972 coarse_ksp_type = KSPPREONLY; 5973 coarse_pc_type = PCREDUNDANT; 5974 } 5975 5976 /* print some info if requested */ 5977 if (pcbddc->dbg_flag) { 5978 if (!multilevel_allowed) { 5979 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5980 if (multilevel_requested) { 5981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5982 } else if (pcbddc->max_levels) { 5983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5984 } 5985 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5986 } 5987 } 5988 5989 /* create the coarse KSP object only once with defaults */ 5990 if (coarse_mat) { 5991 PetscViewer dbg_viewer = NULL; 5992 if (pcbddc->dbg_flag) { 5993 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 5994 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5995 } 5996 if (!pcbddc->coarse_ksp) { 5997 char prefix[256],str_level[16]; 5998 size_t len; 5999 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6000 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6001 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6002 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6003 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6004 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6005 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6006 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6007 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6008 /* prefix */ 6009 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6010 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6011 if (!pcbddc->current_level) { 6012 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6013 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6014 } else { 6015 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6016 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6017 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6018 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6019 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6020 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6021 } 6022 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6023 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6024 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6025 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6026 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6027 /* allow user customization */ 6028 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6029 } 6030 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6031 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6032 if (nisdofs) { 6033 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6034 for (i=0;i<nisdofs;i++) { 6035 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6036 } 6037 } 6038 if (nisneu) { 6039 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6040 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6041 } 6042 if (nisvert) { 6043 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6044 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6045 } 6046 6047 /* get some info after set from options */ 6048 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6049 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6050 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6051 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6052 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6053 isbddc = PETSC_FALSE; 6054 } 6055 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6056 if (isredundant) { 6057 KSP inner_ksp; 6058 PC inner_pc; 6059 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6060 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6061 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6062 } 6063 6064 /* parameters which miss an API */ 6065 if (isbddc) { 6066 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6067 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6068 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6069 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6070 if (pcbddc_coarse->benign_saddle_point) { 6071 Mat coarsedivudotp_is; 6072 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6073 IS row,col; 6074 const PetscInt *gidxs; 6075 PetscInt n,st,M,N; 6076 6077 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6078 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6079 st = st-n; 6080 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6081 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6082 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6083 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6084 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6085 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6086 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6087 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6088 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6089 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6090 ierr = ISDestroy(&row);CHKERRQ(ierr); 6091 ierr = ISDestroy(&col);CHKERRQ(ierr); 6092 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6093 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6094 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6095 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6096 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6097 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6098 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6099 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6100 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,NULL);CHKERRQ(ierr); 6101 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6102 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6103 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6104 } 6105 } 6106 6107 /* propagate symmetry info of coarse matrix */ 6108 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6109 if (pc->pmat->symmetric_set) { 6110 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6111 } 6112 if (pc->pmat->hermitian_set) { 6113 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6114 } 6115 if (pc->pmat->spd_set) { 6116 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6117 } 6118 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6119 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6120 } 6121 /* set operators */ 6122 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6123 if (pcbddc->dbg_flag) { 6124 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6125 } 6126 } 6127 ierr = PetscFree(isarray);CHKERRQ(ierr); 6128 #if 0 6129 { 6130 PetscViewer viewer; 6131 char filename[256]; 6132 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6133 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6134 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6135 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6136 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6137 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6138 } 6139 #endif 6140 6141 if (pcbddc->coarse_ksp) { 6142 Vec crhs,csol; 6143 6144 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6145 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6146 if (!csol) { 6147 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6148 } 6149 if (!crhs) { 6150 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6151 } 6152 } 6153 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6154 6155 /* compute null space for coarse solver if the benign trick has been requested */ 6156 if (pcbddc->benign_null) { 6157 6158 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6159 for (i=0;i<pcbddc->benign_n;i++) { 6160 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6161 } 6162 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6163 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6164 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6165 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6166 if (coarse_mat) { 6167 Vec nullv; 6168 PetscScalar *array,*array2; 6169 PetscInt nl; 6170 6171 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 6172 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 6173 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6174 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 6175 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 6176 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 6177 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 6178 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 6179 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 6180 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 6181 } 6182 } 6183 6184 if (pcbddc->coarse_ksp) { 6185 PetscBool ispreonly; 6186 6187 if (CoarseNullSpace) { 6188 PetscBool isnull; 6189 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 6190 if (isnull) { 6191 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 6192 } 6193 /* TODO: add local nullspaces (if any) */ 6194 } 6195 /* setup coarse ksp */ 6196 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 6197 /* Check coarse problem if in debug mode or if solving with an iterative method */ 6198 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 6199 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 6200 KSP check_ksp; 6201 KSPType check_ksp_type; 6202 PC check_pc; 6203 Vec check_vec,coarse_vec; 6204 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 6205 PetscInt its; 6206 PetscBool compute_eigs; 6207 PetscReal *eigs_r,*eigs_c; 6208 PetscInt neigs; 6209 const char *prefix; 6210 6211 /* Create ksp object suitable for estimation of extreme eigenvalues */ 6212 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 6213 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6214 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6215 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 6216 /* prevent from setup unneeded object */ 6217 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 6218 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 6219 if (ispreonly) { 6220 check_ksp_type = KSPPREONLY; 6221 compute_eigs = PETSC_FALSE; 6222 } else { 6223 check_ksp_type = KSPGMRES; 6224 compute_eigs = PETSC_TRUE; 6225 } 6226 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 6227 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 6228 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 6229 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 6230 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 6231 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 6232 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 6233 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 6234 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 6235 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 6236 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 6237 /* create random vec */ 6238 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 6239 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 6240 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6241 /* solve coarse problem */ 6242 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 6243 /* set eigenvalue estimation if preonly has not been requested */ 6244 if (compute_eigs) { 6245 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 6246 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 6247 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 6248 if (neigs) { 6249 lambda_max = eigs_r[neigs-1]; 6250 lambda_min = eigs_r[0]; 6251 if (pcbddc->use_coarse_estimates) { 6252 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 6253 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 6254 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 6255 } 6256 } 6257 } 6258 } 6259 6260 /* check coarse problem residual error */ 6261 if (pcbddc->dbg_flag) { 6262 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 6263 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6264 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 6265 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6266 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 6267 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 6268 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 6269 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 6270 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 6271 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 6272 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 6273 if (CoarseNullSpace) { 6274 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 6275 } 6276 if (compute_eigs) { 6277 PetscReal lambda_max_s,lambda_min_s; 6278 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 6279 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 6280 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 6281 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 6282 for (i=0;i<neigs;i++) { 6283 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 6284 } 6285 } 6286 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 6287 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 6288 } 6289 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 6290 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 6291 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 6292 if (compute_eigs) { 6293 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 6294 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 6295 } 6296 } 6297 } 6298 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 6299 /* print additional info */ 6300 if (pcbddc->dbg_flag) { 6301 /* waits until all processes reaches this point */ 6302 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 6303 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 6304 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6305 } 6306 6307 /* free memory */ 6308 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 6309 PetscFunctionReturn(0); 6310 } 6311 6312 #undef __FUNCT__ 6313 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 6314 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 6315 { 6316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 6317 PC_IS* pcis = (PC_IS*)pc->data; 6318 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6319 IS subset,subset_mult,subset_n; 6320 PetscInt local_size,coarse_size=0; 6321 PetscInt *local_primal_indices=NULL; 6322 const PetscInt *t_local_primal_indices; 6323 PetscErrorCode ierr; 6324 6325 PetscFunctionBegin; 6326 /* Compute global number of coarse dofs */ 6327 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 6328 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 6329 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 6330 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 6332 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 6333 ierr = ISDestroy(&subset);CHKERRQ(ierr); 6334 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 6335 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 6336 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 6337 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 6338 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6339 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 6340 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 6341 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 6342 6343 /* check numbering */ 6344 if (pcbddc->dbg_flag) { 6345 PetscScalar coarsesum,*array,*array2; 6346 PetscInt i; 6347 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 6348 6349 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6351 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 6352 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6353 /* counter */ 6354 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6355 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6356 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6357 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6358 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6359 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6360 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 6361 for (i=0;i<pcbddc->local_primal_size;i++) { 6362 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6363 } 6364 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6365 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6366 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6367 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6368 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6369 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6370 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6371 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6372 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6373 for (i=0;i<pcis->n;i++) { 6374 if (array[i] != 0.0 && array[i] != array2[i]) { 6375 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 6376 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 6377 set_error = PETSC_TRUE; 6378 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 6379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 6380 } 6381 } 6382 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 6383 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6384 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6385 for (i=0;i<pcis->n;i++) { 6386 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 6387 } 6388 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6389 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6390 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6391 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6392 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 6393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 6394 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 6395 PetscInt *gidxs; 6396 6397 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 6398 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 6399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 6400 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6402 for (i=0;i<pcbddc->local_primal_size;i++) { 6403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 6404 } 6405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6406 ierr = PetscFree(gidxs);CHKERRQ(ierr); 6407 } 6408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6409 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6410 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 6411 } 6412 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 6413 /* get back data */ 6414 *coarse_size_n = coarse_size; 6415 *local_primal_indices_n = local_primal_indices; 6416 PetscFunctionReturn(0); 6417 } 6418 6419 #undef __FUNCT__ 6420 #define __FUNCT__ "PCBDDCGlobalToLocal" 6421 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 6422 { 6423 IS localis_t; 6424 PetscInt i,lsize,*idxs,n; 6425 PetscScalar *vals; 6426 PetscErrorCode ierr; 6427 6428 PetscFunctionBegin; 6429 /* get indices in local ordering exploiting local to global map */ 6430 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 6432 for (i=0;i<lsize;i++) vals[i] = 1.0; 6433 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6434 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 6435 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 6436 if (idxs) { /* multilevel guard */ 6437 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 6438 } 6439 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 6440 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 6441 ierr = PetscFree(vals);CHKERRQ(ierr); 6442 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 6443 /* now compute set in local ordering */ 6444 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6445 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6446 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6447 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 6448 for (i=0,lsize=0;i<n;i++) { 6449 if (PetscRealPart(vals[i]) > 0.5) { 6450 lsize++; 6451 } 6452 } 6453 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 6454 for (i=0,lsize=0;i<n;i++) { 6455 if (PetscRealPart(vals[i]) > 0.5) { 6456 idxs[lsize++] = i; 6457 } 6458 } 6459 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 6460 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 6461 *localis = localis_t; 6462 PetscFunctionReturn(0); 6463 } 6464 6465 #undef __FUNCT__ 6466 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 6467 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 6468 { 6469 PC_IS *pcis=(PC_IS*)pc->data; 6470 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6471 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6472 Mat S_j; 6473 PetscInt *used_xadj,*used_adjncy; 6474 PetscBool free_used_adj; 6475 PetscErrorCode ierr; 6476 6477 PetscFunctionBegin; 6478 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 6479 free_used_adj = PETSC_FALSE; 6480 if (pcbddc->sub_schurs_layers == -1) { 6481 used_xadj = NULL; 6482 used_adjncy = NULL; 6483 } else { 6484 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 6485 used_xadj = pcbddc->mat_graph->xadj; 6486 used_adjncy = pcbddc->mat_graph->adjncy; 6487 } else if (pcbddc->computed_rowadj) { 6488 used_xadj = pcbddc->mat_graph->xadj; 6489 used_adjncy = pcbddc->mat_graph->adjncy; 6490 } else { 6491 PetscBool flg_row=PETSC_FALSE; 6492 const PetscInt *xadj,*adjncy; 6493 PetscInt nvtxs; 6494 6495 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6496 if (flg_row) { 6497 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 6498 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 6499 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 6500 free_used_adj = PETSC_TRUE; 6501 } else { 6502 pcbddc->sub_schurs_layers = -1; 6503 used_xadj = NULL; 6504 used_adjncy = NULL; 6505 } 6506 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 6507 } 6508 } 6509 6510 /* setup sub_schurs data */ 6511 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6512 if (!sub_schurs->schur_explicit) { 6513 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 6514 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6515 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 6516 } else { 6517 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 6518 PetscBool isseqaij,need_change = PETSC_FALSE;; 6519 PetscInt benign_n; 6520 Mat change = NULL; 6521 Vec scaling = NULL; 6522 IS change_primal = NULL; 6523 6524 if (!pcbddc->use_vertices && reuse_solvers) { 6525 PetscInt n_vertices; 6526 6527 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6528 reuse_solvers = (PetscBool)!n_vertices; 6529 } 6530 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 6531 if (!isseqaij) { 6532 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 6533 if (matis->A == pcbddc->local_mat) { 6534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 6535 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6536 } else { 6537 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 6538 } 6539 } 6540 if (!pcbddc->benign_change_explicit) { 6541 benign_n = pcbddc->benign_n; 6542 } else { 6543 benign_n = 0; 6544 } 6545 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 6546 We need a global reduction to avoid possible deadlocks. 6547 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 6548 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 6549 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 6550 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6551 need_change = (PetscBool)(!need_change); 6552 } 6553 /* If the user defines additional constraints, we import them here. 6554 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 6555 if (need_change) { 6556 PC_IS *pcisf; 6557 PC_BDDC *pcbddcf; 6558 PC pcf; 6559 6560 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 6561 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 6562 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 6563 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 6564 /* hacks */ 6565 pcisf = (PC_IS*)pcf->data; 6566 pcisf->is_B_local = pcis->is_B_local; 6567 pcisf->vec1_N = pcis->vec1_N; 6568 pcisf->BtoNmap = pcis->BtoNmap; 6569 pcisf->n = pcis->n; 6570 pcisf->n_B = pcis->n_B; 6571 pcbddcf = (PC_BDDC*)pcf->data; 6572 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 6573 pcbddcf->mat_graph = pcbddc->mat_graph; 6574 pcbddcf->use_faces = PETSC_TRUE; 6575 pcbddcf->use_change_of_basis = PETSC_TRUE; 6576 pcbddcf->use_change_on_faces = PETSC_TRUE; 6577 pcbddcf->use_qr_single = PETSC_TRUE; 6578 pcbddcf->fake_change = PETSC_TRUE; 6579 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 6580 /* store information on primal vertices and change of basis (in local numbering) */ 6581 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 6582 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 6583 change = pcbddcf->ConstraintMatrix; 6584 pcbddcf->ConstraintMatrix = NULL; 6585 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 6586 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 6587 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 6588 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 6589 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 6590 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 6591 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 6592 pcf->ops->destroy = NULL; 6593 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 6594 } 6595 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 6596 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 6597 ierr = MatDestroy(&change);CHKERRQ(ierr); 6598 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 6599 } 6600 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6601 6602 /* free adjacency */ 6603 if (free_used_adj) { 6604 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 6605 } 6606 PetscFunctionReturn(0); 6607 } 6608 6609 #undef __FUNCT__ 6610 #define __FUNCT__ "PCBDDCInitSubSchurs" 6611 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 6612 { 6613 PC_IS *pcis=(PC_IS*)pc->data; 6614 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6615 PCBDDCGraph graph; 6616 PetscErrorCode ierr; 6617 6618 PetscFunctionBegin; 6619 /* attach interface graph for determining subsets */ 6620 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 6621 IS verticesIS,verticescomm; 6622 PetscInt vsize,*idxs; 6623 6624 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 6625 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 6626 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6627 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 6628 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 6629 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 6630 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 6631 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 6632 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 6633 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 6634 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 6635 } else { 6636 graph = pcbddc->mat_graph; 6637 } 6638 /* print some info */ 6639 if (pcbddc->dbg_flag) { 6640 IS vertices; 6641 PetscInt nv,nedges,nfaces; 6642 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 6643 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 6644 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 6645 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 6646 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 6649 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 6650 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 6651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6652 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 6653 } 6654 6655 /* sub_schurs init */ 6656 if (!pcbddc->sub_schurs) { 6657 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 6658 } 6659 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 6660 6661 /* free graph struct */ 6662 if (pcbddc->sub_schurs_rebuild) { 6663 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 6664 } 6665 PetscFunctionReturn(0); 6666 } 6667 6668 #undef __FUNCT__ 6669 #define __FUNCT__ "PCBDDCCheckOperator" 6670 PetscErrorCode PCBDDCCheckOperator(PC pc) 6671 { 6672 PC_IS *pcis=(PC_IS*)pc->data; 6673 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 6674 PetscErrorCode ierr; 6675 6676 PetscFunctionBegin; 6677 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 6678 IS zerodiag = NULL; 6679 Mat S_j,B0_B=NULL; 6680 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 6681 PetscScalar *p0_check,*array,*array2; 6682 PetscReal norm; 6683 PetscInt i; 6684 6685 /* B0 and B0_B */ 6686 if (zerodiag) { 6687 IS dummy; 6688 6689 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 6690 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 6691 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 6692 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6693 } 6694 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 6695 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 6696 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 6697 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6698 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6699 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6700 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6701 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 6702 /* S_j */ 6703 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 6704 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 6705 6706 /* mimic vector in \widetilde{W}_\Gamma */ 6707 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 6708 /* continuous in primal space */ 6709 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 6710 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6711 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6712 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6713 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 6714 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 6715 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6716 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6717 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6718 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6719 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6720 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6721 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 6722 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 6723 6724 /* assemble rhs for coarse problem */ 6725 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 6726 /* local with Schur */ 6727 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 6728 if (zerodiag) { 6729 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6730 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 6731 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6732 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 6733 } 6734 /* sum on primal nodes the local contributions */ 6735 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6736 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6737 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6738 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6739 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 6740 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 6741 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6742 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 6743 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6744 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6745 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6746 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6747 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6748 /* scale primal nodes (BDDC sums contibutions) */ 6749 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 6750 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 6751 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 6752 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 6753 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 6754 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6755 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6756 /* global: \widetilde{B0}_B w_\Gamma */ 6757 if (zerodiag) { 6758 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 6759 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 6760 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 6761 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 6762 } 6763 /* BDDC */ 6764 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 6765 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 6766 6767 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 6768 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 6769 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 6770 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 6771 for (i=0;i<pcbddc->benign_n;i++) { 6772 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 6773 } 6774 ierr = PetscFree(p0_check);CHKERRQ(ierr); 6775 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 6776 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 6777 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 6778 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 6779 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 6780 } 6781 PetscFunctionReturn(0); 6782 } 6783